#!/export/opt1/perl/bin/perl -w
#DOC#################################################
#DOC
#DOC program collector.pl
#DOC
#DOC author - M Evans
#DOC date - October 2003
#DOC
#DOC purpose : Collect data requested from external data centers
#DOC
#DOC variables : @e_mail - Incoming e-mail
#DOC             @filename - List of files to be processed
#DOC             @temp - Temporary array, used to identify all data centers
#DOC             @temp2 - Temporary array, used to hold output of 'split' commands
#DOC             %from_address - Hash containing e-mail addresses of data centers
#DOC             $bad - Number of bad data requests
#DOC             $directory - Directory holding incoming e-mails
#DOC             $filename - Used to loop over each element of @filename
#DOC             $good - Number of successful data requests
#DOC             $line - Used to loop over each element of @e_mail
#DOC             $request - SQL request
#DOC             $success - Return value of cndc_process, netdc_process or breqfast_process
#DOC
#DOC environment variables : $ENV{DATA_CENTER} - Table containing data center information
#DOC                         $ENV{LDA} - Set on logging into Oracle
#DOC                         $ENV{LOG_TABLE} - Table containing WFARS_LOG
#DOC                         $ENV{TEMP_DIR} - Temporary directory
#DOC
#DOC calls subroutines : breqfast_process
#DOC                     cndc_process
#DOC                     list_dir
#DOC                     make_temp_dir (from 'shared.pl')
#DOC                     netdc_process
#DOC                     oracle_login (from 'shared.pl')
#DOC                     oracle_logout (from 'shared.pl')
#DOC                     oracle_request (from 'shared.pl')
#DOC
#DOC other subroutines : codeco (called by 'cndc_process')
#DOC                     move_sac (called by 'cndc_process' and 'rdseed')
#DOC                     oracle_command (from 'shared.pl', called by 'update_log')
#DOC                     rdseed (called by 'breqfast_process' and 'netdc_process')
#DOC                     update_log (called by 'breqfast_process', 'cndc_process' and 'netdc_process')
#DOC                     
#DOC other programs : '/export/home/matt/codeco3/bin/codeco' (called by 'codeco')
#DOC                  '/export/opt1/rdseed/rdseed' (called by 'rdseed')
#DOC                     
#DOC#################################################

use Oraperl;
use Net::FTP;
use LWP::Simple;
use URI::URL;

do '/export/home/matt/wfars/shared.pl';

use strict;

my (@e_mail, @filename, @temp, @temp2);
my (%from_address);
my ($bad, $directory, $filename, $good);
my ($line, $request, $success);

### For RDSEED ###

$ENV{SEED_TOLERANCE} = "10";

#$ENV{QWFARS}      = "/export/home/matt/dummy_wfars/";

### Oracle table names, built from $ENV{SCHEMA} ###

$ENV{DATA_CENTER} = "$ENV{SCHEMA}.DATA_CENTER";
$ENV{LOG_TABLE}   = "$ENV{SCHEMA}.WFARS_LOG";

($ENV{TEMP_DIR}) = make_temp_dir();

$good = 0;
$bad  = 0;

### Find list of all files ###

#$directory = $ENV{QWFARS}."/to_do";
$directory = $ENV{QWFARS}."/incoming";
(@filename) = list_dir($directory);

### Login to database ###

($ENV{LDA}) = oracle_login();

### Build lookup of data center -> e-mail address ###
### (each row returned is split on '::' into CENTER and FROM_ADDRESS) ###

$request = "select distinct CENTER, FROM_ADDRESS from $ENV{DATA_CENTER}";
(@temp) = oracle_request($request);

foreach (@temp) {
    @temp2 = split(/::/,$_);
    $from_address{$temp2[0]} = $temp2[1];
}

### Test whether a 'From: ' line belongs to a given data center ###
#
# A missing or empty address must never be used as a pattern: Perl treats
# an empty pattern as "repeat the last successful match", which here would
# be /From: / and would route every e-mail to that data center.
# The stored address is still applied as a regular expression, exactly as
# before.

my $sent_by = sub {
    my ($text, $center) = @_;
    my $address = $from_address{$center};

    return 0 unless (defined($address) && $address ne "");
    return ($text =~ /$address/) ? 1 : 0;
};

### Loop over each entry ###

foreach $filename (@filename) {

    ### Store e-mail into memory ###

    undef @e_mail;

    my $mail_fh;
    if (!open($mail_fh, "<", "$directory/$filename")) {
        # Keep the file: deleting an e-mail that was never read would
        # silently lose the data request.
        warn "Cannot open $directory/$filename: $!\n";
        next;
    }
    @e_mail = <$mail_fh>;
    close($mail_fh);

    ### Identify data center (first line containing 'From: ' decides) ###

    foreach $line (@e_mail) {

        next unless ($line =~ /From: /);

        if ($sent_by->($line, "CNDC")) {

            # cndc_process returns the request ID on success, 0 otherwise.
            # Truth is tested (not '== 0') so that a non-numeric request
            # ID is not mistaken for a failure.
            ($success) = cndc_process(@e_mail);

            if (!$success) {
                $bad++;
            } else {
                $good++;
            }

        } elsif ($sent_by->($line, "IRIS_DMC")) {

            # 1 = data, -1 = no data, anything else is not counted
            ($success) = netdc_process(@e_mail);

            if (defined($success) && $success eq "1") {
                $good++;
            } elsif (defined($success) && $success eq "-1") {
                $bad++;
            }

        } elsif ($sent_by->($line, "IRIS_BF")) {

            # 1 = data, -1 = no data, anything else is not counted
            ($success) = breqfast_process(@e_mail);

            if (defined($success) && $success eq "1") {
                $good++;
            } elsif (defined($success) && $success eq "-1") {
                $bad++;
            }

        }

        last; # Ensure only one e-mail address identified.

    }

    ### E-mail has been dealt with (or is from an unknown sender) ###

    unlink("$directory/$filename")
        or warn "Cannot remove $directory/$filename: $!\n";

}

print "Good: $good   Bad: $bad\n";

oracle_logout();

exit(0);

#DOC#################################################
#DOC
#DOC subroutine breqfast_process
#DOC
#DOC author - M Evans
#DOC date - October 2003
#DOC
#DOC purpose : Process breqfast e-mail
#DOC
#DOC input : @e_mail - E-mail to be processed
#DOC
#DOC output : -1 if no data, 1 if data,
#DOC          0 if acknowledgement message or if the e-mail is not recognised
#DOC
#DOC errors : dies with "Cannot FTP ..." if the data file cannot be fetched
#DOC
#DOC internal : @temp - Used to store output from 'split' commands
#DOC            $line - Used to loop over each element of @e_mail (when searching for subject)
#DOC            $line2 - Used to loop over each element of @e_mail (when searching for URL)
#DOC            $reqid - Request ID
#DOC            $test - Contents of the data file fetched by FTP
#DOC            $url - URL address of data file
#DOC
#DOC environment variables : $ENV{LDA} - Oracle login
#DOC
#DOC calls subroutines : oracle_login (from 'shared.pl')
#DOC                     oracle_logout (from 'shared.pl')
#DOC                     rdseed
#DOC                     update_log
#DOC
#DOC#################################################


sub breqfast_process {

    my (@e_mail) = @_;
    my (@temp, $line, $line2, $reqid, $test, $url);

    foreach $line (@e_mail) {

        ### Identify what to do (only subject lines are of interest) ###

        next unless ($line =~ /ubject: /);

        if ($line =~ /IRIS received your data request/) {

            ### Standard acknowledgement message (ignore!) ###

            return(0);

        } elsif ($line =~ / requested /) {

            ### Data are ready: find the line naming the file ###

            foreach $line2 (@e_mail) {

                chomp ($line2);

                next unless ($line2 =~ /WFARS\//);

                oracle_logout(); # prevent timing out

                # First field of the line is appended to the fixed FTP prefix
                @temp = split(/ +/,$line2);

                $line2 = "ftp://ftp.iris.washington.edu/pub/userdata/".$temp[0];

                $url  = url("$line2");
                $test = get($url);   # FTP file over

                # Tested separately from the assignment: the former
                # '($test) = get($url) or die' could never die, because a
                # list assignment in scalar context counts its right-hand
                # elements (always 1 here, even for undef).
                defined($test) or die "Cannot FTP $line2\n";

                ($ENV{LDA}) = oracle_login();

                ($reqid) = rdseed($line2,$test);

                # rdseed returns -1 when no usable station was found
                if ($reqid eq "-1") {
                    return(-1);
                }

                update_log($reqid);

                return (1);

            }

        } elsif ($line =~ /no data/) {

            ### Update Log and return "-1" ###

            foreach $line2 (@e_mail) {

                chomp ($line2);

                if ($line2 =~ /Request: /) {
                    @temp = split(/ +/, $line2);
                    update_log($temp[1]);

                    return (-1);
                }

            }

        }

    }

    ### Nothing recognised: report "nothing to count" explicitly ###
    ### (the value of a sub ending in a loop is otherwise unspecified) ###

    return(0);

}

#DOC#################################################
#DOC
#DOC subroutine cndc_process
#DOC
#DOC author - M Evans
#DOC date - October 2003
#DOC
#DOC purpose : Process data e-mails from CNDC
#DOC
#DOC input : @e_mail - Input e-mail
#DOC
#DOC output : $reqid if at least three channels were processed, 0 if not
#DOC          ("0" if no REF_ID could be found in the e-mail).
#DOC
#DOC errors : dies if the FTP download fails, or if the remote file
#DOC          cannot be deleted after it has been downloaded
#DOC
#DOC internal : @temp - Used to store output from 'split' commands
#DOC            $chan - Channel code
#DOC            $check - Counter to see how many channels were successfully processed
#DOC            $ftp - Variable used by FTP processes
#DOC            $ftp_get - URL address
#DOC            $i - Loop variable
#DOC            $reqid - Request ID number
#DOC            $sac_file - Location of SAC file made by 'codeco'
#DOC            $size - Size of @e_mail
#DOC            $sta - Station code
#DOC            $temp - Used to loop over elements of @temp
#DOC            $test - File FTPed over
#DOC            $url - URL address
#DOC
#DOC environment variables : $ENV{LDA} - Oracle login
#DOC
#DOC calls subroutines : codeco
#DOC                     move_sac
#DOC                     oracle_login (from 'shared.pl')
#DOC                     oracle_logout (from 'shared.pl')
#DOC                     update_log
#DOC
#DOC#################################################

sub cndc_process {

    my (@e_mail) = @_;

    my (@temp);
    my ($chan, $check, $ftp, $ftp_get);
    my ($i, $reqid, $sac_file, $size);
    my ($sta, $temp, $test, $url);

    $size = @e_mail;
    $check = 0;

    ### First loop, check for REF_ID ($reqid) value ###
    ### (the last REF_ID line in the e-mail wins)     ###

    for ($i=0;$i<$size;$i++) {

        next unless ($e_mail[$i] =~ /REF_ID/);

        @temp  = split(/ +/,$e_mail[$i]);
        $reqid = $temp[1];

        chomp($reqid) if defined($reqid);

    }

    ### If no $reqid identified, ignore ###

    if (!defined($reqid) || $reqid eq "") { return("0"); }

    ### Second loop, check to see if data need be FTPed ###
    ###     if so make necessary modifications          ###

    for ($i=0;$i<$size;$i++) {

        next unless ($e_mail[$i] =~ /FTP_LOG/);

        # The line after FTP_LOG carries host ($temp[1]), directory
        # ($temp[3]) and file name ($temp[4]).
        @temp  = split(/ +/,$e_mail[$i+1]);
        $ftp_get = "ftp://".$temp[1]."".$temp[3]."/".$temp[4];

        chomp($ftp_get);

        undef @e_mail;

        oracle_logout(); # prevent timing out

        print "Attempting to FTP $ftp_get\n";

        $url  = url("$ftp_get");
        $test = get($url);   # FTP file over

        # Must be checked BEFORE the remote copy is deleted. The former
        # '($test) = get($url) or die' could never die (a list assignment
        # in scalar context yields the element count, i.e. 1), so a failed
        # download went on to remove the only copy of the data.
        defined($test) or die "Cannot FTP $ftp_get\n";

        ### REMOVE FILE!!! ###

        print "Attempting to remove $ftp_get\n";

        $ftp = Net::FTP->new("$temp[1]", Debug => 0)
            or die "Cannot connect to $temp[1]";

        $ftp->login("anonymous",'matt@isc.ac.uk')
            or die "Cannot login ", $ftp->message;

        $ftp->delete("$temp[3]/$temp[4]")
            or die "delete command failed ", $ftp->message;

        $ftp->quit;

        ($ENV{LDA}) = oracle_login();

        # Split into each channel; the text before the first 'WID2 ' is
        # not channel data and is dropped. 'split' removed the marker, so
        # it is put back on the front of every remaining piece.
        @temp = split(/WID2 /,$test);
        shift(@temp);

        foreach $temp (@temp) {
            push (@e_mail, "WID2 ".$temp);
        }

        last;

    }

    $size = @e_mail; # FTP overwrites @e_mail #

    ### Third, (final) loop for WID2 (Start of data) ###

    for ($i=0;$i<$size;$i++) {

        next unless ($e_mail[$i] =~ /WID2 /);

        @temp = split(/ +/,$e_mail[$i]);

        $sta  = $temp[3];
        $chan = $temp[4];

        undef @temp;

        # Gather everything from the WID2 line up to and including the
        # CHK2 line. Stops ON the last element when CHK2 is missing, so
        # nothing beyond the end of @e_mail is read or stored.
        while ($i < $size) {

            push (@temp, $e_mail[$i]);

            last if ($e_mail[$i] =~ /CHK2 /);
            last if ($i == $size - 1);

            $i++;
        }

        ($sac_file) = codeco(@temp); # Convert to SAC

        move_sac($sac_file, $reqid, $sta, $chan); # Move file over

        $check++;

    }

    update_log($reqid);

    ### Success requires at least three channels ###

    if ($check >= 3) {
        return($reqid);
    } else {
        return(0);
    }

}

#DOC#################################################
#DOC
#DOC subroutine codeco
#DOC
#DOC author - M Evans
#DOC date - October 2003
#DOC
#DOC purpose : Change data in GSE format to SAC files
#DOC
#DOC input : @data - Data in GSE format
#DOC
#DOC output : $location - Name of the SAC file
#DOC                      ('$ENV{TEMP_DIR}/test.out.001')
#DOC
#DOC errors : dies if the temporary input file cannot be written
#DOC
#DOC internal : @output - Catches any output from the 'codeco' program
#DOC            $command - Command that triggers 'codeco'
#DOC            $gse_file - Temporary file holding @data
#DOC
#DOC environment variables : $ENV{TEMP_DIR} - Temporary directory
#DOC
#DOC calls subroutines : none
#DOC
#DOC other programs : '/export/home/matt/codeco3/bin/codeco'
#DOC
#DOC#################################################



sub codeco {

    my (@data) = @_;
    my ($command, $location, @output);
    my ($gse_file);

    $gse_file = "$ENV{TEMP_DIR}/test.file";
    $location = "$ENV{TEMP_DIR}/test.out";

    ### Write GSE data to the temporary input file ###

    open (my $gse_fh, ">", $gse_file)
        or die "Cannot write $gse_file: $!\n";
    print $gse_fh @data;
    close ($gse_fh)
        or die "Cannot close $gse_file: $!\n";

    # The name returned below is fixed, so remove any file left there by
    # an earlier call; otherwise a failed conversion would hand back the
    # previous channel's data.
    unlink ("$location.001");

    ### Run 'codeco', feeding its answers on standard input ###
    ### (input file, input format, output file, output format) ###

    $command = "/export/home/matt/codeco3/bin/codeco <<EOF\n$ENV{TEMP_DIR}/test.file\ngse\n$location\nSAC\nEOF\n";
    @output = `$command`;

    $location .= ".001";

    return $location;

}

#DOC#################################################
#DOC
#DOC subroutine list_dir
#DOC
#DOC author - M Evans
#DOC date - October 2003
#DOC
#DOC purpose : List contents of a directory, (excluding '.' and '..')
#DOC
#DOC input : $directory - Directory whose contents are to be listed
#DOC
#DOC output : @output - Sorted list of filenames (empty, with a warning,
#DOC                    if the directory cannot be opened)
#DOC
#DOC internal : $dir_handle - Directory handle
#DOC
#DOC environment variables : none
#DOC
#DOC calls subroutines : none
#DOC
#DOC#################################################

sub list_dir {

    my ($directory) = @_;
    my (@output, $dir_handle);

    if (!opendir ($dir_handle, "$directory")) {
        warn "Cannot open directory $directory: $!\n";
        return @output;
    }

    ### Every entry except the two special ones, in sorted order ###

    @output = grep { $_ ne "." && $_ ne ".." } sort readdir($dir_handle);

    # 'closedir', not 'close': 'close' only acts on file handles and
    # would leave the directory handle open.
    closedir ($dir_handle);

    return @output;

}

#DOC#################################################
#DOC
#DOC subroutine move_sac
#DOC
#DOC author - M Evans
#DOC date - October 2003
#DOC
#DOC purpose : Move (and rename) SAC files
#DOC
#DOC input : $sac_file - SAC filename
#DOC         $reqid - Request ID number
#DOC         $sta - Station code
#DOC         $chan - Channel code
#DOC
#DOC output : none
#DOC
#DOC internal : $request - SQL request
#DOC            $target - Destination filename
#DOC            $username - User who requested data
#DOC
#DOC environment variables : $ENV{LOG_TABLE} - WFARS log table
#DOC                         $ENV{QWFARS} - Root directory of WFARS files
#DOC
#DOC calls subroutines : oracle_request (from 'shared.pl')
#DOC
#DOC other modules : File::Copy (standard library, loaded on first use)
#DOC
#DOC#################################################


sub move_sac {

    my ($sac_file, $reqid, $sta, $chan) = @_;
    my ($request, $username, $target);

    ### Find the user who made the request ###

    # NOTE(review): $reqid is placed in the SQL text as-is and unquoted
    # (update_log quotes it). Callers pass values read from e-mails and
    # file names - confirm they are validated, or quote/escape here.
    $request = "select distinct USERNAME from $ENV{LOG_TABLE}
                where REQID = $reqid";

    ($username) = oracle_request($request);

    $target = "$ENV{QWFARS}/data/$username/$reqid.$sta.$chan";

    ### Move the file ###

    # Done in Perl rather than through a shell 'mv': station and channel
    # codes come from incoming e-mails and must not be able to inject
    # shell commands.
    require File::Copy;

    if (!File::Copy::move($sac_file, $target)) {
        warn "Cannot move $sac_file to $target: $!\n";
        return;
    }

    chmod(0664, $target);

    return;

}

#DOC#################################################
#DOC
#DOC subroutine netdc_process
#DOC
#DOC author - M Evans
#DOC date - October 2003
#DOC
#DOC purpose : Process NetDC e-mail
#DOC
#DOC input : @e_mail - E-mail to be processed
#DOC
#DOC output : -1 if no data, 1 if data,
#DOC          0 if acknowledgement message or if the e-mail is not recognised
#DOC
#DOC errors : dies with "Cannot FTP ..." if the data file cannot be fetched
#DOC
#DOC internal : @temp - Used to store output from 'split' commands
#DOC            $line - Used to loop over each element of @e_mail (when searching for subject)
#DOC            $line2 - Used to loop over each element of @e_mail (when searching for URL)
#DOC            $reqid - Request ID
#DOC            $test - Contents of the data file fetched by FTP
#DOC            $url - URL address of data file
#DOC
#DOC environment variables : $ENV{LDA} - Oracle login
#DOC
#DOC calls subroutines : oracle_login (from 'shared.pl')
#DOC                     oracle_logout (from 'shared.pl')
#DOC                     rdseed
#DOC                     update_log
#DOC
#DOC#################################################


sub netdc_process {

    my (@e_mail) = @_;
    my (@temp, $line, $line2, $reqid, $test, $url);

    foreach $line (@e_mail) {

        ### Identify what to do (only subject lines are of interest) ###

        next unless ($line =~ /ubject: /);

        if ($line =~ /NETDC Request Received/) {

            ### Standard acknowledgement message (ignore!) ###

            return(0);

        } elsif ($line =~ /DATA SHIPMENT/) {

            ### Data are ready: the first line containing 'ftp:' is the URL ###

            foreach $line2 (@e_mail) {

                chomp ($line2);

                next unless ($line2 =~ /ftp:/);

                oracle_logout(); # prevent timing out

                $url  = url("$line2");
                $test = get($url);   # FTP file over

                # Tested separately from the assignment: the former
                # '($test) = get($url) or die' could never die, because a
                # list assignment in scalar context counts its right-hand
                # elements (always 1 here, even for undef).
                defined($test) or die "Cannot FTP $line2\n";

                ($ENV{LDA}) = oracle_login();

                ($reqid) = rdseed($line2,$test);

                # rdseed returns -1 when no usable station was found
                if ($reqid eq "-1") {
                    return(-1);
                }

                update_log($reqid);

                return (1);

            }

        } elsif ($line =~ /NO DATA/) {

            ### Update Log and return "-1" ###

            foreach $line2 (@e_mail) {

                chomp ($line2);

                if ($line2 =~ /label: /) {
                    @temp = split(/ +/, $line2);
                    update_log($temp[4]);

                    return (-1);
                }

            }

        }

    }

    ### Nothing recognised: report "nothing to count" explicitly ###
    ### (the value of a sub ending in a loop is otherwise unspecified) ###

    return(0);

}

#DOC#################################################
#DOC
#DOC subroutine rdseed
#DOC
#DOC author - M Evans
#DOC date - October 2003
#DOC
#DOC purpose : Obtain SAC files from SEED files using 'rdseed' from IRIS.
#DOC           NOTE: Local version of rdseed has been modified to prevent
#DOC           'time torn' data. See main documentation for details.
#DOC
#DOC input : $ftpname - SEED file FTPed from data center
#DOC         $data - SEED file (mixed ASCII/binary format)
#DOC
#DOC output : $reqid - Request ID number (recovered from filename),
#DOC                   or -1 if no station has exactly three usable traces
#DOC
#DOC errors : dies if a temporary file cannot be written
#DOC
#DOC internal : @filename - List of created SAC files
#DOC            @ok_stations - Station codes accepted for extraction
#DOC            @response - Contents of SEED file using internal 't' option
#DOC            @temp - Used to identify $sta and $chan from filename
#DOC            @temp2 - Used to identify $sta and $chan from filename
#DOC            %stats - Hash identifying number of traces per station
#DOC            $chan - FDSN channel code
#DOC            $file - Used to loop over each element of @filename
#DOC            $key - Key for %stats
#DOC            $line - Used to loop over each element of @response
#DOC            $loc - Columns 22-23 of a listing line
#DOC            $sta - Station code
#DOC            $sta_list - List of OKed station codes separated by spaces
#DOC
#DOC environment variables : $ENV{TEMP_DIR} - Temporary directory
#DOC
#DOC calls subroutines : move_sac
#DOC
#DOC other programs : '/export/opt1/rdseed/rdseed'
#DOC
#DOC#################################################

sub rdseed {

    my ($ftpname, $data) = @_;
    my ($reqid);

    my (@filename, @ok_stations, @response, @temp, @temp2);
    my (%stats);
    my ($chan, $file, $key, $line, $loc, $sta, $sta_list);

    ### Remove SAC files left by an earlier run ###

    unlink glob("$ENV{TEMP_DIR}/*.SAC");

    ### Write SEED file into temporary directory ###

    open (my $seed_fh, ">", "$ENV{TEMP_DIR}/test.file")
        or die "Cannot write $ENV{TEMP_DIR}/test.file: $!\n";
    binmode ($seed_fh);   # SEED data are partly binary
    print $seed_fh $data;
    close ($seed_fh)
        or die "Cannot close $ENV{TEMP_DIR}/test.file: $!\n";

    ### Write INPUT FILE (answers for the rdseed 't' listing run) ###

    open (my $input_fh, ">", "$ENV{TEMP_DIR}/input.txt")
        or die "Cannot write $ENV{TEMP_DIR}/input.txt: $!\n";
    print $input_fh "test.file\n\n\nt\nquit\n";
    close ($input_fh)
        or die "Cannot close $ENV{TEMP_DIR}/input.txt: $!\n";

    system("cd $ENV{TEMP_DIR};/export/opt1/rdseed/rdseed < input.txt > t.file");

    ### Identify suitable channels ###

    if (open (my $list_fh, "<", "$ENV{TEMP_DIR}/t.file")) {
        @response = <$list_fh>;
        close ($list_fh);
    } else {
        # No listing means no stations; reported as "no data" below.
        warn "Cannot read $ENV{TEMP_DIR}/t.file: $!\n";
    }

    foreach $line (@response) {

        print "$line";

        # Only lines starting with three spaces are counted
        next unless (substr($line,0,3) eq "   ");

        @temp = split(/ +/,$line);
        $key = $temp[2];

        # Columns 22-23 must hold "00" or be blank; lines too short to
        # have those columns are not counted.
        $loc = (length($line) >= 23) ? substr($line,21,2) : "";

        if ($loc eq "00" || $loc eq "  ") {
            $stats{$key}++;
        }

    }

    print "\n";

    ### Keep stations with exactly three traces ###

    foreach $key (keys(%stats)) {

        print "$stats{$key} $key\n";

        if ($stats{$key} == 3) {
            push (@ok_stations, $key);
        }

    }

    if (!@ok_stations) { return(-1); }

    $sta_list = join(" ", @ok_stations);

    print "STA_LIST: $sta_list\n";

    ### Extract SAC files ###

    open (my $input2_fh, ">", "$ENV{TEMP_DIR}/input2.txt")
        or die "Cannot write $ENV{TEMP_DIR}/input2.txt: $!\n";
    print $input2_fh "test.file\n\n\nd\n\n$sta_list\n\n\n-- 00\n\n\n\n\n\n\n\n\nquit\n";
    close ($input2_fh)
        or die "Cannot close $ENV{TEMP_DIR}/input2.txt: $!\n";

    system("cd $ENV{TEMP_DIR};/export/opt1/rdseed/rdseed < input2.txt > junk.txt");

    ### Request ID is the FTP file's basename up to the first '.' ###

    @temp  = split(/\//,$ftpname);
    @temp2 = split(/\./,$temp[-1]);

    $reqid = $temp2[0];

    print "$reqid\n";

    ### Find list of SAC files ###

    @filename = glob("$ENV{TEMP_DIR}/*.SAC");

    foreach $file (@filename) {

        # Station and channel are the 8th and 10th dot-separated fields
        # of the SAC file's basename.
        @temp  = split(/\//,$file);
        @temp2 = split(/\./,$temp[-1]);
        $sta   = $temp2[7];
        $chan  = $temp2[9];

        print "--> $reqid $sta $chan $file\n";
        move_sac($file, $reqid, $sta, $chan);

    }

    return ($reqid);

}

#DOC#################################################
#DOC
#DOC subroutine update_log
#DOC
#DOC author - M Evans
#DOC date - October 2003
#DOC
#DOC purpose : Update WFARS log table (sets RECDATE to SYSDATE for the
#DOC           request) and print the request ID
#DOC
#DOC input : $reqid - Request ID number
#DOC
#DOC output : none
#DOC
#DOC internal : $command - SQL command
#DOC            $quoted - $reqid with single quotes doubled for SQL
#DOC
#DOC environment variables : $ENV{LOG_TABLE} - WFARS log table
#DOC
#DOC calls subroutines : oracle_command (from 'shared.pl')
#DOC
#DOC#################################################


sub update_log {

    my ($reqid) = @_;
    my ($command, $quoted);

    # Callers pass fields split out of e-mails, which may be missing.
    if (!defined($reqid)) {
        warn "update_log: no request ID supplied, WFARS log not updated\n";
        return;
    }

    # Double any single quote so the ID stays inside the SQL string
    # literal (the ID comes from e-mail text and file names).
    ($quoted = $reqid) =~ s/'/''/g;

    $command = "update $ENV{LOG_TABLE} set RECDATE = SYSDATE
                where REQID = '$quoted'";

    oracle_command($command);

    print "$reqid\n";

    return;

}

#DOC#
#DOC##
#DOC### EOF
#DOC##
#DOC#
