Skip to content

Instantly share code, notes, and snippets.

@avrilcoghlan
Last active June 19, 2022 12:20
Show Gist options
  • Save avrilcoghlan/4080983 to your computer and use it in GitHub Desktop.
Save avrilcoghlan/4080983 to your computer and use it in GitHub Desktop.

Revisions

  1. avrilcoghlan revised this gist Feb 28, 2013. 1 changed file with 386 additions and 185 deletions.
    571 changes: 386 additions & 185 deletions run_genewisedb_afterblast.pl
    Original file line number Diff line number Diff line change
    @@ -45,7 +45,8 @@ =head1 CONTACT
    # Perl script run_genewisedb_afterblast.pl
    # Written by Avril Coghlan ([email protected])
    # 8-Nov-12.
    # Last edited 8-Nov-2012.
    # Last edited 27-Nov-2012.
    # SCRIPT SYNOPSIS: run_genewisedb_afterblast.pl: run GeneWise using TreeFam HMMs, in scaffold regions that have blast matches to the proteins used to build the HMMs.
    #
    #------------------------------------------------------------------#

    @@ -133,6 +134,8 @@ =head1 CONTACT
    # TEST SUBROUTINES:

    my $PRINT_TEST_DATA = 0; # SAYS WHETHER TO PRINT DATA USED DURING TESTING.
    &test_update_feature_types_in_gff($outputdir);
    &test_update_genewise_output($outputdir);
    &test_make_seqfile($outputdir);
    &test_check_if_have_hmm_for_family($outputdir);
    &test_print_error;
    @@ -142,8 +145,8 @@ =head1 CONTACT
    &test_run_genewise_on_scaffolds($outputdir,$blast_path);
    &test_find_pos_of_hmms($outputdir);
    &test_check_formatdb_files($outputdir);
    &test_update_genewise_output($outputdir);
    &test_find_pos_of_scaffolds($outputdir);
    print STDERR "Tests done, running main code...\n";

    #------------------------------------------------------------------#

    @@ -176,30 +179,185 @@ sub run_main_program
    my $TAKE_FAMILY; # HASH TABLE THAT SAYS WHETHER TO TAKE A PARTICULAR FAMILY

    # RECORD THE POSITIONS OF THE HMMs IN $input_hmms:
    print STDERR "Reading positions of the HMMs in $input_hmms...\n";
    ($hmm_pos_file,$TAKE_FAMILY,$errorcode,$errormsg) = &find_pos_of_hmms($input_hmms,$outputdir);
    if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); }

    # RECORD THE POSITION OF THE SCAFFOLDS IN $input_fasta:
    print STDERR "Reading the positions of the scaffolds in $input_fasta...\n";
    ($scaffold_pos_file,$errorcode,$errormsg) = &find_pos_of_scaffolds($input_fasta,$outputdir);
    if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); }

    # RUN GENEWISE WITH ONE SCAFFOLD/CONTIG AT A TIME, ON ONE HMM AT A TIME:
    print STDERR "Running genewise on scaffolds to make output file $outputdir/$output...\n";
    ($errorcode,$errormsg) = &run_genewise_on_scaffolds($outputdir,$input_fasta,$input_hmms,$output,$spliceflat,$parameterfile,$treefam_seqs,
    $evalue_cutoff,$flank_length,0,$hmm_pos_file,$blast_path,$scaffold_pos_file,$TAKE_FAMILY);
    if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); }

    print STDERR "Post-processing genewise output file $outputdir/$output...\n";
    # REPLACE 'match' IN THE FEATURE COLUMN WITH 'gene', AND 'cds' WITH 'CDS':
    ($errorcode,$errormsg) = &update_feature_types_in_gff($output,$outputdir);
    if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); }

    # DELETE TEMPORARY FILES:
    system "rm -f $hmm_pos_file";
    }

    #------------------------------------------------------------------#

    # TEST &update_feature_types_in_gff

    sub test_update_feature_types_in_gff
    {
    my $outputdir = $_[0]; # DIRECTORY FOR PUTTING OUTPUT FILES INTO
    my $output; # OUTPUT GFF FILE
    my $errorcode; # RETURNED AS 0 BY A FUNCTION IF THERE IS NO ERROR
    my $errormsg; # RETURNED AS 'none' BY A FUNCTION IF THERE IS NO ERROR
    my $expected_output; # FILE WITH THE EXPECTED CONTENTS OF $output
    my $differences; # DIFFERENCES BETWEEN $output AND $expected_output
    my $length_differences; # LENGTH OF $differences
    my $line; #

    ($output,$errorcode,$errormsg) = &make_filename($outputdir);
    if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); }
    open(OUTPUT,">$output") || die "ERROR: test_update_feature_types_in_gff: cannot open $output\n";
    print OUTPUT "TF101001\n";
    print OUTPUT "V:15395346..15397430 GeneWise match 2 1681 514.02 + . ID=V:15395346..15397430-TF101001-genewise-prediction-1\n";
    print OUTPUT "V:15395346..15397430 GeneWise cds 2 11 0.00 + 0 ID=V:15395346..15397430-TF101001-genewise-prediction-1\n";
    print OUTPUT "V:15395346..15397430 GeneWise intron 12 342 0.00 + . ID=V:15395346..15397430-TF101001-genewise-prediction-1\n";
    print OUTPUT "V:15395346..15397430 GeneWise cds 343 396 0.00 + 2 ID=V:15395346..15397430-TF101001-genewise-prediction-1\n";
    print OUTPUT "V:15395346..15397430 GeneWise intron 397 448 0.00 + . ID=V:15395346..15397430-TF101001-genewise-prediction-1\n";
    print OUTPUT "V:15395346..15397430 GeneWise cds 449 543 0.00 + 2 ID=V:15395346..15397430-TF101001-genewise-prediction-1\n";
    print OUTPUT "V:15395346..15397430 GeneWise intron 544 594 0.00 + . ID=V:15395346..15397430-TF101001-genewise-prediction-1\n";
    print OUTPUT "V:15395346..15397430 GeneWise cds 595 1017 0.00 + 0 ID=V:15395346..15397430-TF101001-genewise-prediction-1\n";
    print OUTPUT "V:15395346..15397430 GeneWise intron 1018 1066 0.00 + . ID=V:15395346..15397430-TF101001-genewise-prediction-1\n";
    print OUTPUT "V:15395346..15397430 GeneWise cds 1067 1560 0.00 + 0 ID=V:15395346..15397430-TF101001-genewise-prediction-1\n";
    print OUTPUT "V:15395346..15397430 GeneWise intron 1561 1605 0.00 + . ID=V:15395346..15397430-TF101001-genewise-prediction-1\n";
    print OUTPUT "V:15395346..15397430 GeneWise cds 1606 1681 0.00 + 1 ID=V:15395346..15397430-TF101001-genewise-prediction-1\n";
    print OUTPUT "V:15395346..15397430 GeneWise match 37437 63766 -315.16 + . ID=V:15395346..15397430-TF101001-genewise-prediction-2\n";
    print OUTPUT "V:15395346..15397430 GeneWise cds 37437 62083 0.00 + 0 ID=V:15395346..15397430-TF101001-genewise-prediction-2\n";
    print OUTPUT "V:15395346..15397430 GeneWise intron 62084 62744 0.00 + . ID=V:15395346..15397430-TF101001-genewise-prediction-2\n";
    print OUTPUT "V:15395346..15397430 GeneWise cds 62745 63102 0.00 + 1 ID=V:15395346..15397430-TF101001-genewise-prediction-2\n";
    print OUTPUT "V:15395346..15397430 GeneWise intron 63103 63151 0.00 + . ID=V:15395346..15397430-TF101001-genewise-prediction-2\n";
    print OUTPUT "V:15395346..15397430 GeneWise cds 63152 63645 0.00 + 0 ID=V:15395346..15397430-TF101001-genewise-prediction-2\n";
    print OUTPUT "V:15395346..15397430 GeneWise intron 63646 63690 0.00 + . ID=V:15395346..15397430-TF101001-genewise-prediction-2\n";
    print OUTPUT "V:15395346..15397430 GeneWise cds 63691 63766 0.00 + 1 ID=V:15395346..15397430-TF101001-genewise-prediction-2\n";
    print OUTPUT "V:15395346..15397430 GeneWise match 64165 64167 0.12 - . ID=V:15395346..15397430-TF101001-genewise-prediction-3\n";
    print OUTPUT "V:15395346..15397430 GeneWise cds 64165 64167 0.00 - 0 ID=V:15395346..15397430-TF101001-genewise-prediction-3\n";
    print OUTPUT "//\n";
    close(OUTPUT);
    ($errorcode,$errormsg) = &update_feature_types_in_gff($output,$outputdir);
    if ($errorcode != 0) { print STDERR "ERROR: test_update_feature_types_in_gff: failed test1\n"; exit;}
    ($expected_output,$errorcode,$errormsg) = &make_filename($outputdir);
    if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); }
    open(EXPECTED,">$expected_output") || die "ERROR: test_update_feature_types_in_gff: cannot open $expected_output\n";
    print EXPECTED "TF101001\n";
    print EXPECTED "V:15395346..15397430 GeneWise gene 2 1681 514.02 + . ID=V:15395346..15397430-TF101001-genewise-prediction-1\n";
    print EXPECTED "V:15395346..15397430 GeneWise CDS 2 11 0.00 + 0 ID=V:15395346..15397430-TF101001-genewise-prediction-1\n";
    print EXPECTED "V:15395346..15397430 GeneWise intron 12 342 0.00 + . ID=V:15395346..15397430-TF101001-genewise-prediction-1\n";
    print EXPECTED "V:15395346..15397430 GeneWise CDS 343 396 0.00 + 2 ID=V:15395346..15397430-TF101001-genewise-prediction-1\n";
    print EXPECTED "V:15395346..15397430 GeneWise intron 397 448 0.00 + . ID=V:15395346..15397430-TF101001-genewise-prediction-1\n";
    print EXPECTED "V:15395346..15397430 GeneWise CDS 449 543 0.00 + 2 ID=V:15395346..15397430-TF101001-genewise-prediction-1\n";
    print EXPECTED "V:15395346..15397430 GeneWise intron 544 594 0.00 + . ID=V:15395346..15397430-TF101001-genewise-prediction-1\n";
    print EXPECTED "V:15395346..15397430 GeneWise CDS 595 1017 0.00 + 0 ID=V:15395346..15397430-TF101001-genewise-prediction-1\n";
    print EXPECTED "V:15395346..15397430 GeneWise intron 1018 1066 0.00 + . ID=V:15395346..15397430-TF101001-genewise-prediction-1\n";
    print EXPECTED "V:15395346..15397430 GeneWise CDS 1067 1560 0.00 + 0 ID=V:15395346..15397430-TF101001-genewise-prediction-1\n";
    print EXPECTED "V:15395346..15397430 GeneWise intron 1561 1605 0.00 + . ID=V:15395346..15397430-TF101001-genewise-prediction-1\n";
    print EXPECTED "V:15395346..15397430 GeneWise CDS 1606 1681 0.00 + 1 ID=V:15395346..15397430-TF101001-genewise-prediction-1\n";
    print EXPECTED "V:15395346..15397430 GeneWise gene 37437 63766 -315.16 + . ID=V:15395346..15397430-TF101001-genewise-prediction-2\n";
    print EXPECTED "V:15395346..15397430 GeneWise CDS 37437 62083 0.00 + 0 ID=V:15395346..15397430-TF101001-genewise-prediction-2\n";
    print EXPECTED "V:15395346..15397430 GeneWise intron 62084 62744 0.00 + . ID=V:15395346..15397430-TF101001-genewise-prediction-2\n";
    print EXPECTED "V:15395346..15397430 GeneWise CDS 62745 63102 0.00 + 1 ID=V:15395346..15397430-TF101001-genewise-prediction-2\n";
    print EXPECTED "V:15395346..15397430 GeneWise intron 63103 63151 0.00 + . ID=V:15395346..15397430-TF101001-genewise-prediction-2\n";
    print EXPECTED "V:15395346..15397430 GeneWise CDS 63152 63645 0.00 + 0 ID=V:15395346..15397430-TF101001-genewise-prediction-2\n";
    print EXPECTED "V:15395346..15397430 GeneWise intron 63646 63690 0.00 + . ID=V:15395346..15397430-TF101001-genewise-prediction-2\n";
    print EXPECTED "V:15395346..15397430 GeneWise CDS 63691 63766 0.00 + 1 ID=V:15395346..15397430-TF101001-genewise-prediction-2\n";
    print EXPECTED "V:15395346..15397430 GeneWise gene 64165 64167 0.12 - . ID=V:15395346..15397430-TF101001-genewise-prediction-3\n";
    print EXPECTED "V:15395346..15397430 GeneWise CDS 64165 64167 0.00 - 0 ID=V:15395346..15397430-TF101001-genewise-prediction-3\n";
    print EXPECTED "//\n";
    close(EXPECTED);
    $differences = "";
    open(TEMP,"diff $expected_output $output |");
    while(<TEMP>)
    {
    $line = $_;
    $differences = $differences.$line;
    }
    close(TEMP);
    $length_differences = length($differences);
    if ($length_differences != 0) { print STDERR "ERROR: test_update_feature_types_in_gff: failed test1 (output $output expected_output $expected_output)\n"; exit;}
    system "rm -f $expected_output";
    system "rm -f $output";
    }

    #------------------------------------------------------------------#

    # REPLACE 'match' IN THE FEATURE COLUMN WITH 'gene', AND 'cds' WITH 'CDS':
    # THIS IS REQUIRED TO USE THE GENEWISE OUTPUT AS INPUT TO MAKER (WHICH REQUIRES gene/CDS FEATURE TYPES):

    sub update_feature_types_in_gff
    {
    my $output = $_[0]; # NAME OF THE OUTPUT GFF FILE
    my $outputdir = $_[1]; # DIRECTORY TO PUT OUTPUT FILES INTO
    my $errorcode = 0; # RETURNED AS 0 IF THERE IS NO ERROR
    my $errormsg = "none";# RETURNED AS 'none' IF THERE IS NO ERROR
    my $cmd; # COMMAND TO RUN

    # REPLACE 'cds' WITH 'CDS' IN THE GFF FILE:
    $cmd = "sed -i \'s/cds/CDS/g\' $output";
    system "$cmd";
    sleep(60); # FORCE PERL TO WAIT UNTIL THE 'sed' JOB IS COMPLETE (THIS IS NECESSARY)

    # REPLACE 'match' WITH 'gene' IN THE GFF FILE:
    $cmd = "sed -i \'s/match/gene/g\' $output";
    system "$cmd";
    sleep(60); # FORCE PERL TO WAIT UNTIL THE 'sed' JOB IS COMPLETE (THIS IS NECESSARY)

    return($errorcode,$errormsg);

    }

    #------------------------------------------------------------------#

    # SUBROUTINE TO MAKE A FILE NAME FOR A TEMPORARY FILE:

    sub make_filename
    {
    my $outputdir = $_[0]; # OUTPUT DIRECTORY TO WRITE TEMPORARY FILE NAME TO
    my $found_name = 0; # SAYS WHETHER WE HAVE FOUND A FILE NAME YET
    my $filename = "none";# NEW FILE NAME TO USE
    my $errorcode = 0; # RETURNED AS 0 IF THERE IS NO ERROR
    my $errormsg = "none";# RETURNED AS 'none' IF THERE IS NO ERROR
    my $poss_filename; # POSSIBLE FILE NAME TO USE
    my $random_number; # RANDOM NUMBER TO USE IN TEMPORARY FILE NAME

    while($found_name == 0)
    {
    $random_number = rand();
    $poss_filename = $outputdir."/tmp".$random_number;
    if (!(-e $poss_filename))
    {
    $filename = $poss_filename;
    $found_name = 1;
    }
    }
    if ($found_name == 0 || $filename eq 'none')
    {
    $errormsg = "ERROR: make_filename: found_name $found_name filename $filename\n";
    $errorcode = 6; # ERRORCODE=6
    return($filename,$errorcode,$errormsg);
    }

    return($filename,$errorcode,$errormsg);
    }

    #------------------------------------------------------------------#

    # TEST &find_pos_of_hmms

    sub test_find_pos_of_hmms
    {
    my $outputdir = $_[0]; # DIRECTORY FOR WRITING OUTPUT FILES IN
    my $random_number; # RANDOM NUMBER TO USE IN TEMPORARY FILE NAMES
    my $input_hmmfile; # INPUT FILE OF HMMs
    my $errorcode; # RETURNED AS 0 BY A FUNCTION IF THERE IS NO ERROR
    my $errormsg; # RETURNED AS 'none' BY A FUNCTION IF THERE IS NO ERROR
    @@ -210,8 +368,8 @@ sub test_find_pos_of_hmms
    my $line; #
    my $TAKE_FAMILY; # HASH TABLE THAT SAYS WHETHER TO TAKE A PARTICULAR FAMILY

    $random_number = rand();
    $input_hmmfile = $outputdir."/tmp".$random_number;
    ($input_hmmfile,$errorcode,$errormsg) = &make_filename($outputdir);
    if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); }
    open(HMMFILE,">$input_hmmfile") || die "ERROR: test_find_pos_of_hmms: cannot open $input_hmmfile\n";
    print HMMFILE "HMMER2.0 [2.3.2]\n";
    print HMMFILE "NAME TF101093.0\n";
    @@ -240,8 +398,8 @@ sub test_find_pos_of_hmms
    !(defined($TAKE_FAMILY->{"TF101095"})) || !(defined($TAKE_FAMILY->{"TF101096"})) ||
    !(defined($TAKE_FAMILY->{"TF101097"})) || !(defined($TAKE_FAMILY->{"TF101098"})) ||
    !(defined($TAKE_FAMILY->{"TF101099"})) ) { print STDERR "ERROR: test_find_pos_of_hmms: failed test1\n"; exit;}
    $random_number = rand();
    $expected_hmm_pos_file = $outputdir."/tmp".$random_number;
    ($expected_hmm_pos_file,$errorcode,$errormsg) = &make_filename($outputdir);
    if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); }
    open(EXPECTED,">$expected_hmm_pos_file") || die "ERROR: test_find_pos_of_hmms: cannot open $expected_hmm_pos_file\n";
    print EXPECTED "TF101093 3\n";
    print EXPECTED "TF101094 6\n";
    @@ -275,7 +433,6 @@ sub test_find_pos_of_hmms
    sub test_find_pos_of_scaffolds
    {
    my $outputdir = $_[0]; ## DIRECTORY FOR WRITING OUTPUT FILES IN
    my $random_number; ## RANDOM NUMBER TO USE IN TEMPORARY FILE NAMES
    my $input_fasta; ## INPUT FILE OF SCAFFOLDS
    my $errorcode; ## RETURNED AS 0 BY A FUNCTION IF THERE IS NO ERROR
    my $errormsg; ## RETURNED AS 'none' BY A FUNCTION IF THERE IS NO ERROR
    @@ -285,8 +442,8 @@ sub test_find_pos_of_scaffolds
    my $length_differences; ## LENGTH OF $differences
    my $line; ##

    $random_number = rand();
    $input_fasta = $outputdir."/tmp".$random_number;
    ($input_fasta,$errorcode,$errormsg) = &make_filename($outputdir);
    if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); }
    open(FASTA,">$input_fasta") || die "ERROR: test_find_pos_of_scaffolds: cannot open $input_fasta\n";
    print FASTA ">scaffold2\n";
    print FASTA "ABCDEFG\n";
    @@ -301,8 +458,8 @@ sub test_find_pos_of_scaffolds
    close(FASTA);
    ($scaffold_pos_file,$errorcode,$errormsg) = &find_pos_of_scaffolds($input_fasta,$outputdir);
    if ($errorcode != 0) { print STDERR "ERROR: test_find_pos_of_scaffolds: failed test1\n"; exit;}
    $random_number = rand();
    $expected_scaffold_pos_file = $outputdir."/tmp".$random_number;
    ($expected_scaffold_pos_file,$errorcode,$errormsg) = &make_filename($outputdir);
    if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); }
    open(EXPECTED,">$expected_scaffold_pos_file") || die "ERROR: test_find_pos_of_scaffolds: cannot open $expected_scaffold_pos_file\n";
    print EXPECTED "scaffold2 2\n";
    print EXPECTED "scaffold3 5\n";
    @@ -341,12 +498,11 @@ sub find_pos_of_scaffolds
    my $line_no; ## LINE NUMBER THAT THE NAME OF A SCAFFOLD APPEARS ON
    my $scaffold; ## SCAFFOLD IN $input_fasta
    my $prev_scaffold = "none";## PREVIOUS SCAFFOLD IN $input_fasta
    my $random_number; ## RANDOM NUMBER TO USE IN TEMPORARY FILE NAME
    my $i; ##

    # OPEN A FILE TO PUT THE POSITIONS OF SCAFFOLDS IN $input_fasta:
    $random_number = rand();
    $scaffold_pos_file = $outputdir."/tmp".$random_number;
    ($scaffold_pos_file,$errorcode,$errormsg) = &make_filename($outputdir);
    if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); }
    open(SCAFFOLD_POS,">$scaffold_pos_file") || die "ERROR: find_pos_of_scaffolds: cannot open $scaffold_pos_file\n";

    # FIND THE LINES THAT SEQUENCES START ON:
    @@ -401,12 +557,11 @@ sub find_pos_of_hmms
    my $line_no; # LINE NUMBER THAT THE NAME OF A HMM APPEARS ON
    my $family; # TREEFAM FAMILY FOR A HMM IN $input_hmms
    my $prev_family = "none";# TREEFAM FAMILY FOR PREVIOUS HMM IN $input_hmms
    my $random_number; # RANDOM NUMBER TO USE IN TEMPORARY FILE NAME
    my %TAKE_FAMILY = (); # HASH TABLE THAT SAYS WHETHER TO TAKE A PARTICULAR FAMILY

    # OPEN A FILE TO PUT THE POSITIONS OF HMMs IN $input_hmms:
    $random_number = rand();
    $hmm_pos_file = $outputdir."/tmp".$random_number;
    ($hmm_pos_file,$errorcode,$errormsg) = &make_filename($outputdir);
    if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); }
    open(HMM_POS,">$hmm_pos_file") || die "ERROR: find_pos_of_hmms: cannot open $hmm_pos_file\n";

    # FIND THE LINES THAT HMMs END ON:
    @@ -462,7 +617,6 @@ sub test_run_genewise_on_scaffolds
    my $input_hmms; # INPUT FILE OF HMMs
    my $fam_seqs; # INPUT FILE WITH SEQUENCES IN FAMILIES.
    my $output; # OUTPUT FILE FOR GENEWISE OUTPUT
    my $random_number; # RANDOM NUMBER TO USE IN TEMPORARY FILE NAMES
    my $expected_output; # FILE CONTAINING THE EXPECTED CONTENT OF $output
    my $differences; # DIFFERENCES BETWEEN $output AND $expected_output
    my $length_differences; # LENGTH OF $differences
    @@ -473,13 +627,16 @@ sub test_run_genewise_on_scaffolds
    my $i; #
    my $scaffold_pos_file; # FILE WITH POSITIONS OF SCAFFOLDS IN $input_fasta
    my $TAKE_FAMILY; # HASH TABLE THAT SAYS WHETHER TO TAKE A FAMILY
    my @temp; #

    # DEFINE THE OUTPUT FILE NAME FOR GENEWISE OUTPUT:
    $random_number = rand();
    $output = "tmp".$random_number;
    ($output,$errorcode,$errormsg) = &make_filename($outputdir);
    if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); }
    @temp = split(/\//,$output);
    $output = $temp[$#temp];
    # WRITE A FILE WITH THE INPUT SEQUENCES IN THE FAMILY:
    $random_number = rand();
    $fam_seqs = $outputdir."/tmp".$random_number;
    ($fam_seqs,$errorcode,$errormsg) = &make_filename($outputdir);
    if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); }
    open(FAM_SEQS,">$fam_seqs") || die "ERROR: test_run_genewise_on_scaffolds: cannot open $fam_seqs\n";
    print FAM_SEQS "TF101001\n";
    print FAM_SEQS ">At1g16330.1\n";
    @@ -537,8 +694,8 @@ sub test_run_genewise_on_scaffolds
    print FAM_SEQS "#END\n";
    close(FAM_SEQS);
    # WRITE AN INPUT FASTA FILE:
    $random_number = rand();
    $input_fasta = $outputdir."/tmp".$random_number;
    ($input_fasta,$errorcode,$errormsg) = &make_filename($outputdir);
    if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); }
    open(INPUT_FASTA,">$input_fasta") || die "ERROR: test_run_genewise_on_scaffolds: cannot open $input_fasta\n";
    print INPUT_FASTA ">V:15395346..15397430\n";
    print INPUT_FASTA "gaatgttattttcttatcaataagcacattttctatcaaacccagttcactttcaaattt\n";
    @@ -617,8 +774,8 @@ sub test_run_genewise_on_scaffolds
    print INPUT_FASTA "cttgatcggcgcaactttacaagcggtttttgtccttacagtgct\n";
    close(INPUT_FASTA);
    # WRITE AN INPUT FILE OF HMMs:
    $random_number = rand();
    $input_hmms = $outputdir."/tmp".$random_number;
    ($input_hmms,$errorcode,$errormsg) = &make_filename($outputdir);
    if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); }
    open(INPUT_HMMS,">$input_hmms") || die "ERROR: test_run_genewise_on_scaffolds: cannot open $input_hmms\n";
    print INPUT_HMMS "HMMER2.0 [2.3.2]\n";
    print INPUT_HMMS "NAME TF101001\n";
    @@ -1978,32 +2135,32 @@ sub test_run_genewise_on_scaffolds
    $blast_path,$scaffold_pos_file,$TAKE_FAMILY);
    if ($errorcode != 0) { print STDERR "ERROR: test_run_genewise_on_scaffolds: failed test1\n"; exit;}
    # WRITE A FILE WITH THE EXPECTED OUTPUT:
    $random_number = rand();
    $expected_output = $outputdir."/tmp".$random_number;
    ($expected_output,$errorcode,$errormsg) = &make_filename($outputdir);
    if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); }
    open(EXPECTED,">$expected_output") || die "ERROR: test_run_genewise_on_scaffolds: cannot open $expected_output\n";
    print EXPECTED "TF101001\n";
    print EXPECTED "V:15395346..15397430 GeneWise match 2 1681 514.02 + . V:15395346..15397430-TF101001-genewise-prediction-1\n";
    print EXPECTED "V:15395346..15397430 GeneWise cds 2 11 0.00 + 0 V:15395346..15397430-TF101001-genewise-prediction-1\n";
    print EXPECTED "V:15395346..15397430 GeneWise intron 12 342 0.00 + . V:15395346..15397430-TF101001-genewise-prediction-1\n";
    print EXPECTED "V:15395346..15397430 GeneWise cds 343 396 0.00 + 2 V:15395346..15397430-TF101001-genewise-prediction-1\n";
    print EXPECTED "V:15395346..15397430 GeneWise intron 397 448 0.00 + . V:15395346..15397430-TF101001-genewise-prediction-1\n";
    print EXPECTED "V:15395346..15397430 GeneWise cds 449 543 0.00 + 2 V:15395346..15397430-TF101001-genewise-prediction-1\n";
    print EXPECTED "V:15395346..15397430 GeneWise intron 544 594 0.00 + . V:15395346..15397430-TF101001-genewise-prediction-1\n";
    print EXPECTED "V:15395346..15397430 GeneWise cds 595 1017 0.00 + 0 V:15395346..15397430-TF101001-genewise-prediction-1\n";
    print EXPECTED "V:15395346..15397430 GeneWise intron 1018 1066 0.00 + . V:15395346..15397430-TF101001-genewise-prediction-1\n";
    print EXPECTED "V:15395346..15397430 GeneWise cds 1067 1560 0.00 + 0 V:15395346..15397430-TF101001-genewise-prediction-1\n";
    print EXPECTED "V:15395346..15397430 GeneWise intron 1561 1605 0.00 + . V:15395346..15397430-TF101001-genewise-prediction-1\n";
    print EXPECTED "V:15395346..15397430 GeneWise cds 1606 1681 0.00 + 1 V:15395346..15397430-TF101001-genewise-prediction-1\n";
    print EXPECTED "V:15395346..15397430 GeneWise match 37437 63766 -315.16 + . V:15395346..15397430-TF101001-genewise-prediction-2\n";
    print EXPECTED "V:15395346..15397430 GeneWise cds 37437 62083 0.00 + 0 V:15395346..15397430-TF101001-genewise-prediction-2\n";
    print EXPECTED "V:15395346..15397430 GeneWise intron 62084 62744 0.00 + . V:15395346..15397430-TF101001-genewise-prediction-2\n";
    print EXPECTED "V:15395346..15397430 GeneWise cds 62745 63102 0.00 + 1 V:15395346..15397430-TF101001-genewise-prediction-2\n";
    print EXPECTED "V:15395346..15397430 GeneWise intron 63103 63151 0.00 + . V:15395346..15397430-TF101001-genewise-prediction-2\n";
    print EXPECTED "V:15395346..15397430 GeneWise cds 63152 63645 0.00 + 0 V:15395346..15397430-TF101001-genewise-prediction-2\n";
    print EXPECTED "V:15395346..15397430 GeneWise intron 63646 63690 0.00 + . V:15395346..15397430-TF101001-genewise-prediction-2\n";
    print EXPECTED "V:15395346..15397430 GeneWise cds 63691 63766 0.00 + 1 V:15395346..15397430-TF101001-genewise-prediction-2\n";
    print EXPECTED "V:15395346..15397430 GeneWise match 64167 64165 0.12 - . V:15395346..15397430-TF101001-genewise-prediction-3\n";
    print EXPECTED "V:15395346..15397430 GeneWise cds 64167 64165 0.00 - 0 V:15395346..15397430-TF101001-genewise-prediction-3\n";
    print EXPECTED "V:15395346..15397430 GeneWise match 2 1681 514.02 + . ID=V:15395346..15397430-TF101001-genewise-prediction-1\n";
    print EXPECTED "V:15395346..15397430 GeneWise cds 2 11 0.00 + 0 ID=V:15395346..15397430-TF101001-genewise-prediction-1\n";
    print EXPECTED "V:15395346..15397430 GeneWise intron 12 342 0.00 + . ID=V:15395346..15397430-TF101001-genewise-prediction-1\n";
    print EXPECTED "V:15395346..15397430 GeneWise cds 343 396 0.00 + 2 ID=V:15395346..15397430-TF101001-genewise-prediction-1\n";
    print EXPECTED "V:15395346..15397430 GeneWise intron 397 448 0.00 + . ID=V:15395346..15397430-TF101001-genewise-prediction-1\n";
    print EXPECTED "V:15395346..15397430 GeneWise cds 449 543 0.00 + 2 ID=V:15395346..15397430-TF101001-genewise-prediction-1\n";
    print EXPECTED "V:15395346..15397430 GeneWise intron 544 594 0.00 + . ID=V:15395346..15397430-TF101001-genewise-prediction-1\n";
    print EXPECTED "V:15395346..15397430 GeneWise cds 595 1017 0.00 + 0 ID=V:15395346..15397430-TF101001-genewise-prediction-1\n";
    print EXPECTED "V:15395346..15397430 GeneWise intron 1018 1066 0.00 + . ID=V:15395346..15397430-TF101001-genewise-prediction-1\n";
    print EXPECTED "V:15395346..15397430 GeneWise cds 1067 1560 0.00 + 0 ID=V:15395346..15397430-TF101001-genewise-prediction-1\n";
    print EXPECTED "V:15395346..15397430 GeneWise intron 1561 1605 0.00 + . ID=V:15395346..15397430-TF101001-genewise-prediction-1\n";
    print EXPECTED "V:15395346..15397430 GeneWise cds 1606 1681 0.00 + 1 ID=V:15395346..15397430-TF101001-genewise-prediction-1\n";
    print EXPECTED "V:15395346..15397430 GeneWise match 37437 63766 -315.16 + . ID=V:15395346..15397430-TF101001-genewise-prediction-2\n";
    print EXPECTED "V:15395346..15397430 GeneWise cds 37437 62083 0.00 + 0 ID=V:15395346..15397430-TF101001-genewise-prediction-2\n";
    print EXPECTED "V:15395346..15397430 GeneWise intron 62084 62744 0.00 + . ID=V:15395346..15397430-TF101001-genewise-prediction-2\n";
    print EXPECTED "V:15395346..15397430 GeneWise cds 62745 63102 0.00 + 1 ID=V:15395346..15397430-TF101001-genewise-prediction-2\n";
    print EXPECTED "V:15395346..15397430 GeneWise intron 63103 63151 0.00 + . ID=V:15395346..15397430-TF101001-genewise-prediction-2\n";
    print EXPECTED "V:15395346..15397430 GeneWise cds 63152 63645 0.00 + 0 ID=V:15395346..15397430-TF101001-genewise-prediction-2\n";
    print EXPECTED "V:15395346..15397430 GeneWise intron 63646 63690 0.00 + . ID=V:15395346..15397430-TF101001-genewise-prediction-2\n";
    print EXPECTED "V:15395346..15397430 GeneWise cds 63691 63766 0.00 + 1 ID=V:15395346..15397430-TF101001-genewise-prediction-2\n";
    print EXPECTED "V:15395346..15397430 GeneWise match 64165 64167 0.12 - . ID=V:15395346..15397430-TF101001-genewise-prediction-3\n";
    print EXPECTED "V:15395346..15397430 GeneWise cds 64165 64167 0.00 - 0 ID=V:15395346..15397430-TF101001-genewise-prediction-3\n";
    print EXPECTED "//\n";
    close(EXPECTED);
    $differences = "";
    @@ -2036,11 +2193,13 @@ sub test_run_genewise_on_scaffolds
    system "rm -f $formatdbfile";

    # DEFINE THE OUTPUT FILE NAME FOR GENEWISE OUTPUT:
    $random_number = rand();
    $output = "tmp".$random_number;
    ($output,$errorcode,$errormsg) = &make_filename($outputdir);
    if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); }
    @temp = split(/\//,$output);
    $output = $temp[$#temp];
    # WRITE A FILE WITH THE INPUT SEQUENCES IN THE FAMILY:
    $random_number = rand();
    $fam_seqs = $outputdir."/tmp".$random_number;
    ($fam_seqs,$errorcode,$errormsg) = &make_filename($outputdir);
    if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); }
    open(FAM_SEQS,">$fam_seqs") || die "ERROR: test_run_genewise_on_scaffolds: cannot open $fam_seqs\n";
    print FAM_SEQS "TF101526\n";
    print FAM_SEQS ">At1g29550.1\n";
    @@ -2335,8 +2494,8 @@ sub test_run_genewise_on_scaffolds
    close(FAM_SEQS);

    # WRITE AN INPUT FASTA FILE:
    $random_number = rand();
    $input_fasta = $outputdir."/tmp".$random_number;
    ($input_fasta,$errorcode,$errormsg) = &make_filename($outputdir);
    if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); }
    open(INPUT_FASTA,">$input_fasta") || die "ERROR: test_run_genewise_on_scaffolds: cannot open $input_fasta\n";
    print INPUT_FASTA ">scaffold2.size1663701.3.1253264-1653727\n";
    print INPUT_FASTA "TTTCCCTTCTCTATGAAAGCTTTGATATTGAGCTTTTGTCAATGATGAATGATCCCTTAA\n";
    @@ -3927,8 +4086,8 @@ sub test_run_genewise_on_scaffolds
    print INPUT_FASTA "T\n";
    close(INPUT_FASTA);
    # WRITE AN INPUT FILE OF HMMs:
    $random_number = rand();
    $input_hmms = $outputdir."/tmp".$random_number;
    ($input_hmms,$errorcode,$errormsg) = &make_filename($outputdir);
    if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); }
    open(INPUT_HMMS,">$input_hmms") || die "ERROR: test_run_genewise_on_scaffolds: cannot open $input_hmms\n";
    print INPUT_HMMS "HMMER2.0 [2.3.2]\n";
    print INPUT_HMMS "NAME TF105015\n";
    @@ -7182,46 +7341,46 @@ sub test_run_genewise_on_scaffolds
    $blast_path,$scaffold_pos_file,$TAKE_FAMILY);
    if ($errorcode != 0) { print STDERR "ERROR: test_run_genewise_on_scaffolds: failed test2\n"; exit;}
    # WRITE A FILE WITH THE EXPECTED OUTPUT:
    $random_number = rand();
    $expected_output = $outputdir."/tmp".$random_number;
    ($expected_output,$errorcode,$errormsg) = &make_filename($outputdir);
    if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); }
    open(EXPECTED,">$expected_output") || die "ERROR: test_run_genewise_on_scaffolds: cannot open $expected_output\n";
    print EXPECTED "TF101526\n";
    print EXPECTED "scaffold2.size1663701.3.1253264-1653727 GeneWise match 2 4 -17.91 + . scaffold2.size1663701.3.1253264-1653727-TF101526-genewise-prediction-1\n";
    print EXPECTED "scaffold2.size1663701.3.1253264-1653727 GeneWise cds 2 4 0.00 + 0 scaffold2.size1663701.3.1253264-1653727-TF101526-genewise-prediction-1\n";
    print EXPECTED "scaffold2.size1663701.3.1253264-1653727 GeneWise match 7 15 2.43 + . scaffold2.size1663701.3.1253264-1653727-TF101526-genewise-prediction-2\n";
    print EXPECTED "scaffold2.size1663701.3.1253264-1653727 GeneWise cds 7 15 0.00 + 0 scaffold2.size1663701.3.1253264-1653727-TF101526-genewise-prediction-2\n";
    print EXPECTED "scaffold5.size1039674 GeneWise match 2 7 -16.98 + . scaffold5.size1039674-TF101526-genewise-prediction-1\n";
    print EXPECTED "scaffold5.size1039674 GeneWise cds 2 7 0.00 + 0 scaffold5.size1039674-TF101526-genewise-prediction-1\n";
    print EXPECTED "scaffold5.size1039674 GeneWise match 9 231 -14.55 + . scaffold5.size1039674-TF101526-genewise-prediction-2\n";
    print EXPECTED "scaffold5.size1039674 GeneWise cds 9 43 0.00 + 0 scaffold5.size1039674-TF101526-genewise-prediction-2\n";
    print EXPECTED "scaffold5.size1039674 GeneWise intron 44 58 0.00 + . scaffold5.size1039674-TF101526-genewise-prediction-2\n";
    print EXPECTED "scaffold5.size1039674 GeneWise cds 59 122 0.00 + 1 scaffold5.size1039674-TF101526-genewise-prediction-2\n";
    print EXPECTED "scaffold5.size1039674 GeneWise intron 123 189 0.00 + . scaffold5.size1039674-TF101526-genewise-prediction-2\n";
    print EXPECTED "scaffold5.size1039674 GeneWise cds 190 231 0.00 + 0 scaffold5.size1039674-TF101526-genewise-prediction-2\n";
    print EXPECTED "scaffold5.size1039674 GeneWise match 44998 44996 2.11 - . scaffold5.size1039674-TF101526-genewise-prediction-3\n";
    print EXPECTED "scaffold5.size1039674 GeneWise cds 44998 44996 0.00 - 0 scaffold5.size1039674-TF101526-genewise-prediction-3\n";
    print EXPECTED "scaffold5.size1039674 GeneWise match 11764 9587 94.51 - . scaffold5.size1039674-TF101526-genewise-prediction-4\n";
    print EXPECTED "scaffold5.size1039674 GeneWise cds 11764 11737 0.00 - 0 scaffold5.size1039674-TF101526-genewise-prediction-4\n";
    print EXPECTED "scaffold5.size1039674 GeneWise intron 11736 11099 0.00 - . scaffold5.size1039674-TF101526-genewise-prediction-4\n";
    print EXPECTED "scaffold5.size1039674 GeneWise cds 11098 10994 0.00 - 2 scaffold5.size1039674-TF101526-genewise-prediction-4\n";
    print EXPECTED "scaffold5.size1039674 GeneWise intron 10993 10932 0.00 - . scaffold5.size1039674-TF101526-genewise-prediction-4\n";
    print EXPECTED "scaffold5.size1039674 GeneWise cds 10931 10857 0.00 - 2 scaffold5.size1039674-TF101526-genewise-prediction-4\n";
    print EXPECTED "scaffold5.size1039674 GeneWise intron 10856 10620 0.00 - . scaffold5.size1039674-TF101526-genewise-prediction-4\n";
    print EXPECTED "scaffold5.size1039674 GeneWise cds 10619 10543 0.00 - 2 scaffold5.size1039674-TF101526-genewise-prediction-4\n";
    print EXPECTED "scaffold5.size1039674 GeneWise intron 10542 10419 0.00 - . scaffold5.size1039674-TF101526-genewise-prediction-4\n";
    print EXPECTED "scaffold5.size1039674 GeneWise cds 10418 10284 0.00 - 0 scaffold5.size1039674-TF101526-genewise-prediction-4\n";
    print EXPECTED "scaffold5.size1039674 GeneWise intron 10283 9915 0.00 - . scaffold5.size1039674-TF101526-genewise-prediction-4\n";
    print EXPECTED "scaffold5.size1039674 GeneWise cds 9914 9778 0.00 - 0 scaffold5.size1039674-TF101526-genewise-prediction-4\n";
    print EXPECTED "scaffold5.size1039674 GeneWise intron 9777 9761 0.00 - . scaffold5.size1039674-TF101526-genewise-prediction-4\n";
    print EXPECTED "scaffold5.size1039674 GeneWise cds 9760 9734 0.00 - 1 scaffold5.size1039674-TF101526-genewise-prediction-4\n";
    print EXPECTED "scaffold5.size1039674 GeneWise intron 9733 9597 0.00 - . scaffold5.size1039674-TF101526-genewise-prediction-4\n";
    print EXPECTED "scaffold5.size1039674 GeneWise cds 9596 9587 0.00 - 1 scaffold5.size1039674-TF101526-genewise-prediction-4\n";
    print EXPECTED "scaffold2.size1663701.3.1253264-1653727 GeneWise match 2 4 -17.91 + . ID=scaffold2.size1663701.3.1253264-1653727-TF101526-genewise-prediction-1\n";
    print EXPECTED "scaffold2.size1663701.3.1253264-1653727 GeneWise cds 2 4 0.00 + 0 ID=scaffold2.size1663701.3.1253264-1653727-TF101526-genewise-prediction-1\n";
    print EXPECTED "scaffold2.size1663701.3.1253264-1653727 GeneWise match 7 15 2.43 + . ID=scaffold2.size1663701.3.1253264-1653727-TF101526-genewise-prediction-2\n";
    print EXPECTED "scaffold2.size1663701.3.1253264-1653727 GeneWise cds 7 15 0.00 + 0 ID=scaffold2.size1663701.3.1253264-1653727-TF101526-genewise-prediction-2\n";
    print EXPECTED "scaffold5.size1039674 GeneWise match 2 7 -16.98 + . ID=scaffold5.size1039674-TF101526-genewise-prediction-1\n";
    print EXPECTED "scaffold5.size1039674 GeneWise cds 2 7 0.00 + 0 ID=scaffold5.size1039674-TF101526-genewise-prediction-1\n";
    print EXPECTED "scaffold5.size1039674 GeneWise match 9 231 -14.55 + . ID=scaffold5.size1039674-TF101526-genewise-prediction-2\n";
    print EXPECTED "scaffold5.size1039674 GeneWise cds 9 43 0.00 + 0 ID=scaffold5.size1039674-TF101526-genewise-prediction-2\n";
    print EXPECTED "scaffold5.size1039674 GeneWise intron 44 58 0.00 + . ID=scaffold5.size1039674-TF101526-genewise-prediction-2\n";
    print EXPECTED "scaffold5.size1039674 GeneWise cds 59 122 0.00 + 1 ID=scaffold5.size1039674-TF101526-genewise-prediction-2\n";
    print EXPECTED "scaffold5.size1039674 GeneWise intron 123 189 0.00 + . ID=scaffold5.size1039674-TF101526-genewise-prediction-2\n";
    print EXPECTED "scaffold5.size1039674 GeneWise cds 190 231 0.00 + 0 ID=scaffold5.size1039674-TF101526-genewise-prediction-2\n";
    print EXPECTED "scaffold5.size1039674 GeneWise match 44996 44998 2.11 - . ID=scaffold5.size1039674-TF101526-genewise-prediction-3\n";
    print EXPECTED "scaffold5.size1039674 GeneWise cds 44996 44998 0.00 - 0 ID=scaffold5.size1039674-TF101526-genewise-prediction-3\n";
    print EXPECTED "scaffold5.size1039674 GeneWise match 9587 11764 94.51 - . ID=scaffold5.size1039674-TF101526-genewise-prediction-4\n";
    print EXPECTED "scaffold5.size1039674 GeneWise cds 11737 11764 0.00 - 0 ID=scaffold5.size1039674-TF101526-genewise-prediction-4\n";
    print EXPECTED "scaffold5.size1039674 GeneWise intron 11099 11736 0.00 - . ID=scaffold5.size1039674-TF101526-genewise-prediction-4\n";
    print EXPECTED "scaffold5.size1039674 GeneWise cds 10994 11098 0.00 - 2 ID=scaffold5.size1039674-TF101526-genewise-prediction-4\n";
    print EXPECTED "scaffold5.size1039674 GeneWise intron 10932 10993 0.00 - . ID=scaffold5.size1039674-TF101526-genewise-prediction-4\n";
    print EXPECTED "scaffold5.size1039674 GeneWise cds 10857 10931 0.00 - 2 ID=scaffold5.size1039674-TF101526-genewise-prediction-4\n";
    print EXPECTED "scaffold5.size1039674 GeneWise intron 10620 10856 0.00 - . ID=scaffold5.size1039674-TF101526-genewise-prediction-4\n";
    print EXPECTED "scaffold5.size1039674 GeneWise cds 10543 10619 0.00 - 2 ID=scaffold5.size1039674-TF101526-genewise-prediction-4\n";
    print EXPECTED "scaffold5.size1039674 GeneWise intron 10419 10542 0.00 - . ID=scaffold5.size1039674-TF101526-genewise-prediction-4\n";
    print EXPECTED "scaffold5.size1039674 GeneWise cds 10284 10418 0.00 - 0 ID=scaffold5.size1039674-TF101526-genewise-prediction-4\n";
    print EXPECTED "scaffold5.size1039674 GeneWise intron 9915 10283 0.00 - . ID=scaffold5.size1039674-TF101526-genewise-prediction-4\n";
    print EXPECTED "scaffold5.size1039674 GeneWise cds 9778 9914 0.00 - 0 ID=scaffold5.size1039674-TF101526-genewise-prediction-4\n";
    print EXPECTED "scaffold5.size1039674 GeneWise intron 9761 9777 0.00 - . ID=scaffold5.size1039674-TF101526-genewise-prediction-4\n";
    print EXPECTED "scaffold5.size1039674 GeneWise cds 9734 9760 0.00 - 1 ID=scaffold5.size1039674-TF101526-genewise-prediction-4\n";
    print EXPECTED "scaffold5.size1039674 GeneWise intron 9597 9733 0.00 - . ID=scaffold5.size1039674-TF101526-genewise-prediction-4\n";
    print EXPECTED "scaffold5.size1039674 GeneWise cds 9587 9596 0.00 - 1 ID=scaffold5.size1039674-TF101526-genewise-prediction-4\n";
    print EXPECTED "//\n";
    print EXPECTED "TF105015\n";
    print EXPECTED "scaffold2.size1663701.3.1253264-1653727 GeneWise match 50000 49989 -11.34 - . scaffold2.size1663701.3.1253264-1653727-TF105015-genewise-prediction-1\n";
    print EXPECTED "scaffold2.size1663701.3.1253264-1653727 GeneWise cds 50000 49989 0.00 - 0 scaffold2.size1663701.3.1253264-1653727-TF105015-genewise-prediction-1\n";
    print EXPECTED "scaffold2.size1663701.3.1253264-1653727 GeneWise match 49986 49945 6.12 - . scaffold2.size1663701.3.1253264-1653727-TF105015-genewise-prediction-2\n";
    print EXPECTED "scaffold2.size1663701.3.1253264-1653727 GeneWise cds 49986 49945 0.00 - 0 scaffold2.size1663701.3.1253264-1653727-TF105015-genewise-prediction-2\n";
    print EXPECTED "scaffold2.size1663701.3.1253264-1653727 GeneWise match 49989 50000 -11.34 - . ID=scaffold2.size1663701.3.1253264-1653727-TF105015-genewise-prediction-1\n";
    print EXPECTED "scaffold2.size1663701.3.1253264-1653727 GeneWise cds 49989 50000 0.00 - 0 ID=scaffold2.size1663701.3.1253264-1653727-TF105015-genewise-prediction-1\n";
    print EXPECTED "scaffold2.size1663701.3.1253264-1653727 GeneWise match 49945 49986 6.12 - . ID=scaffold2.size1663701.3.1253264-1653727-TF105015-genewise-prediction-2\n";
    print EXPECTED "scaffold2.size1663701.3.1253264-1653727 GeneWise cds 49945 49986 0.00 - 0 ID=scaffold2.size1663701.3.1253264-1653727-TF105015-genewise-prediction-2\n";
    print EXPECTED "//\n";
    close(EXPECTED);
    $differences = "";
    @@ -7265,15 +7424,14 @@ sub test_make_hmmfile
    my $errormsg; # RETURNED AS 'none' BY A FUNCTION IF THERE IS NO ERROR
    my $hmmfile; # FILE CONTAINING A HMM
    my $input_hmms; # INPUT FILE OF HMMS
    my $random_number; # RANDOM NUMBER TO USE IN TEMPORARY FILE NAME
    my $expected_hmmfile; # FILE CONTAINING EXPECTED CONTENT OF $hmmfile
    my $differences; # DIFFERENCES BETWEEN $hmmfile AND $expected_hmmfile
    my $length_differences; # LENGTH OF $differences
    my $line; #

    # WRITE THE INPUT FILE OF HMMS:
    $random_number = rand();
    $input_hmms = $outputdir."/tmp".$random_number;
    ($input_hmms,$errorcode,$errormsg) = &make_filename($outputdir);
    if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); }
    open(INPUT_HMMS,">$input_hmms") || die "ERROR: test_make_hmmfile: cannot open $input_hmms\n";
    print INPUT_HMMS "HMMER2.0 [2.3.2]\n";
    print INPUT_HMMS "NAME TF350597.0\n";
    @@ -7294,8 +7452,8 @@ sub test_make_hmmfile
    ($hmmfile,$errorcode,$errormsg) = &make_hmmfile($outputdir,$input_hmms,6,10);
    if ($errorcode != 0) { print STDERR "ERROR: test_make_hmmfile: failed test1\n"; exit;}
    # OPEN A FILE CONTAINING THE EXPECTED CONTENTS OF $hmmfile:
    $random_number = rand();
    $expected_hmmfile = $outputdir."/tmp".$random_number;
    ($expected_hmmfile,$errorcode,$errormsg) = &make_filename($outputdir);
    if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); }
    open(EXPECTED,">$expected_hmmfile") || die "ERROR: test_make_hmmfile: cannot open $expected_hmmfile\n";
    print EXPECTED "HMMER2.0 [2.3.2]\n";
    print EXPECTED "NAME TF101000.0\n";
    @@ -7318,8 +7476,8 @@ sub test_make_hmmfile
    system "rm -f $expected_hmmfile";

    # WRITE THE INPUT FILE OF HMMS:
    $random_number = rand();
    $input_hmms = $outputdir."/tmp".$random_number;
    ($input_hmms,$errorcode,$errormsg) = &make_filename($outputdir);
    if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); }
    open(INPUT_HMMS,">$input_hmms") || die "ERROR: test_make_hmmfile: cannot open $input_hmms\n";
    print INPUT_HMMS "HMMER2.0 [2.3.2]\n";
    print INPUT_HMMS "NAME TF350597.0\n";
    @@ -7340,8 +7498,8 @@ sub test_make_hmmfile
    ($hmmfile,$errorcode,$errormsg) = &make_hmmfile($outputdir,$input_hmms,11,15);
    if ($errorcode != 0) { print STDERR "ERROR: test_make_hmmfile: failed test2\n"; exit;}
    # OPEN A FILE CONTAINING THE EXPECTED CONTENTS OF $hmmfile:
    $random_number = rand();
    $expected_hmmfile = $outputdir."/tmp".$random_number;
    ($expected_hmmfile,$errorcode,$errormsg) = &make_filename($outputdir);
    if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); }
    open(EXPECTED,">$expected_hmmfile") || die "ERROR: test_make_hmmfile: cannot open $expected_hmmfile\n";
    print EXPECTED "HMMER2.0 [2.3.2]\n";
    print EXPECTED "NAME TF101001.0\n";
    @@ -7364,8 +7522,8 @@ sub test_make_hmmfile
    system "rm -f $expected_hmmfile";

    # WRITE THE INPUT FILE OF HMMS:
    $random_number = rand();
    $input_hmms = $outputdir."/tmp".$random_number;
    ($input_hmms,$errorcode,$errormsg) = &make_filename($outputdir);
    if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); }
    open(INPUT_HMMS,">$input_hmms") || die "ERROR: test_make_hmmfile: cannot open $input_hmms\n";
    print INPUT_HMMS "HMMER2.0 [2.3.2]\n"; # LINE 1
    print INPUT_HMMS "NAME TF350597.0\n";
    @@ -7386,8 +7544,8 @@ sub test_make_hmmfile
    ($hmmfile,$errorcode,$errormsg) = &make_hmmfile($outputdir,$input_hmms,1,5);
    if ($errorcode != 0) { print STDERR "ERROR: test_make_hmmfile: failed test3\n"; exit;}
    # OPEN A FILE CONTAINING THE EXPECTED CONTENTS OF $hmmfile:
    $random_number = rand();
    $expected_hmmfile = $outputdir."/tmp".$random_number;
    ($expected_hmmfile,$errorcode,$errormsg) = &make_filename($outputdir);
    if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); }
    open(EXPECTED,">$expected_hmmfile") || die "ERROR: test_make_hmmfile: cannot open $expected_hmmfile\n";
    print EXPECTED "HMMER2.0 [2.3.2]\n";
    print EXPECTED "NAME TF350597.0\n";
    @@ -7410,8 +7568,8 @@ sub test_make_hmmfile
    system "rm -f $expected_hmmfile";

    # WRITE THE INPUT FILE OF HMMS:
    $random_number = rand();
    $input_hmms = $outputdir."/tmp".$random_number;
    ($input_hmms,$errorcode,$errormsg) = &make_filename($outputdir);
    if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); }
    open(INPUT_HMMS,">$input_hmms") || die "ERROR: test_make_hmmfile: cannot open $input_hmms\n";
    print INPUT_HMMS "HMMER2.0 [2.3.2]\n";
    print INPUT_HMMS "NAME TF350597.0\n";
    @@ -7449,12 +7607,11 @@ sub make_hmmfile
    my $hmmfile; ## FILE WITH HMM FOR THE FAMILY
    my $errorcode = 0; ## RETURNED AS 0 IF THERE IS NO ERROR
    my $errormsg = "none";## RETURNED AS 'none' IF THERE IS NO ERROR
    my $random_number; ## RANDOM NUMBER TO USE IN TEMPORARY FILE NAMES
    my $hmm_length; ## LENGTH OF THE HMM FOR FAMILY IN $input_hmms

    # OPEN AN OUTPUT FILE FOR THE HMM FROM FAMILY $family:
    $random_number = rand();
    $hmmfile = $outputdir."/tmp".$random_number;
    ($hmmfile,$errorcode,$errormsg) = &make_filename($outputdir);
    if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); }

    # CHECK $hmm_start_pos AND $hmm_end_pos ARE NUMBERS:
    if ($hmm_start_pos eq 'none' || $hmm_end_pos eq 'none')
    @@ -7488,11 +7645,10 @@ sub test_update_genewise_output
    my $differences; ## DIFFERENCES BETWEEN $genewise_output2 AND $expected_genewise_output2
    my $length_differences; ## LENGTH OF $differences
    my $line; ##
    my $random_number; ## RANDOM NUMBER TO USE IN TEMPORARY FILE NAMES

    # OPEN A FILE WITH GENEWISE OUTPUT:
    $random_number = rand();
    $genewise_output = $outputdir."/tmp".$random_number;
    ($genewise_output,$errorcode,$errormsg) = &make_filename($outputdir);
    if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); }
    open(GENEWISE_OUTPUT,">$genewise_output") || die "ERROR: test_update_genewise_output: cannot open $genewise_output\n";
    print GENEWISE_OUTPUT "scaffold1 GeneWise match 2 4 -19.50 + . scaffold1-genewise-prediction-1\n";
    print GENEWISE_OUTPUT "scaffold1 GeneWise cds 2 4 0.00 + 0 scaffold1-genewise-prediction-1\n";
    @@ -7507,17 +7663,17 @@ sub test_update_genewise_output
    ($genewise_output2,$errorcode,$errormsg) = &update_genewise_output($genewise_output,$outputdir,"scaffold1",'TF101000');
    if ($errorcode != 0) { print STDERR "ERROR: test_update_genewise_output: failed test1\n"; exit;}
    # OPEN A FILE WITH THE EXPECTED OUTPUT:
    $random_number = rand();
    $expected_genewise_output2 = $outputdir."/tmp".$random_number;
    ($expected_genewise_output2,$errorcode,$errormsg) = &make_filename($outputdir);
    if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); }
    open(EXPECTED,">$expected_genewise_output2") || die "ERROR: test_update_genewise_output: cannot open $expected_genewise_output2";
    print EXPECTED "scaffold1 GeneWise match 2 4 -19.50 + . scaffold1-TF101000-genewise-prediction-1\n";
    print EXPECTED "scaffold1 GeneWise cds 2 4 0.00 + 0 scaffold1-TF101000-genewise-prediction-1\n";
    print EXPECTED "scaffold1 GeneWise match 7 24 0.91 + . scaffold1-TF101000-genewise-prediction-2\n";
    print EXPECTED "scaffold1 GeneWise cds 7 24 0.00 + 0 scaffold1-TF101000-genewise-prediction-2\n";
    print EXPECTED "scaffold1 GeneWise match 2 4 -19.50 + . scaffold1-TF101000-genewise-prediction-3\n";
    print EXPECTED "scaffold1 GeneWise cds 2 4 0.00 + 0 scaffold1-TF101000-genewise-prediction-3\n";
    print EXPECTED "scaffold1 GeneWise match 7 24 0.91 + . scaffold1-TF101000-genewise-prediction-4\n";
    print EXPECTED "scaffold1 GeneWise cds 7 24 0.00 + 0 scaffold1-TF101000-genewise-prediction-4\n";
    print EXPECTED "scaffold1 GeneWise match 2 4 -19.50 + . ID=scaffold1-TF101000-genewise-prediction-1\n";
    print EXPECTED "scaffold1 GeneWise cds 2 4 0.00 + 0 ID=scaffold1-TF101000-genewise-prediction-1\n";
    print EXPECTED "scaffold1 GeneWise match 7 24 0.91 + . ID=scaffold1-TF101000-genewise-prediction-2\n";
    print EXPECTED "scaffold1 GeneWise cds 7 24 0.00 + 0 ID=scaffold1-TF101000-genewise-prediction-2\n";
    print EXPECTED "scaffold1 GeneWise match 2 4 -19.50 + . ID=scaffold1-TF101000-genewise-prediction-3\n";
    print EXPECTED "scaffold1 GeneWise cds 2 4 0.00 + 0 ID=scaffold1-TF101000-genewise-prediction-3\n";
    print EXPECTED "scaffold1 GeneWise match 7 24 0.91 + . ID=scaffold1-TF101000-genewise-prediction-4\n";
    print EXPECTED "scaffold1 GeneWise cds 7 24 0.00 + 0 ID=scaffold1-TF101000-genewise-prediction-4\n";
    close(EXPECTED);
    $differences = "";
    open(TEMP,"diff $genewise_output2 $expected_genewise_output2 |");
    @@ -7534,8 +7690,8 @@ sub test_update_genewise_output
    system "rm -f $expected_genewise_output2";

    # OPEN A FILE WITH GENEWISE OUTPUT:
    $random_number = rand();
    $genewise_output = $outputdir."/tmp".$random_number;
    ($genewise_output,$errorcode,$errormsg) = &make_filename($outputdir);
    if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); }
    open(GENEWISE_OUTPUT,">$genewise_output") || die "ERROR: test_update_genewise_output: cannot open $genewise_output\n";
    print GENEWISE_OUTPUT "scaffold1 GeneWise match 2 4 -19.50 + . scaffold1-genewise-prediction-1\n";
    print GENEWISE_OUTPUT "scaffold1 GeneWise cds 2 4 0.00 + 0 scaffold1-genewise-prediction-1\n";
    @@ -7553,8 +7709,8 @@ sub test_update_genewise_output
    system "rm -f $genewise_output2";

    # OPEN A FILE WITH GENEWISE OUTPUT:
    $random_number = rand();
    $genewise_output = $outputdir."/tmp".$random_number;
    ($genewise_output,$errorcode,$errormsg) = &make_filename($outputdir);
    if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); }
    open(GENEWISE_OUTPUT,">$genewise_output") || die "ERROR: test_update_genewise_output: cannot open $genewise_output\n";
    print GENEWISE_OUTPUT "scaffold1 GeneWise match 2 4 -19.50 + . scaffold1-genewise-prediction-1\n";
    print GENEWISE_OUTPUT "scaffold1 GeneWise cds 2 4 0.00 + 0 scaffold1-genewise-prediction-1\n";
    @@ -7571,7 +7727,50 @@ sub test_update_genewise_output
    if ($errorcode != 30) { print STDERR "ERROR: test_update_genewise_output: failed test3\n"; exit;}
    system "rm -f $genewise_output";
    system "rm -f $genewise_output2";



    # OPEN A FILE WITH GENEWISE OUTPUT:
    ($genewise_output,$errorcode,$errormsg) = &make_filename($outputdir);
    if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); }
    open(GENEWISE_OUTPUT,">$genewise_output") || die "ERROR: test_update_genewise_output: cannot open $genewise_output\n";
    print GENEWISE_OUTPUT "scaffold1 GeneWise match 4 2 -19.50 - . scaffold1-genewise-prediction-1\n";
    print GENEWISE_OUTPUT "scaffold1 GeneWise cds 4 2 0.00 - 0 scaffold1-genewise-prediction-1\n";
    print GENEWISE_OUTPUT "scaffold1 GeneWise match 24 7 0.91 - . scaffold1-genewise-prediction-2\n";
    print GENEWISE_OUTPUT "scaffold1 GeneWise cds 24 7 0.00 - 0 scaffold1-genewise-prediction-2\n";
    print GENEWISE_OUTPUT "//\n";
    print GENEWISE_OUTPUT "scaffold1 GeneWise match 2 4 -19.50 + . scaffold1-genewise-prediction-1\n";
    print GENEWISE_OUTPUT "scaffold1 GeneWise cds 2 4 0.00 + 0 scaffold1-genewise-prediction-1\n";
    print GENEWISE_OUTPUT "scaffold1 GeneWise match 7 24 0.91 + . scaffold1-genewise-prediction-2\n";
    print GENEWISE_OUTPUT "scaffold1 GeneWise cds 7 24 0.00 + 0 scaffold1-genewise-prediction-2\n";
    close(GENEWISE_OUTPUT);
    ($genewise_output2,$errorcode,$errormsg) = &update_genewise_output($genewise_output,$outputdir,"scaffold1",'TF101000');
    if ($errorcode != 0) { print STDERR "ERROR: test_update_genewise_output: failed test4\n"; exit;}
    # OPEN A FILE WITH THE EXPECTED OUTPUT:
    ($expected_genewise_output2,$errorcode,$errormsg) = &make_filename($outputdir);
    if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); }
    open(EXPECTED,">$expected_genewise_output2") || die "ERROR: test_update_genewise_output: cannot open $expected_genewise_output2";
    print EXPECTED "scaffold1 GeneWise match 2 4 -19.50 - . ID=scaffold1-TF101000-genewise-prediction-1\n";
    print EXPECTED "scaffold1 GeneWise cds 2 4 0.00 - 0 ID=scaffold1-TF101000-genewise-prediction-1\n";
    print EXPECTED "scaffold1 GeneWise match 7 24 0.91 - . ID=scaffold1-TF101000-genewise-prediction-2\n";
    print EXPECTED "scaffold1 GeneWise cds 7 24 0.00 - 0 ID=scaffold1-TF101000-genewise-prediction-2\n";
    print EXPECTED "scaffold1 GeneWise match 2 4 -19.50 + . ID=scaffold1-TF101000-genewise-prediction-3\n";
    print EXPECTED "scaffold1 GeneWise cds 2 4 0.00 + 0 ID=scaffold1-TF101000-genewise-prediction-3\n";
    print EXPECTED "scaffold1 GeneWise match 7 24 0.91 + . ID=scaffold1-TF101000-genewise-prediction-4\n";
    print EXPECTED "scaffold1 GeneWise cds 7 24 0.00 + 0 ID=scaffold1-TF101000-genewise-prediction-4\n";
    close(EXPECTED);
    $differences = "";
    open(TEMP,"diff $genewise_output2 $expected_genewise_output2 |");
    while(<TEMP>)
    {
    $line = $_;
    $differences = $differences.$line;
    }
    close(TEMP);
    $length_differences = length($differences);
    if ($length_differences != 0) { print STDERR "ERROR: test_update_genewise_output: failed test4 (files $genewise_output2 $expected_genewise_output2)\n"; exit;}
    system "rm -f $genewise_output";
    system "rm -f $genewise_output2";
    system "rm -f $expected_genewise_output2";
    }

    #------------------------------------------------------------------#
    @@ -7587,21 +7786,22 @@ sub update_genewise_output
    my $errorcode = 0; ## RETURNED AS 0 IF THERE IS NO ERROR
    my $errormsg = "none";## RETURNED AS 'none' IF THERE IS NO ERROR
    my $genewise_output2 = "none";## FILE WITH GENEWISE OUTPUT IN SCAFFOLD COORDAINTES
    my $random_number; ## RANDOM NUMBER TO USE IN TEMPORARY FILE NAMES
    my $line; ##
    my @temp; ##
    my $start; ## START OF FEATURE IN GENEWISE OUTPUT FILE
    my $end; ## END OF FEATURE IN GENEWISE OUTPUT FILE
    my $strand; ## STRAND OF FEATURE IN GENEWISE OUTPUT FILE
    my $name; ## NAME OF FEATURE IN GENEWISE OUTPUT FILE
    my $seqname; ## NAME OF SEQUENCE USED FOR RUNNING GENEWISE
    my $new_name; ## NEW NAME OF THE FEATURE, USING THE SCAFFOLD NAME
    my $num_predictions = 0; ## NUMBER OF GENE PREDICTIONS SEEN IN A SCAFFOLD
    my %SEEN = (); ## HASH TABLE TO RECORD WHETHER WE HAVE SEEN A PARTICULAR GENE PREDICTION BEFORE
    my $prev_line; ## PREVIOUS LINE OF $genewise_output
    my $tmp; ##

    # MAKE A TEMPORARY FILE TO PUT THE GENEWISE OUTPUT WITH SCAFFOLD COORDINATES INTO:
    $random_number = rand();
    $genewise_output2 = $outputdir."/tmp".$random_number;
    ($genewise_output2,$errorcode,$errormsg) = &make_filename($outputdir);
    if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); }
    open(GENEWISE_OUTPUT2,">$genewise_output2") || die "ERROR: update_genewise_output: cannot open $genewise_output2\n";

    # READ IN THE INPUT FILE OF GENEWISE OUTPUT:
    @@ -7617,6 +7817,7 @@ sub update_genewise_output
    $seqname = $temp[0];
    $start = $temp[3];
    $end = $temp[4];
    $strand = $temp[6];
    $name = $temp[8];
    if ($seqname ne $scaffold)
    {
    @@ -7630,9 +7831,17 @@ sub update_genewise_output
    $SEEN{$name} = 1;
    $num_predictions++;
    }
    $new_name = $scaffold."-".$family."-genewise-prediction-".$num_predictions;
    # FOR MAKER, YOU NEED START < END, EVEN IF IT IS A NEGATIVE STRAND GENE:
    if ($end < $start)
    {
    $tmp = $start;
    $start = $end;
    $end = $tmp;
    }
    # FOR MAKER, YOU NEED TO START THE FINAL COLUMN WITH ID=scaffold...
    $new_name = "ID=".$scaffold."-".$family."-genewise-prediction-".$num_predictions;
    # PRINT OUT TO $genewise_output2:
    print GENEWISE_OUTPUT2 "$scaffold\t$temp[1]\t$temp[2]\t$temp[3]\t$temp[4]\t$temp[5]\t$temp[6]\t$temp[7]\t$new_name\n";
    print GENEWISE_OUTPUT2 "$scaffold\t$temp[1]\t$temp[2]\t$start\t$end\t$temp[5]\t$temp[6]\t$temp[7]\t$new_name\n";
    }
    elsif ($#temp > 0 && $line ne '//')
    {
    @@ -7660,15 +7869,14 @@ sub test_check_if_have_hmm_for_family
    my $take; # SAYS WHETHER TO TAKE A PARTICULAR FAMILY
    my $errorcode; # RETURNED AS 0 BY A FUNCTION IF THERE IS NO ERROR
    my $errormsg; # RETURNED AS 'none' BY A FUNCTION IF THERE IS NO ERROR
    my $random_number; # RANDOM NUMBER TO USE IN TEMPORARY FILE NAME
    my $hmm_start_pos; # START POSITION OF A HMM IN $input_hmms
    my $hmm_end_pos; # END POSITION OF A HMM IN $input_hmms
    my $hmm_pos_file; # FILE WITH POSITIONS OF HMMs IN $input_hmms
    my $TAKE_FAMILY; # HASH TABLE THAT SAYS WHETHER TO TAKE A FAMILY

    # WRITE THE INPUT FILE OF HMMS:
    $random_number = rand();
    $input_hmms = $outputdir."/tmp".$random_number;
    ($input_hmms,$errorcode,$errormsg) = &make_filename($outputdir);
    if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); }
    open(INPUT_HMMS,">$input_hmms") || die "ERROR: test_check_if_have_hmm_for_family: cannot open $input_hmms\n";
    print INPUT_HMMS "HMMER2.0 [2.3.2]\n";
    print INPUT_HMMS "NAME TF350597.0\n";
    @@ -7696,8 +7904,8 @@ sub test_check_if_have_hmm_for_family
    system "rm -f $hmm_pos_file";

    # WRITE THE INPUT FILE OF HMMS:
    $random_number = rand();
    $input_hmms = $outputdir."/tmp".$random_number;
    ($input_hmms,$errorcode,$errormsg) = &make_filename($outputdir);
    if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); }
    open(INPUT_HMMS,">$input_hmms") || die "ERROR: test_check_if_have_hmm_for_family: cannot open $input_hmms\n";
    print INPUT_HMMS "HMMER2.0 [2.3.2]\n";
    print INPUT_HMMS "NAME TF350597.0\n";
    @@ -7725,8 +7933,8 @@ sub test_check_if_have_hmm_for_family
    system "rm -f $hmm_pos_file";

    # WRITE THE INPUT FILE OF HMMS:
    $random_number = rand();
    $input_hmms = $outputdir."/tmp".$random_number;
    ($input_hmms,$errorcode,$errormsg) = &make_filename($outputdir);
    if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); }
    open(INPUT_HMMS,">$input_hmms") || die "ERROR: test_check_if_have_hmm_for_family: cannot open $input_hmms\n";
    print INPUT_HMMS "HMMER2.0 [2.3.2]\n";
    print INPUT_HMMS "NAME TF350597.0\n";
    @@ -7754,8 +7962,8 @@ sub test_check_if_have_hmm_for_family
    system "rm -f $hmm_pos_file";

    # WRITE THE INPUT FILE OF HMMS:
    $random_number = rand();
    $input_hmms = $outputdir."/tmp".$random_number;
    ($input_hmms,$errorcode,$errormsg) = &make_filename($outputdir);
    if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); }
    open(INPUT_HMMS,">$input_hmms") || die "ERROR: test_check_if_have_hmm_for_family: cannot open $input_hmms\n";
    print INPUT_HMMS "HMMER2.0 [2.3.2]\n";
    print INPUT_HMMS "NAME TF350597.0\n";
    @@ -7791,7 +7999,6 @@ sub test_check_if_have_hmm_for_family
    sub test_make_seqfile
    {
    my $outputdir = $_[0]; # DIRECTORY WITH OUTPUT FILES
    my $random_number; # RANDOM NUMBER TO USE IN TEMPORARY FILE NAMES
    my $input_fasta; # INPUT FASTA FILE OF SCAFFOLD SEQUENCES
    my $seqfile; # FILE WITH SEQUENE OF A PARTICULAR SCAFFOLD
    my $scaffold_length; # LENGTH OF A PARTICULAR SCAFFOLD
    @@ -7804,8 +8011,8 @@ sub test_make_seqfile
    my $line; #

    # OPEN AN INPUT FASTA FILE OF SCAFFOLD SEQUENCES:
    $random_number = rand();
    $input_fasta = $outputdir."/tmp".$random_number;
    ($input_fasta,$errorcode,$errormsg) = &make_filename($outputdir);
    if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); }
    open(INPUT_FASTA,">$input_fasta") || die "ERROR: test_make_seqfile: cannot open $input_fasta\n";
    print INPUT_FASTA ">scaffold1\n";
    print INPUT_FASTA "ABCDEFGHIJ\n";
    @@ -7820,8 +8027,8 @@ sub test_make_seqfile
    ($seqfile,$scaffold_length,$errorcode,$errormsg) = &make_seqfile($input_fasta,$outputdir,"scaffold1",$scaffold_pos_file);
    if ($errorcode != 0 || $scaffold_length != 20) { print STDERR "ERROR: test_make_seqfile: failed test1 (errorcode $errorcode scaffold_length $scaffold_length\n"; exit;}
    # OPEN A FILE CONTAINING THE EXPECTED CONTENTS OF $seqfile:
    $random_number = rand();
    $expected_seqfile = $outputdir."/tmp".$random_number;
    ($expected_seqfile,$errorcode,$errormsg) = &make_filename($outputdir);
    if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); }
    open(EXPECTED,">$expected_seqfile") || die "ERROR: test_make_seqfile: cannot open $expected_seqfile\n";
    print EXPECTED ">scaffold1\n";
    print EXPECTED "ABCDEFGHIJ\n";
    @@ -7843,8 +8050,8 @@ sub test_make_seqfile
    system "rm -f $scaffold_pos_file";

    # OPEN AN INPUT FASTA FILE OF SCAFFOLD SEQUENCES:
    $random_number = rand();
    $input_fasta = $outputdir."/tmp".$random_number;
    ($input_fasta,$errorcode,$errormsg) = &make_filename($outputdir);
    if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); }
    open(INPUT_FASTA,">$input_fasta") || die "ERROR: test_make_seqfile: cannot open $input_fasta\n";
    print INPUT_FASTA ">scaffold1\n";
    print INPUT_FASTA "ABCDEFGHIJ\n";
    @@ -7859,8 +8066,8 @@ sub test_make_seqfile
    ($seqfile,$scaffold_length,$errorcode,$errormsg) = &make_seqfile($input_fasta,$outputdir,"scaffold2",$scaffold_pos_file);
    if ($errorcode != 0 || $scaffold_length != 5) { print STDERR "ERROR: test_make_seqfile: failed test2\n"; exit;}
    # OPEN A FILE CONTAINING THE EXPECTED CONTENTS OF $seqfile:
    $random_number = rand();
    $expected_seqfile = $outputdir."/tmp".$random_number;
    ($expected_seqfile,$errorcode,$errormsg) = &make_filename($outputdir);
    if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); }
    open(EXPECTED,">$expected_seqfile") || die "ERROR: test_make_seqfile: cannot open $expected_seqfile\n";
    print EXPECTED ">scaffold2\n";
    print EXPECTED "UVXYZ\n";
    @@ -7881,8 +8088,8 @@ sub test_make_seqfile
    system "rm -f $scaffold_pos_file";

    # OPEN AN INPUT FASTA FILE OF SCAFFOLD SEQUENCES:
    $random_number = rand();
    $input_fasta = $outputdir."/tmp".$random_number;
    ($input_fasta,$errorcode,$errormsg) = &make_filename($outputdir);
    if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); }
    open(INPUT_FASTA,">$input_fasta") || die "ERROR: test_make_seqfile: cannot open $input_fasta\n";
    print INPUT_FASTA ">scaffold1\n";
    print INPUT_FASTA "ABCDEFGHIJ\n";
    @@ -7891,8 +8098,8 @@ sub test_make_seqfile
    print INPUT_FASTA "UVXYZ\n";
    close(INPUT_FASTA);
    # RECORD THE POSITION OF THE SCAFFOLDS IN $input_fasta:
    $random_number = rand();
    $scaffold_pos_file = $outputdir."/tmp".$random_number;
    ($scaffold_pos_file,$errorcode,$errormsg) = &make_filename($outputdir);
    if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); }
    open(SCAFFOLD_POS,">$scaffold_pos_file") || die "ERROR: test_make_seqfile: cannot open $scaffold_pos_file";
    print SCAFFOLD_POS "scaffold1 3\n";
    print SCAFFOLD_POS "scaffold2 5\n";
    @@ -7906,8 +8113,8 @@ sub test_make_seqfile
    system "rm -f $scaffold_pos_file";

    # OPEN AN INPUT FASTA FILE OF SCAFFOLD SEQUENCES:
    $random_number = rand();
    $input_fasta = $outputdir."/tmp".$random_number;
    ($input_fasta,$errorcode,$errormsg) = &make_filename($outputdir);
    if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); }
    open(INPUT_FASTA,">$input_fasta") || die "ERROR: test_make_seqfile: cannot open $input_fasta\n";
    print INPUT_FASTA ">scaffold1\n";
    print INPUT_FASTA "ABCDEFGHIJ\n";
    @@ -7916,8 +8123,8 @@ sub test_make_seqfile
    print INPUT_FASTA "UVXYZ\n";
    close(INPUT_FASTA);
    # RECORD THE POSITION OF THE SCAFFOLDS IN $input_fasta:
    $random_number = rand();
    $scaffold_pos_file = $outputdir."/tmp".$random_number;
    ($scaffold_pos_file,$errorcode,$errormsg) = &make_filename($outputdir);
    if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); }
    open(SCAFFOLD_POS,">$scaffold_pos_file") || die "ERROR: test_make_seqfile: cannot open $scaffold_pos_file";
    print SCAFFOLD_POS "scaffold2 5\n";
    print SCAFFOLD_POS "scaffold3 3\n";
    @@ -7954,7 +8161,6 @@ sub make_seqfile
    my $scaffold_end_pos = "none";# END POSITION OF $scaffold IN $input_fasta
    my $line; #
    my $line_num = "none";# LINE NUMBER THAT $scaffold APPEARS ON IN $scaffold_pos_file
    my $random_number; # RANDOM NUMBER TO USE IN TEMPORARY FILE NAMES
    my $i; #
    my $scaffold_length_lines; # NUMBER OF LINES TAKEN UP BY $scaffold IN $input_fasta

    @@ -8040,8 +8246,8 @@ sub make_seqfile
    }

    # OPEN AN OUTPUT FILE FOR THE SEQUENCE OF SCAFFOLD $scaffold:
    $random_number = rand();
    $seqfile = $outputdir."/tmp".$random_number;
    ($seqfile,$errorcode,$errormsg) = &make_filename($outputdir);
    if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); }

    # CHECK $scaffold_start_pos AND $scaffold_end_pos ARE NUMBERS:
    if ($scaffold_start_pos eq 'none' || $scaffold_end_pos eq 'none')
    @@ -8189,14 +8395,13 @@ sub check_if_have_hmm_for_family
    sub test_check_formatdb_files
    {
    my $outputdir = $_[0]; # DIRECTORY TO WRITE OUTPUT FILES IN
    my $random_number; # RANDOM NUMBER TO USE IN TEMPORARY FILE NAMES
    my $input_fasta; # INPUT FASTA FILE
    my $cmd; # COMMAND TO RUN
    my $formatdbfile; # OUTPUT FILE MADE BY FORMATDB

    # WRITE AN INPUT FASTA FILE:
    $random_number = rand();
    $input_fasta = $outputdir."/tmp".$random_number;
    ($input_fasta,$errorcode,$errormsg) = &make_filename($outputdir);
    if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); }
    open(INPUT_FASTA,">$input_fasta") || die "ERROR: test_check_formatdb_file: cannot open $input_fasta\n";
    print INPUT_FASTA ">V:15395346..15397430\n";
    print INPUT_FASTA "gaatgttattttcttatcaataagcacattttctatcaaacccagttcactttcaaattt\n";
    @@ -8354,7 +8559,6 @@ sub run_genewise_on_scaffolds
    my @temp; #
    my $family = "none";# TREEFAM FAMILY NAME
    my $seqfile; # FILE CONTAINING THE SEQUENCES FOR A FAMILY
    my $random_number; # RANDOM NUMBER TO USE IN TEMPORARY FILE NAME
    my $BLAST; # HASH TABLE WITH POSITIONS OF BLAST MATCHES IN SCAFFOLDS
    my $hit; # REGION OF A BLAST HIT BETWEEN THE FAMILY AND A SCAFFOLD
    my $scaffold; # A SCAFFOLD SEQUENCE
    @@ -8379,15 +8583,15 @@ sub run_genewise_on_scaffolds

    # OPEN THE OUTPUT FILE FOR THE GENEWISE OUTPUT:
    $output = $outputdir."/".$output;
    open(OUTPUT,">$output") || die "ERROR: run_genewise_on_scaffolds: cannot open $output\n";
    open(OUTPUT,">$output") || die "ERROR: run_genewise_on_scaffolds: cannot open output $output\n";
    close(OUTPUT);

    # CHECK IF THE FORMATDB FILES ARE COMPLETE:
    ($errorcode,$errormsg) = &check_formatdb_files($input_fasta);
    if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); }

    # READ IN THE SEQUENCES IN EACH FAMILY, FAMILY BY FAMILY:
    open(TREEFAM_SEQS,"$treefam_seqs") || die "ERROR: run_genewise_on_scaffolds: cannot open $treefam_seqs\n";
    open(TREEFAM_SEQS,"$treefam_seqs") || die "ERROR: run_genewise_on_scaffolds: cannot open treefam_seqs $treefam_seqs\n";
    while(<TREEFAM_SEQS>)
    {
    $line = $_;
    @@ -8403,9 +8607,9 @@ sub run_genewise_on_scaffolds
    if ($take == 1)
    {
    # OPEN A TEMPORARY FILE FOR THE SEQUENCES FOR THIS FAMILY:
    $random_number = rand();
    $seqfile = $outputdir."/tmp".$random_number;
    open(SEQFILE,">$seqfile") || die "ERROR: run_genewise_on_scaffolds: cannot open $seqfile\n";
    ($seqfile,$errorcode,$errormsg) = &make_filename($outputdir);
    if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); }
    open(SEQFILE,">$seqfile") || die "ERROR: run_genewise_on_scaffolds: cannot open seqfile $seqfile\n";
    # WRITE THE FAMILY NAME IN THE OUTPUT FILE:
    system "echo $family >> $output";
    }
    @@ -8521,12 +8725,11 @@ sub test_check_for_blast_match
    my $seqfile; # FILE WITH THE PROTEIN SEQUENCES
    my $scaffolds; # FILE WITH THE DNA SEQUENCES OF THE SCAFFOLDS
    my $BLAST; # HASH TABLE WITH POSITIONS OF HITS IN SCAFFOLDS
    my $random_number; # RANDOM NUMBER TO USE IN TEMPORARY FILE NAME
    my $formatdbfile; # FORMAT FILE PRODUCED BY formatdb

    # MAKE A FILE WITH THE PROTEIN SEQUENCES:
    $random_number = rand();
    $seqfile = $outputdir."/tmp".$random_number;
    ($seqfile,$errorcode,$errormsg) = &make_filename($outputdir);
    if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); }
    open(SEQFILE,">$seqfile") || die "ERROR: test_check_for_blast_match: cannot open $seqfile\n";
    print SEQFILE ">FOX-1\n";
    print SEQFILE "MQALYQLSATGAQQQNQQIPIGLSNSLLYQQLAAHQQIAAQQHQQQLAVSAAHQTQNNIMLATSAPSLINHMENSTDGKV\n";
    @@ -8537,8 +8740,8 @@ sub test_check_for_blast_match
    print SEQFILE "QMNPALRTLNRFTPY\n";
    close(SEQFILE);
    # MAKE A FILE WITH THE DNA SEQUENCES:
    $random_number = rand();
    $scaffolds = $outputdir."/tmp".$random_number;
    ($scaffolds,$errorcode,$errormsg) = &make_filename($outputdir);
    if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); }
    open(SCAFFOLDS,">$scaffolds") || die "ERROR: test_check_for_blast_match: cannot open $scaffolds\n";
    print SCAFFOLDS ">X:2445317..2450316\n";
    print SCAFFOLDS "ttttcctgtaaagcaaagcattcttgctcattcaccacaatgccacattattgtgcttct\n";
    @@ -8682,7 +8885,6 @@ sub check_for_blast_match
    my $flank_length = $_[4]; ## LENGTH OF SEQUENCE TO TAKE ON EITHER SIDE OF A BLAST MATCH
    my $blast_path = $_[5]; ## PATH TO THE FORMATDB AND BLASTALL EXECUTABLES
    my $testing = $_[6]; ## SAYS WHETHER THIS IS BEING CALLED FROM A TESTING SUBROUTINE
    my $random_number; ## RANDOM NUMBER FOR TEMPORARY FILE NAME
    my $output; ## THE BLAST OUTPUT FILE
    my @temp; ##
    my $line; ##
    @@ -8703,8 +8905,8 @@ sub check_for_blast_match

    # RUN BLAST BETWEEN THE PROTEIN SEQUENCES AND THE SCAFFOLDS:
    # -m 8 GIVES TABULAR BLAST FORMAT
    $random_number = rand();
    $output = $outputdir."/tmp".$random_number;
    ($output,$errorcode,$errormsg) = &make_filename($outputdir);
    if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); }
    $cmd = "$blast_path/blastall -p tblastn -i $seqfile -d $scaffolds -o $output -m 8 -e $evalue_cutoff\n"; ##
    system "$cmd";

    @@ -8916,7 +9118,6 @@ sub run_genewise_on_hmm
    my $scaffold_length = $_[9]; ## LENGTH OF THE SCAFFOLD
    my $errorcode = 0; ## RETURNED AS 0 IF THERE IS NO ERROR.
    my $errormsg = "none";## RETURNED AS 'none' IF THERE IS NO ERROR.
    my $random_number; ## RANDOM NUMBER FOR TEMPORARY FILE NAME
    my $genewise = "none";## TEMPORARY FILE FOR THE GENEWISE OUTPUT FOR THIS HMM
    my $cmd; ## THE GENEWISE COMMAND TO RUN
    my $genewise_exe = "/nfs/users/nfs_a/alc/Documents/GeneWise/wise2.4.1/src/bin/genewise"; # wise 2.4.1, can use -genestats
    @@ -8938,8 +9139,8 @@ sub run_genewise_on_hmm
    }

    # MAKE A GENEWISE OUTPUT FILE:
    $random_number = rand();
    $genewise = $outputdir."/tmp".$random_number;
    ($genewise,$errorcode,$errormsg) = &make_filename($outputdir);
    if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); }
    open(GENEWISE,">$genewise") || die "ERROR: run_genewise_on_hmm: cannot open $genewise\n";
    close(GENEWISE);
    if ($testing == 0) { print STDERR "___ scaffold $scaffoldname: running genewise for HMM $hmmname ...\n";}
  2. avrilcoghlan revised this gist Nov 20, 2012. No changes.
  3. avrilcoghlan revised this gist Nov 20, 2012. 1 changed file with 6696 additions and 803 deletions.
    7,499 changes: 6,696 additions & 803 deletions run_genewisedb_afterblast.pl
    6,696 additions, 803 deletions not shown because the diff is too large. Please use a local Git client to view these changes.
  4. avrilcoghlan created this gist Nov 15, 2012.
    3,184 changes: 3,184 additions & 0 deletions run_genewisedb_afterblast.pl
    3,184 additions, 0 deletions not shown because the diff is too large. Please use a local Git client to view these changes.