#! /usr/bin/perl -w
#
#
# DAB-GUI.pl
#
# A graphical user interface for the Divide and BLAST program
#
# (C)2002 Soren M. Johnson
#
# Based on the original Divide and BLAST program by Rahul Karnik
#
# Divides up a protein sequence file into subsequences with overlap,
# for BLASTing individually and filters results for unique hits
#------------------------------------------------------------------------
# Rahul Karnik
# 12/16/1999 -- Original version
# 12/30/1999 -- Enhanced output file - input parameters
# - sub-sequence positions
# -- BLAST directory option
# Soren Johnson
# 11/17/2002 -- First GUI version using Perl/Tk
#------------------------------------------------------------------------
use Tk;
use Tk::DialogBox;
use Tk::DirTree;
use Tk::LabEntry;
use strict; # Always!
# Program-wide settings and working data
# ------------------------------------------------------
# Input selection
my $input_file;         # File to process
my $input_file_path;    # File to process with pathname

# Splitting parameters
my $part_length;        # Length of each sub-sequence
my $part_overlap;       # Overlap between subsequences in amino acids

# BLAST parameters
my $expect_1;           # Expect value for full sequence BLAST
my $expect_2;           # Expect value for subsequence BLAST
my $matrix;             # Matrix to use for BLAST

# Output settings
my $output_dir;         # Output directory
my $html_out;           # Flag for HTML output
my $output_file;        # Final output file

# Working data; the two strings start out empty rather than undefined
my $seq_name = q{};
my $full_seq = q{};
my @full_seq_hits;
my $num_parts;
# Build the top-level application window
# ------------------------------------------------------
my $top = MainWindow->new;
$top->title('Divide and BLAST');

# One frame per section of the form, packed in creation order
# -------------------------------------------------------
my $frame1 = $top->Frame;
$frame1->pack(
    -pady  => 10,
    -padx  => 20,
    -ipadx => 0,
    -ipady => 0,
);

my $frame2 = $top->Frame;
$frame2->pack(-pady => 10);

my $frame3 = $top->Frame;
$frame3->pack(-pady => 10);

my $frame4 = $top->Frame;
$frame4->pack(-pady => 10);
# Frame 1: Input File and Output Directory
# -------------------------------------------------------
# Widget options carry their leading dash and -column is written out in
# full, so every option matches its documented Perl/Tk name.
$frame1->Label(-text => 'Input File: ')->grid(
    -row    => 0,
    -column => 0,
    -sticky => 'e',
);

# Placeholder text shown in the label until openfile() replaces it.
$input_file = 'None Selected';
my $infile_label = $frame1->Label(
    -text   => $input_file,
    -relief => 'sunken',
    -width  => 30,
)->grid(
    -row    => 0,
    -column => 1,
    -padx   => 5,
    -pady   => 10,
);

my $open = $frame1->Button(
    -text    => 'Open File',
    -command => \&openfile,
)->grid(
    -row    => 0,
    -column => 2,
);

$frame1->Label(-text => 'Output Directory: ')->grid(
    -row    => 1,
    -column => 0,
);

# Placeholder text for the output directory label.
$output_dir = 'Current Path';
my $output_Label = $frame1->Label(
    -text   => $output_dir,
    -relief => 'sunken',
    -width  => 30,
)->grid(
    -row    => 1,
    -column => 1,
);

my $output_Button = $frame1->Button(
    -text    => 'Change...',
    -command => \&display_out_dir,
)->grid(
    -row    => 1,
    -column => 2,
);
# Frame 2: Expect Value Scales
# -------------------------------------------------------
# Both sliders write straight into the file-level $expect_1 / $expect_2
# that are declared with the other settings near the top of the file, so
# they are only assigned here, not declared a second time with "my".

# Large Sequence Expect Scale
my $label_e1 = $frame2->Label(
    -text => 'Full Sequence Expect Value: ',
)->grid(
    -row    => 0,
    -column => 0,
    -sticky => 'e',
);
$expect_1 = 10;    # initial slider position
$frame2->Scale(
    -orient       => 'horizontal',
    -from         => 0,
    -to           => 20,
    -tickinterval => 5,
    -font         => '-adobe-times-*-r-normal--12-120-75-75-p-56-iso8859-1',
    -length       => 300,    # in pixels
    -variable     => \$expect_1,
)->grid(
    -row    => 0,
    -column => 1,
);

# Sub-Sequence Expect Scale
my $label_e2 = $frame2->Label(
    -text => 'Sub-Sequence Expect Value: ',
)->grid(
    -row    => 1,
    -column => 0,
    -sticky => 'e',
);
$expect_2 = 10;    # initial slider position
$frame2->Scale(
    -orient       => 'horizontal',
    -from         => 0,
    -to           => 20,
    -tickinterval => 5,
    -font         => '-adobe-times-*-r-normal--12-120-75-75-p-56-iso8859-1',
    -length       => 300,    # in pixels
    -variable     => \$expect_2,
)->grid(
    -row    => 1,
    -column => 1,
);
# Frame 3: Sub-Sequence Settings
# -------------------------------------------------------
# Two free-text entries (length and overlap, each followed by a unit
# label) and the HTML output checkbox.  Option names carry their leading
# dash and -column is written out in full.
$frame3->Label(-text => 'Sub-Sequence Length: ')->grid(
    -row    => 4,
    -column => 0,
    -sticky => 'e',
);
$part_length = 20;    # initial value shown in the entry
$frame3->Entry(
    -width        => 3,
    -textvariable => \$part_length,
)->grid(
    -row    => 4,
    -column => 1,
    -sticky => 'w',
);
$frame3->Label(-text => 'amino acids')->grid(
    -row    => 4,
    -column => 2,
    -sticky => 'w',
);

$frame3->Label(-text => 'Sub-Sequence Overlap: ')->grid(
    -row    => 5,
    -column => 0,
    -sticky => 'e',
);
$part_overlap = 10;    # initial value shown in the entry
$frame3->Entry(
    -width        => 3,
    -textvariable => \$part_overlap,
)->grid(
    -row    => 5,
    -column => 1,
    -sticky => 'w',
);
$frame3->Label(-text => 'amino acids')->grid(
    -row    => 5,
    -column => 2,
    -sticky => 'w',
);

# The checkbox state is stored in $html_out.
$frame3->Checkbutton(
    -variable => \$html_out,
    -text     => 'HTML Output',
)->grid(
    -row        => 6,
    -columnspan => 3,
);
# Frame 4: Execute Button
# -------------------------------------------------------
# Pressing the button runs execute().  Option names carry their leading
# dash and -column is written out in full.
my $DAB_Button = $frame4->Button(
    -text    => 'Execute',
    -command => \&execute,
)->grid(
    -row    => 6,
    -column => 2,
);
# Directory Sidebox
# -------------------------------------------------------
# Secondary window holding a directory tree.  The label under the tree is
# bound to $directory with -textvariable, and the tree's browse callback
# stores the directory the user clicked on, so the label follows the
# selection instead of showing a one-off copy of an undefined value.
my $directory = '';
my $top2 = $top->Toplevel;
$top2->DirTree(
    -browsecmd => sub {
        my ($browsed_dir) = @_;
        $directory = $browsed_dir;
    },
)->pack();
$top2->Label(-textvariable => \$directory)->pack();
$top2->Button(-text    => 'OK',
              -command => [$top2 => 'destroy'])->pack();
# Hand control over to the Tk event loop
# -------------------------------------------------------
MainLoop;
# Subroutine definitions
# -------------------------------------------------------
# display_out_dir
#
# Callback of the "Change..." button beside the output directory label.
# It is currently a placeholder: it takes no arguments and returns
# immediately without changing anything.
#
# The commented-out lines are a disabled sketch of an output-directory
# chooser.  They refer to a $dirBox that is not declared in the visible
# part of the file, so they cannot simply be uncommented as they stand.
sub display_out_dir {
# $top->TopLevel(-title => "Select Output Directory",
# -buttons => ["OK", "Cancel"]);
# $dirBox->DirTree()->pack;
# $output_Label->configure(text => "$output_dir");
return;
}
# openfile
#
# Callback of the "Open File" button.  Shows the file-open dialog, stores
# the chosen path in $input_file_path and its last component in
# $input_file, then shows that name in the input file label.
#
# Takes no arguments and returns nothing.  When the dialog is dismissed
# without a choice, the previous selection and the label stay as they are.
sub openfile {
    my $chosen_path = $top->getOpenFile();

    # A cancelled dialog yields no usable path; keep the current selection.
    return unless defined $chosen_path && length $chosen_path;

    $input_file_path = $chosen_path;

    # The file name is whatever follows the last '/' of the path.
    my @input_path = split m{/}, $input_file_path;
    $input_file = pop @input_path;

    # Update the label only now that $input_file holds the new name.
    $infile_label->configure(-text => $input_file);
    return;
}
# print_options
#
# Prints the current settings to standard output, one "Title: value" line
# each, framed by blank lines.  Takes no arguments.
#
# A setting that has no value yet is printed as an empty string, which is
# what interpolating it would print anyway, but without the
# "uninitialized value" warning that -w would otherwise emit.
sub print_options {
    my @settings = (
        [ 'Input File'           => $input_file   ],
        [ 'Output Directory'     => $output_dir   ],
        [ 'Output File'          => $output_file  ],
        [ 'Expect Value 1'       => $expect_1     ],
        [ 'Expect Value 2'       => $expect_2     ],
        [ 'Sub-Sequence Length'  => $part_length  ],
        [ 'Sub-Sequence Overlap' => $part_overlap ],
        [ 'HTML'                 => $html_out     ],
    );

    print "\n";
    for my $setting (@settings) {
        my ($title, $value) = @{$setting};
        $value = '' unless defined $value;
        print "\n$title: $value";
    }
    print "\n\n";
}
# execute
#
# Callback of the "Execute" button: runs the whole pipeline in order.
# The settings are printed once before and once after process_options(),
# which rewrites some of them.
#
# Each step is called with an explicit empty argument list.  The older
# "&name;" form would instead hand this sub's own @_ on to every step.
sub execute {
    print_options();
    process_options();
    print_options();
    get_file();
    make_files_and_blast();
    compile_full_results();
    filter_results();
}
# process_options
#
# Normalises the settings before a run:
#   - $html_out becomes exactly 1 or 0;
#   - $output_dir is set to "output";
#   - $part_length and $part_overlap are validated;
#   - $matrix is chosen from the sub-sequence length.
#
# Takes no arguments.  Dies with a message when the length or overlap is
# not a whole number, when the length is zero, or when the length is
# smaller than the overlap.
sub process_options {
    $html_out   = $html_out ? 1 : 0;
    $output_dir = "output";

    # Length and overlap come from free-text entry fields, so make sure
    # they are plain non-negative integers before comparing them as numbers.
    for my $setting ( [ 'Sub-sequence length'  => $part_length  ],
                      [ 'Sub-sequence overlap' => $part_overlap ] ) {
        my ($title, $value) = @{$setting};
        die "$title must be a whole number\n"
            unless defined $value && $value =~ /\A\d+\z/;
    }
    if ($part_length < 1) {
        die "Sub-sequence length must be greater than zero\n";
    }

    #Check if length of subsequence is less than overlap
    # NOTE(review): a length equal to the overlap still passes this check;
    # confirm that the splitting code copes with that case.
    if ($part_length < $part_overlap) {
        die "Sub-sequence length cannot be less than sub-sequence overlap\n";
    }

    #Decide on matrix to use based on subsequence length
    if ($part_length < 35) {
        $matrix = "PAM30";
    } elsif ($part_length < 50) {
        $matrix = "PAM70";
    } elsif ($part_length < 85) {
        $matrix = "BLOSUM80";
    } else {
        $matrix = "BLOSUM62";
    }
}
#-------------------Subroutine get_file----------------------------------
sub get_file {
open INPUT_FILE, "<$input_file";
my $i;
while(
\n";
print FILTERED "Input file: $input_file
\n";
print FILTERED "Sub-sequence length: $part_length
\n";
print FILTERED "Subsequence overlap: $part_overlap
\n";
print FILTERED "Expect value for full sequence BLAST: $expect_1
\n";
print FILTERED "Expect value for sub-sequence BLAST: $expect_2
\n";
print FILTERED "
Sub-sequence $i ($curr_start to $curr_end):
\n";
print FILTERED "\n";
}
else {
print FILTERED "\nSub-sequence $i ($curr_start to $curr_end):\n";
print FILTERED "---------------\n";
}
while(
\n";
}
else {
print FILTERED $_;
}
$num_hits++;
}
}
close UNFILTERED;
if($html_out) {
print FILTERED "