#!/bin/csh

#      Copyright (C) 2006  Jifeng Tang                                                  #

#       This program is free software; you can redistribute it and/or                   #
# 	modify it under the terms of the GNU General Public License                     #
#       as published by the Free Software Foundation; either version 2                  #
#       of the License, or (at your option) any later version.                          #

#       This program is distributed in the hope that it will be useful,                 #
#       but WITHOUT ANY WARRANTY; without even the implied warranty of                  #
#       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the                   #
#       GNU General Public License for more details.                                    #

#       You should have received a copy of the GNU General Public License               #
#       along with this program; if not, write to the Free Software                     #
#       Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA. #

#########################################################################################


# Define the directories for programs and database.
# program_path1 is for alignment programs like CAP3, this should be set to
# program_path is for the path of vector file for cross_match
# where your program is located.
#
set program_path1 = "/usr/local/bin"
set program_path = "/usr/local/data/vectordir"

# Program_path is the QualitySNP program directory:
# CHANGE THIS TO THE CORRECT LOCATION!
#
set program_path2 = "/usr/local/bin/QualitySNPdir"

# If you want to activate "cross_match", change the 0 below to 1.
#
set USE_CROSSMATCH = 0

# If you want to activate "CAP3", change the 0 in the line below to 1.
#
set USE_CAP3 = 0

# Set to 1 if you want to run FASTY on the results
#
set RUN_FASTY = 0

# Your FASTY program (if desired)
#
set FASTY = "/usr/local/bin/fast34_t"

# The UNIPROT (or any other protein) database
# You may also use a letter to specify the desired database,
# provide you have set up the FASTLIBS variable.
#
set PROTDB = "/usr/local/data/uniprot.dat"

# set the minimum quality score of SNPs if sequence quality information is available
set MINQUAL = 20

# You should not need to change anything beyond this point!"
############################################################

set QUAL = 0
# First test for commandline option for using quality files
if ( "$1" =~ "-q" || "$1" =~ "-Q" ) then
	shift
	set QUAL = 1
	echo "Pipeline uses QUALITY information"

	if( -r $1.qual) then
	   if( -z $1.qual) then
	      echo "$1.qual has a size zero"
	      exit 1
	   endif
	else
	  echo "sequence quality file $1.qual does not exist"
	  exit
	endif
endif

if (-r $1) then
     if (-z $1) then
		echo "File [$1] without sequences!"
		exit 1
     endif
else
     echo "Sequence file: $1 does not exist!"
     exit 1
endif

echo "input file $1; similarity for cap3:$2; min.size of cluster:$3; $4 $5 $6 $7 $8 $9 $10 $11 $12"
#
# cross_math to remove vectors, can not be used if sequences are cleaned

if ( $USE_CROSSMATCH == 1 ) then
	$program_path1/PHRAP/cross_match.manyreads $1 $program_path/vector -minmatch 12 -minscore 20 -screen
	# if you use cross_match to remove vectors, change $1.screen to $1,
	# otherwise CAP3 will run on the old file $1
	mv $1.screen $1
endif

#
# CAP3 will align unclustered sequences
#
if ( $USE_CAP3 == 1 ) then
	$program_path1/CAP3/formcon $1 500 4000
	$program_path1/CAP3/cap3 $1 -p $2 -o 100 >cap.out
endif

#
# Search for geting alignment information
#
   if ( -z $1.cap.contigs) then
        echo " $1.cap.contigs is zero size"
        exit 1
   else
	if ( $QUAL == 0 ) then

		$program_path2/Getalignmentinfo $1.cap $3
	else
		# if your sequences with quality files, we use Getalignmentinfoqual
		# instead of Getalignmentinfo
                # test your quality file whether it exsit and in right format

		$program_path2/Getalignmentinfoqual $1.cap $3

	endif
   endif
#
# Annotated EST sequences
#
   if ( -z availcontiglist) then
      echo "without availcontig more than $3"
      exit 1
   else
      $program_path2/Getavailcontigseq $1.cap
      $program_path2/Getavailcontigqual $1.cap

      # analysis of SNP and haplotype information
      echo "***********QualitySNP detects SNP and haplotypes *****************"
	if ( $QUAL == 0 ) then
	      $program_path2/QualitySNP $1.cap $4 $5 $6 $7 $8 $9 $10
	else
	      # If your sequences with quality files we use QualitSNPqual
               $program_path2/GetavailESTqual $1
	      # check quality file of each contig
	       set ERR = 0
	       foreach i ( data/contig_*[0-9] )
		set j = ${i}.qual
                 if (-r $j) then
                    if (-z $j) then
		             echo "quality file of contig $i has size zero!"
		             set ERR = 1
                    endif
                else
                  echo "quality file of contig $i does not exist!"
		  set ERR =1
                endif
              end
              if ( $ERR == 1 ) then
	            echo "There is contig without quality file or its quality file has zero size"
	            exit
              endif
               $program_path2/QualitySNPqual $1.cap $4 $5 $6 $7 $8 $9 $10 $MINQUAL
	endif

      echo "*********** Formating EST and SNP information for database**************"

      if ( $QUAL == 0 ) then
            $program_path2/Getsnpindexcontig $1
      else
            $program_path2/Getsnpindexcontigqual $1
      endif

      $program_path2/Transfersnpfasta

      # if you have Fasty run FASTY analysis of the results
	if ( $RUN_FASTY == 1 ) then 

		$FASTY allavailcontigseqwithSNP $PROTDB -b 6 -d 6 -Q >allavailcontigseqwithSNP.fasty
		$program_path2/GetnonsySNPfasty availcontigseq allavailcontigseqwithSNP allavailcontigseqwithSNP.fasty
	endif

      # put all data to one directory 
      mkdir snpdb
      mv estdata snpdb
      mv snpindexdata snpdb
      mv snpcontigdata snpdb
      mv contigindexdata snpdb
      mv allsnpmicro snpdb

      #
      # if you have mysql in on machine run
      #

      # if($QUAL ==0) then
             # dbcreater.sql is for sequences without quality
             # mysql -h ipaddress -u root -p < dbcreater.sql   #change databasename in the script; input password for root
      # else
	     # dbcreaterQ.sql is for sequence with quality
             # mysql -h ipaddress -u root -p < dbcreaterQ.sql   # change databasename in the script; input password for root
      # endif
      # load data to databae for sequences with or without quality
      # mysql -h ipaddress -u root -p < dataload.sql    # change databasename in the script; input password for root

      # the description of the results
      echo " "
      echo " "
      echo " "
      echo "*********************** How to use the results *****************************"
      echo "Important files containing the results:"
      echo "1. File realsnpinfo includes all SNPs in the clusters, excluding those"
      echo "   from single haplotypes."
      echo "2. File SNPquality includes all relevant information for identifying"
      echo "   reliable SNPs, such as confidence scores and allele haplotype scores."
      echo "3. File availcontigwithSNP includes haplotype and SNP information for"
      echo "   every contig, as well as statistical information of SNPs for all contigs."
      echo "For more information about the result files, please consult the manual!"

   endif
endif
