!/bin/bash

logger_ngseasy logger_ngseasy_new ngseasy

ngseasy_alignment_v0.9

ngseasy_functions

ngseasy_qcfiler_bam ngseasy_filter_recalbam

ngs_full_gatk ngs_full_no_gatk

Step 1

get containers

get_containers.sh

set up directory structure

ngseasy_initiate_project

copy fastq files from the storage folder to the project and sample folders

ngseasy_initiate_fastq

start volumes container

ngseasy_volumes_container

TO DO

add checks for containers/images


NGS pipeline

#!/bin/bash -e -x

################################################################
# Program: ngseasy
# Version 1.0 
# Author: Stephen Newhouse (stephen.j.newhouse@gmail.com)
#################################################################

## NGSeasy version
NGSEASYVERSION="1.0"

## Directory holding the global run log (ngseasy-run.log).
## The variable is assigned unconditionally: previously it was only set when
## the directory had to be created, leaving it empty on every later run.
global_run_logs="${HOME}/ngseasy_logs"

## Create the directory if needed (-p makes this a no-op when it exists).
mkdir -p "${global_run_logs}"

## Global logging function.
##
## Builds one log entry of the form
##   [date]:[NGSEASY:version]:message:[cwd]:[user]:[uname -a]
## and writes it to three places: "<mylogfile>.log", the global
## "ngseasy-run.log" and stdout.
##
## Globals:   NGSEASYVERSION (read), global_run_logs (read), USER (read)
## Arguments: $1 - message text (backslash escapes such as \n are interpreted)
##            $2 - log file path without the ".log" suffix
## Outputs:   the log entry on stdout
function logger_ngseasy() {
  local message="${1}"
  local mylogfile="${2}"
  # Fall back to the default location if the caller never set global_run_logs,
  # so the run log is not written to "/ngseasy-run.log".
  local run_log_dir="${global_run_logs:-${HOME}/ngseasy_logs}"
  local entry

  entry="[$(date)]:[NGSEASY:${NGSEASYVERSION}]:${message}:[$(pwd)]:[${USER}]:[$(uname -a)]"

  # A chained ">> a >> b" only writes to the last file; tee -a appends the
  # entry to both logs and also passes it through to stdout.
  echo -e "${entry}" | tee -a "${mylogfile}.log" "${run_log_dir}/ngseasy-run.log"
}

## Print the ngseasy help text (program banner, usage line and option list)
## to stdout. Takes no arguments.
function usage_ngseasy() {
  # One argument per output line; the empty strings are the blank lines that
  # frame and separate the sections of the help text.
  printf '%s\n' \
    '' \
    'Program: ngseasy' \
    'Version 1.0' \
    'Author: Stephen Newhouse (stephen.j.newhouse@gmail.com)' \
    '' \
    'usage:   ngseasy_full -c <config_file> -d <project_directory>' \
    '' \
    'options:  -c  configuration file' \
    '          -d  project directory' \
    '          -h  show this message ' \
    ''
}

## example config: https://docs.google.com/spreadsheets/d/1VWqmMffkVDnvOtRJGlPqOYzXWnIN_IONXQHDAawaN5Q/edit#gid=0

## Parse and validate the command line.
##
## Options are parsed first so that "-h" on its own reaches its handler and
## exits 0; the required-argument check happens afterwards. Start from empty
## values so nothing inherited from the environment is mistaken for an option.
config_tsv=""
project_directory=""

## get options for command line args
while getopts "hc:d:" opt
do

  case "${opt}" in

    h)
      usage_ngseasy #print help
      exit 0
      ;;

    c)
      config_tsv="${OPTARG}"
      echo "-c = ${config_tsv}"
      ;;

    d)
      project_directory="${OPTARG}"
      echo "-d = ${project_directory}"
      ;;

    *)
      # Unknown option or missing option argument: getopts has already
      # printed its own diagnostic, so show the usage and stop.
      usage_ngseasy
      exit 1
      ;;

  esac
done

## Both -c and -d are required.
if [[ -z "${config_tsv}" || -z "${project_directory}" ]]
then
  usage_ngseasy
  exit 1
fi

## check the config file exists.
if [[ ! -e "${config_tsv}" ]]
then
  usage_ngseasy
  echo -e "ERROR : ${config_tsv} does not exist\n"
  exit 1
fi

## check the project directory exists.
if [[ ! -d "${project_directory}" ]]
then
  usage_ngseasy
  echo -e "ERROR :  ${project_directory} does not exist\n"
  exit 1
fi


# --- Start NGS Pipeline -------------------------------------------------------- #

# Read the config file one row at a time. Each row is split on whitespace into
# 22 fields, which are copied into named variables; the pipeline stages are
# then called for that row with the config file and project directory.
while read -r f1 f2 f3 f4 f5 f6 f7 f8 f9 f10 f11 f12 f13 f14 f15 f16 f17 f18 f19 f20 f21 f22
do

  ## set variables
  DATE=$(date +"%d%m%y")

  PROJECT_ID=$f1
  SAMPLE_ID=$f2
  FASTQ1=$f3
  FASTQ2=$f4
  PROJECT_DIR=$f5
  DNA_PREP_LIBRARY_ID=$f6
  NGS_PLATFORM=$f7
  NGS_TYPE=$f8
  BAIT=$f9
  CAPTURE=$f10
  FASTQC=$f11
  TRIM=$f12
  BSQR=$f13
  REALN=$f14
  ALIGNER=$f15
  VARCALLER=$f16
  CNV=$f17
  ANNOTATOR=$f18
  CLEANUP=$f19
  NCPU=$f20
  VERSION=$f21
  NGSUSER=$f22

  # Per-project log file prefix passed to logger_ngseasy, which appends ".log".
  run_log="${HOME}/ngseasy_logs/ngseasy.${PROJECT_ID}.${USER}.${DATE}"

  # Read config file log
  logger_ngseasy "[ngseasy]:Reading [${config_tsv}] " "${run_log}"

  # Summary of the parsed row, built once and used for both the console and
  # the log. REALN is printed as [value] like every other field; the previous
  # "$[...]" form was legacy arithmetic expansion and printed a number.
  config_summary="[ngseasy]:Reading [${config_tsv}] \n
[ngseasy]:Reading [${config_tsv}]:PROJECT_ID=[$PROJECT_ID] \n
[ngseasy]:Reading [${config_tsv}]:SAMPLE_ID=[$SAMPLE_ID] \n
[ngseasy]:Reading [${config_tsv}]:FASTQ1=[$FASTQ1] \n
[ngseasy]:Reading [${config_tsv}]:FASTQ2=[$FASTQ2] \n
[ngseasy]:Reading [${config_tsv}]:PROJECT_DIR=[$PROJECT_DIR] \n
[ngseasy]:Reading [${config_tsv}]:DNA_PREP_LIBRARY_ID=[$DNA_PREP_LIBRARY_ID] \n
[ngseasy]:Reading [${config_tsv}]:NGS_PLATFORM=[$NGS_PLATFORM] \n
[ngseasy]:Reading [${config_tsv}]:NGS_TYPE=[$NGS_TYPE] \n
[ngseasy]:Reading [${config_tsv}]:BAIT=[$BAIT] \n
[ngseasy]:Reading [${config_tsv}]:CAPTURE=[$CAPTURE] \n
[ngseasy]:Reading [${config_tsv}]:FASTQC=[$FASTQC] \n
[ngseasy]:Reading [${config_tsv}]:TRIM=[$TRIM] \n
[ngseasy]:Reading [${config_tsv}]:BSQR=[$BSQR] \n
[ngseasy]:Reading [${config_tsv}]:REALN=[$REALN] \n
[ngseasy]:Reading [${config_tsv}]:ALIGNER=[$ALIGNER] \n
[ngseasy]:Reading [${config_tsv}]:VARCALLER=[$VARCALLER] \n
[ngseasy]:Reading [${config_tsv}]:CNV=[$CNV] \n
[ngseasy]:Reading [${config_tsv}]:ANNOTATOR=[$ANNOTATOR] \n
[ngseasy]:Reading [${config_tsv}]:CLEANUP=[$CLEANUP] \n
[ngseasy]:Reading [${config_tsv}]:NCPU=[$NCPU] \n
[ngseasy]:Reading [${config_tsv}]:VERSION=[$VERSION] \n
[ngseasy]:Reading [${config_tsv}]:NGSUSER=[$NGSUSER]"

  echo -e "\n${config_summary}"
  logger_ngseasy "${config_summary}" "${run_log}"

  ##-------------------------------------------------------------------------##
  ## fastqc
  ## String comparison needs "==": with -eq both sides were evaluated as
  ## arithmetic (each yielding 0), so the test was always true.
  ## NOTE(review): "qc-fastc" is kept exactly as written but looks like a typo
  ## (qc-fastqc?) -- confirm against the values used in the config file.
  if [[ "${FASTQC}" == "qc-fastc" ]]
  then
    echo -e  "[ngseasy]:Calling ngseasy_fastqc"
    logger_ngseasy "[ngseasy]:Calling ngseasy_fastqc" "${run_log}"

    ngseasy_fastqc -c "${config_tsv}" -d "${project_directory}"

  else
    # Message now names the step that is actually skipped and is logged as such.
    echo -e  "[ngseasy]:Skipping ngseasy_fastqc"
    logger_ngseasy "[ngseasy]:Skipping ngseasy_fastqc" "${run_log}"
  fi

  ##-------------------------------------------------------------------------##
  ## adapter and read/base quality trimming
  if [[ "${TRIM}" == "qc-trim" ]]
  then
    echo -e  "[ngseasy]:Calling ngseasy_trimmomatic"
    logger_ngseasy "[ngseasy]:Calling ngseasy_trimmomatic" "${run_log}"

    ngseasy_trimmomatic -c "${config_tsv}" -d "${project_directory}"

  else
    echo -e  "[ngseasy]:Skipping qc-trimming"
    logger_ngseasy "[ngseasy]:Skipping qc-trimming" "${run_log}"
  fi

  ##-------------------------------------------------------------------------##
  ## alignment : includes addition of read groups at alignment stage
  ## and then duplicate marking (samblaster), indexing and sorting with sambamba
  ngseasy_alignment -c "${config_tsv}" -d "${project_directory}"


  ##-------------------------------------------------------------------------##
  ## NGS Processing : Indel realignment and base quality score recalibration using GATK or BamUtil/ogap
  ## NOTE(review): GATK is not one of the 22 fields read above, so unless it
  ## is set elsewhere it is empty and compares as 0 here (non-GATK branches).
  ## REALN and BSQR are compared numerically -- confirm the config uses 0/1.
  if [[ "${GATK}" -eq 1 && "${REALN}" -eq 1 && "${BSQR}" -eq 1 ]]
  then

    ngseasy_indel_realn -c "${config_tsv}" -d "${project_directory}"
    ngseasy_base_recal -c "${config_tsv}" -d "${project_directory}"

  elif [[ "${GATK}" -eq 1 && "${REALN}" -eq 0 && "${BSQR}" -eq 1 ]]
  then

    ngseasy_base_recal -c "${config_tsv}" -d "${project_directory}"

  elif [[ "${GATK}" -eq 0 && "${REALN}" -eq 1 && "${BSQR}" -eq 1 ]]
  then

    ngseasy_ogap_realn -c "${config_tsv}" -d "${project_directory}"
    ngseasy_bamutil_base_recal -c "${config_tsv}" -d "${project_directory}"

  elif [[ "${GATK}" -eq 0 && "${REALN}" -eq 0 && "${BSQR}" -eq 1 ]]
  then

    ngseasy_bamutil_base_recal -c "${config_tsv}" -d "${project_directory}"

  fi

  ##-------------------------------------------------------------------------##
  ## Alignment statistics
  ngseasy_alignment_qc -c "${config_tsv}" -d "${project_directory}"

  ##-------------------------------------------------------------------------##
  ## SNP/INDEL calling
  ngseasy_variant_calling -c "${config_tsv}" -d "${project_directory}"

  ngseasy_variant_calling_fast_ensemble -c "${config_tsv}" -d "${project_directory}"

  ##-------------------------------------------------------------------------##
  ## CNV Calling

  ##-------------------------------------------------------------------------##
  ## Annotation

  ##-------------------------------------------------------------------------##
  ## NGS Report


done < "${config_tsv}"

pipelines

gatk_realn_recab gatk_recab

options

PROJECT_ID SAMPLE_ID FASTQ1 FASTQ2 PROJECT_DIR DNA_PREP_LIBRARY_ID NGS_PLATFORM NGS_TYPE BAIT CAPTURE TRIM GATK BSQR REALN ALIGNER VARCALLER CNV # if WGS cn.MOPs mHMM DELLY and LUMPY if WEX ExomeDepth DELLY and LUMPY if TGS SLOPE DELLY and LUMPY GTMODEGATK CLEANUP NCPU VERSION NGSUSER


ngseasy_full ngseasy_fastqc ngseasy_trimmomatic ngseasy_alignment ngseasy_indel_realn ngseasy_base_recal ngseasy_ogap_realn ngseasy_bamutil_base_recal ngseasy_variant_calling

Dumped

ngseasy_addreadgroup
ngseasy_markduplicates

# Print the help text for the ngseasy wrapper (description, arguments and an
# example invocation) to stdout. Takes no arguments.
usage()
{
  # One argument per output line; empty strings produce the blank lines.
  printf '%s\n' \
    '  This script calls the NGSeasy pipeline : <full_gatk/full_no_gatk/fastqc/fastq_trimm/alignment/var_call/cnv_call/var_annotate/alignment_qc>' \
    '' \
    '  See NGSEasy containerized instructions.' \
    '' \
    '  ARGUMENTS:' \
    '' \
    '  -h      Flag: Show this help message' \
    '  -c      NGSeasy project and run configureation file' \
    '  -d      Base directory for (fastq_raw, reference_genomes_b37, gatk_resources, ngs_projects, ngseasy_scripts)' \
    '' \
    '  EXAMPLE USAGE:' \
    '' \
    '    ngseasy -c config.file.tsv -d /media/ngs_projects ' \
    ''
}

# Parse the command line: -h prints the help and exits 0, -c stores its
# argument in config_tsv, -d stores its argument in project_directory.
while getopts "hc:d:" opt; do
  case ${opt} in
    h) usage; exit 0 ;;
    c) config_tsv=${OPTARG}; echo "-c = ${config_tsv}" ;;
    d) project_directory=${OPTARG}; echo "-d = ${project_directory}" ;;
  esac
done

# The config file must exist; otherwise report it, show the help and exit 1.
[[ -e "${config_tsv}" ]] || {
  echo "ERROR : ${config_tsv} does not exist "
  usage
  exit 1
}

# The project directory must be an existing directory; same failure handling.
[[ -d "${project_directory}" ]] || {
  echo "ERROR :  ${project_directory} does not exist "
  usage
  exit 1
}