##scripts of CNV calling 

## Generation of intensity file using GenomeStudio (v2.0.5 Illumina)
## Export intensity files (LRR + BAF)

##CNV calling using PennCNV (v1.0.5)
####################################

## Split the intensity file with kcolumn.pl (output prefix: sample)
PennCNV-1.0.5/kcolumn.pl signal_file split 3 \
  -tab -head 3 -name -out sample \
  --start_split 1 --end_split 1000

## List the paths of the split output files in splitted_file_list.txt
## Extract the SNP list from one of the split files into snp_list.txt

## Build the PFB file (file.pfb) from the listed split files and the SNP list
PennCNV-1.0.5/compile_pfb.pl \
  --listfile splitted_file_list.txt \
  --snpposfile snp_list.txt \
  --out file.pfb

## Download sorted_gc5Base.txt from UCSC
## Generate the GC model (file.gcmodel) from sorted_gc5Base.txt and file.pfb
## The script path uses the same PennCNV-1.0.5/ prefix as every other step.
PennCNV-1.0.5/cal_gc_snp.pl sorted_gc5Base.txt file.pfb --output file.gcmodel

## Call CNVs for every file listed in splitted_file_list.txt
## Raw calls are written to file.rawcnv and the log to file.log
PennCNV-1.0.5/detect_cnv.pl -test \
  -hmm PennCNV-1.0.5/lib/hhall.hmm \
  -pfb file.pfb \
  --list splitted_file_list.txt \
  --log file.log \
  --out file.rawcnv \
  --gcmodel file.gcmodel

## Merge CNVs
## The output must be named file.clean.rawcnv: that is the file the
## filter_cnv.pl steps read.
PennCNV-1.0.5/clean_cnv.pl combineseg file.rawcnv -signalfile file.pfb -out file.clean.rawcnv

## Filter CNVs (first step): run filter_cnv.pl without explicit QC cut-offs
## to produce the per-sample QC summary (file.qcsum)
PennCNV-1.0.5/filter_cnv.pl file.clean.rawcnv \
  -qclogfile file.log \
  -qcpassout file.qcpass \
  -qcsumout file.qcsum \
  -out file.goodcnv

## Check file.qcsum and calculate the --qclrrsd, --qcbafdrift and --qcwf
## cut-offs as mean + 3 SD of the corresponding QC metric.
# Run in R:
dat <- read.table("file.qcsum", header = TRUE)

# Fail loudly if an expected column is absent; otherwise `dat$<col>` would be
# NULL and the cut-off would silently become NaN.
stopifnot(all(c("LRR_SD", "BAF_drift", "WF") %in% names(dat)))

# Upper cut-off for one QC metric: mean + 3 standard deviations.
mean_plus_3sd <- function(x) {
  mean(x) + 3 * sd(x)
}

qclrrsd <- mean_plus_3sd(dat$LRR_SD)
qcbaf <- mean_plus_3sd(dat$BAF_drift)
qcwv <- mean_plus_3sd(dat$WF)

# Show the numeric cut-offs so they can be supplied to the second
# filter_cnv.pl run.
print(c(qclrrsd = qclrrsd, qcbaf = qcbaf, qcwv = qcwv))

## Filter CNVs (second step) using the cut-off values obtained in R.
## Set the shell variables qclrrsd, qcbaf and qcwv to those numeric values
## before running this command; passing the bare words "qclrrsd", "qcbaf" or
## "qcwv" would hand filter_cnv.pl a name instead of a number.
## ${var:?msg} aborts with msg if the variable is unset or empty.
## This step uses the same output file names as the first filtering step.
PennCNV-1.0.5/filter_cnv.pl file.clean.rawcnv \
  -qclogfile file.log \
  -qcpassout file.qcpass \
  -qcsumout file.qcsum \
  -out file.goodcnv \
  -qclrrsd "${qclrrsd:?set qclrrsd to the LRR_SD cut-off (mean+3SD) from R}" \
  -qcbafdrift "${qcbaf:?set qcbaf to the BAF_drift cut-off (mean+3SD) from R}" \
  -qcwf "${qcwv:?set qcwv to the WF cut-off (mean+3SD) from R}"

## Remove CNV calls overlapping spurious regions
## spurious_regions.txt file is available in https://penncnv.openbioinformatics.org
## 1) scan_region.pl writes its matches against spurious_regions.txt
##    (with -minqueryfrac 0.5) to file.spurious
PennCNV-1.0.5/scan_region.pl file.goodcnv spurious_regions.txt -minqueryfrac 0.5 > file.spurious
## 2) drop every line of file.goodcnv that contains a line of file.spurious as
##    a fixed string; grep -F replaces the obsolescent fgrep
grep -F -v -f file.spurious file.goodcnv > file.goodcnv.regionfiltered

## CNVs annotation using ANNOVAR
################################

## Gene-based annotation (refGene, hg19) of the region-filtered CNV calls
## NOTE(review): file.goodcnv.regionfiltered is derived directly from PennCNV
## output; table_annovar.pl presumably expects ANNOVAR input format - confirm
## whether a conversion step is needed before this command.
perl table_annovar.pl file.goodcnv.regionfiltered ANNOVAR/humandb/ \
  --buildver hg19 \
  -protocol refGene \
  -operation g \
  -remove -polish

## CNVs frequency calculation using Plink (v1.07)
################################################

## Step 1: build the map file for the PLINK CNV list Courage_filtered.cnv
plink-1.07 --noweb \
  --cnv-list Courage_filtered.cnv \
  --cnv-make-map \
  --out Courage_filtered
## Step 2: write the CNV set with frequencies (--cnv-freq-method2 0.5)
plink-1.07 --noweb \
  --cfile Courage_filtered \
  --cnv-freq-method2 0.5 \
  --allow-no-sex \
  --cnv-write --cnv-write-freq \
  --out Courage_filtered_freq
