################################################################################
##Genome-wide association study of copy number variations in Parkinson's disease
################################################################################

library(dplyr)

##################################################
##2. Format filtered CNVs for rCNV2
##################################################

# Read the filtered CNV calls (ALL samples or EOPD+CTL) and reshape them into
# the six-column, tab-separated layout written below for rCNV2.
cnv_data_filtered <- read.table("cnv_data_filtered.txt", header = TRUE)

# Columns are picked by position: 2-4 are written as chr/start/end, 1 as the
# CNV name, 5 as the copy number and 12 as the phenotype.
# NOTE(review): positional selection silently picks the wrong fields if the
# input column order changes -- confirm against cnv_data_filtered.txt.
rCNV_format <- cnv_data_filtered[, c(2:4, 1, 5, 12)]
colnames(rCNV_format) <- c("#chr", "start", "end", "name", "cnv", "pheno")

# Phenotype code 0 is labelled as control, any other non-missing code as PD.
rCNV_format$pheno <- ifelse(rCNV_format$pheno == 0, "HEALTHY_CONTROL", "PD")

# Copy number above 2 is labelled a duplication, everything else a deletion.
# NOTE(review): a row with copy number exactly 2 would be labelled "DEL";
# presumably such rows were removed by the upstream filtering -- confirm.
rCNV_format$cnv <- ifelse(rCNV_format$cnv > 2, "DUP", "DEL")

# Spell out TRUE/FALSE: T and F are ordinary variables that can be reassigned.
write.table(
  rCNV_format,
  "courage_filtered_20SNP_len20kb.txt",
  quote = FALSE,
  row.names = FALSE,
  sep = "\t"
)

# Count samples per affection status and write the phenotype table.
# NOTE(review): `cnv_data` is not created anywhere in the visible script (only
# `cnv_data_filtered` is), so it must already exist in the session -- confirm
# that it, rather than `cnv_data_filtered`, is the intended input here.

# Keep the first row of each sample so that every sample is counted once.
allCNV_unique <- cnv_data[!duplicated(cnv_data$Sample_Name), ]
# Drop samples whose retained row has a missing copy number.
allCNV_unique <- allCNV_unique[!is.na(allCNV_unique$cnv), ]

# count() already groups by `aff`, so no separate group_by() is needed.
phenotable <- as.data.frame(count(allCNV_unique, aff))
colnames(phenotable) <- c("#HPO", "courage")

# Affection code 0 is labelled as control, any other non-missing code as PD.
phenotable$`#HPO` <- ifelse(phenotable$`#HPO` == 0, "HEALTHY_CONTROL", "PD")

# Spell out FALSE: F is an ordinary variable that can be reassigned.
write.table(
  phenotable,
  "phenotable_ALL_courage_20SNP_len20kb.txt",
  quote = FALSE,
  row.names = FALSE,
  sep = "\t"
)
