1. Data

H3K27ac ChIP-seq data from human neuroepithelial stem cells (hNESCs) were used to identify the core regulatory circuits of two cell lines using CRCmapper (Saint-André et al. (2016)).

2. Objectives

CRCmapper will be used to identify the core regulatory circuits of K7-WT and COR-WT. Before running CRCmapper, HOMER (Heinz et al. (2010)) was used to call super-enhancers and ROSE (Lovén et al. (2013)) was used to associate super-enhancers with expressed genes.

3. Analysis

3.1 Run HOMER

Call super-enhancers and typical enhancers on H3K27ac.

# K7-WT: call super-enhancers (written to -o) and typical enhancers (written
# to -typical) from the H3K27ac tag directory, with the matching input tag
# directory passed via -i.
findPeaks ./TagDir_K7_WT_H3K27Ac \
    -style super \
    -typical /scratch/users/dgerard/SN-CRC-2018/HOMER-2018/TE/K7_WT_H3K27Ac-GRChr38.p1.TE \
    -o /scratch/users/dgerard/SN-CRC-2018/HOMER-2018/SE/K7_WT_H3K27Ac-GRChr38.p1.SE \
    -L 0 \
    -minDist 10000 \
    -i ./TagDir_K7_WT_INPUT

# COR-WT: same call as for K7-WT, using the COR-WT H3K27ac and input tag
# directories; output files are written relative to the working directory.
findPeaks CORWTH3K27AcL1.GRChr38.p1.q30 \
    -style super \
    -typical CORWTH3K27AcL1.GRChr38.p1.TE \
    -o CORWTH3K27AcL1.GRChr38.p1.SE \
    -L 0 \
    -minDist 10000 \
    -i CORWTINPUTL1.GRChr38.p1.q30

3.2 Prepare gff files of previously called super-enhancers that will be used by ROSE

Load libraries

library("tidyverse")

Load the super-enhancers (SEs) previously called by HOMER and convert them to GFF files for ROSE

# K7-WT: read the HOMER super-enhancer table. The first 43 lines of the file
# are skipped so that the following line supplies the column names.
SE_K7_WT <- read_delim(
    file = paste0(getwd(), "/Fig4_CRCmapper/Data/SE/K7_WT_H3K27Ac-GRChr38.p1.SE"),
    delim = "\t",
    skip = 43,
    col_names = TRUE,
    progress = TRUE
)

# Lay out the nine columns of the GFF handed to ROSE: chromosome, peak ID,
# empty, start, end, empty, strand, empty, and the peak ID once more.
SE_K7_WT_gff <- SE_K7_WT %>%
    mutate(V3 = "", V6 = "", V8 = "", SE_ID_Add = `#PeakID`) %>%
    select(chr, `#PeakID`, V3, start, end, V6, strand, V8, SE_ID_Add)

# Written tab-separated and without a header line.
write_delim(
    SE_K7_WT_gff,
    paste0(getwd(), "/Fig4_CRCmapper/Data/gff/19042018-K7_WT-H3K27ac-hg38-SE.gff"),
    delim = "\t",
    col_names = FALSE
)

# COR-WT: read the headerless BED file of HOMER super-enhancers.
SE_COR.WT <- read_delim(
    file = paste0(getwd(), "/Fig4_CRCmapper/Data/BED/CORWTH3K27AcL1.GRChr38.p1.SE.bed"),
    delim = "\t",
    col_names = FALSE,
    progress = TRUE
)

# Name the six BED columns, then prefix chromosome names that begin with a
# digit, X or Y with "chr" (1 -> chr1, X -> chrX).
SE_COR.WT <- SE_COR.WT %>%
    rename(chr = X1, start = X2, end = X3, SE_ID = X4, unk = X5, strand = X6) %>%
    mutate(chr = gsub("(^[0-9XY])", "chr\\1", chr))

# Lay out the nine columns of the GFF handed to ROSE: chromosome, SE ID,
# empty, start, end, empty, strand, empty, and the SE ID once more.
SE_COR.WT_gff <- SE_COR.WT %>%
    mutate(V3 = "", V6 = "", V8 = "", SE_ID_add = SE_ID) %>%
    select(chr, SE_ID, V3, start, end, V6, strand, V8, SE_ID_add)

# Written tab-separated and without a header line.
write_delim(
    SE_COR.WT_gff,
    paste0(getwd(), "/Fig4_CRCmapper/Data/gff/17042018-COR_WT-H3K27ac-hg38-SE.gff"),
    delim = "\t",
    col_names = FALSE
)
3.2.1 Run ROSE
# K7-WT: run ROSE on the super-enhancer GFF (-i) with the H3K27ac BAM (-r)
# and the input BAM (-c); results go to the shared ROSE output directory (-o).
python ROSE_main.py \
    -g HG38 \
    -i /scratch/users/dgerard/SN-CRC-2018/ROSE-gffFiles/19042018-K7_WT-H3K27ac-hg38-SE.gff \
    -r /scratch/users/dgerard/SN-CRC-2018/ROSE-BAMfiles/K7-WT-H3K27Ac_chr_sorted.bam \
    -c /scratch/users/dgerard/SN-CRC-2018/ROSE-BAMfiles/K7-WT-I_chr_sorted.bam \
    -o /scratch/users/dgerard/SN-CRC-2018/ROSE-2018-output \
    -s 10000 \
    -t 1000

# COR-WT: run ROSE on the super-enhancer GFF (-i) with the H3K27ac BAM (-r)
# and the input BAM (-c); results go to the shared ROSE output directory (-o).
python ROSE_main.py \
    -g HG38 \
    -i /scratch/users/dgerard/SN-CRC-2018/ROSE-gffFiles/17042018-COR_WT-H3K27ac-hg38-SE.gff \
    -r /scratch/users/dgerard/SN-CRC-2018/ROSE-BAMfiles/COR_WT_H3K27ac_chr_sorted.bam \
    -c /scratch/users/dgerard/SN-CRC-2018/ROSE-BAMfiles/COR_WT_INPUT_chr_sorted.bam \
    -o /scratch/users/dgerard/SN-CRC-2018/ROSE-2018-output \
    -s 10000 \
    -t 1000

3.3 Prepare subpeaks files of previously called super-enhancers that will be used by CRCmapper

# For K7-WT
# HOMER super-enhancer table; the first 43 lines are skipped so that the
# following line supplies the column names.
SE_HOMER_K7_WT <- read_delim(
    paste0(getwd(), "/Fig4_CRCmapper/Data/SE/K7_WT_H3K27Ac-GRChr38.p1.SE"),
    delim = "\t", col_names = TRUE, skip = 43
)

# Keep the five columns written to the subpeak BED.
SE_HOMER_K7_WT_sub <- SE_HOMER_K7_WT %>%
    select(chr, start, end, `#PeakID`, `findPeaks Score`)

# ROSE enhancer table; the first 5 lines are skipped before the header line.
SE_ROSE_K7_WT <- read_delim(
    paste0(getwd(), "/Fig4_CRCmapper/Data/Enhancers_table/19042018-K7_WT-H3K27ac-hg38-SE_AllEnhancers.table.txt"),
    delim = "\t", col_names = TRUE, skip = 5
)

# Recover the HOMER peak ID embedded in each ROSE REGION_ID.
# NOTE(review): REGION_ID is presumably "<n>_<peakID>_lociStitched" - confirm
# against the ROSE table.
# Both patterns are anchored: the whole leading run of digits is removed (so a
# multi-digit prefix such as "12_" no longer leaves a stray digit behind), and
# only the trailing "_<letters>" suffix is removed (so a peak ID that itself
# contains an underscore is not truncated).
SE_ROSE_K7_WT_name <- SE_ROSE_K7_WT %>%
    mutate(
        REGION_ID_charac = REGION_ID %>%
            str_replace("^[[:digit:]]+_", "") %>%
            str_replace("_[[:alpha:]]*$", "")
    )

# Keep only the HOMER super-enhancers that appear in the ROSE table.
SE_HOMER_K7_WT_sub_fin <- SE_HOMER_K7_WT_sub %>%
    filter(`#PeakID` %in% SE_ROSE_K7_WT_name$REGION_ID_charac)

# Written tab-separated and without a header line.
write_delim(
    SE_HOMER_K7_WT_sub_fin,
    paste0(getwd(), "/Fig4_CRCmapper/Data/BED/19042018-K7_WT-H3K27ac-hg38-SE-subPeak.bed"),
    delim = "\t", col_names = FALSE
)

# For COR-WT
# HOMER super-enhancer table; the first 43 lines are skipped so that the
# following line supplies the column names.
SE_HOMER_COR_WT <- read_delim(
    paste0(getwd(), "/Fig4_CRCmapper/Data/SE/CORWTH3K27AcL1.GRChr38.p1.SE"),
    delim = "\t", col_names = TRUE, skip = 43
)

# Keep the five columns written to the subpeak BED.
SE_HOMER_COR_WT_sub <- SE_HOMER_COR_WT %>%
    select(chr, start, end, `#PeakID`, `findPeaks Score`)

# Prefix chromosome names that begin with a digit, X or Y with "chr"
# (1 -> chr1, X -> chrX).
SE_HOMER_COR_WT_sub$chr <- gsub("(^[0-9XY])", "chr\\1", SE_HOMER_COR_WT_sub$chr)

# ROSE enhancer table; the first 5 lines are skipped before the header line.
SE_ROSE_COR_WT <- read_delim(
    paste0(getwd(), "/Fig4_CRCmapper/Data/Enhancers_table/17042018-COR_WT-H3K27ac-hg38-SE_AllEnhancers.table.txt"),
    delim = "\t", col_names = TRUE, skip = 5
)

# Recover the HOMER peak ID embedded in each ROSE REGION_ID.
# NOTE(review): REGION_ID is presumably "<n>_<peakID>_lociStitched" - confirm
# against the ROSE table.
# Both patterns are anchored: the whole leading run of digits is removed (so a
# multi-digit prefix such as "12_" no longer leaves a stray digit behind), and
# only the trailing "_<letters>" suffix is removed (so a peak ID that itself
# contains an underscore is not truncated).
SE_ROSE_COR_WT_name <- SE_ROSE_COR_WT %>%
    mutate(
        REGION_ID_charac = REGION_ID %>%
            str_replace("^[[:digit:]]+_", "") %>%
            str_replace("_[[:alpha:]]*$", "")
    )

# Keep only the HOMER super-enhancers that appear in the ROSE table.
SE_HOMER_COR_WT_sub_fin <- SE_HOMER_COR_WT_sub %>%
    filter(`#PeakID` %in% SE_ROSE_COR_WT_name$REGION_ID_charac)

# Written tab-separated and without a header line.
write_delim(
    SE_HOMER_COR_WT_sub_fin,
    paste0(getwd(), "/Fig4_CRCmapper/Data/BED/18042018-COR_WT-H3K27ac-hg38-SE-subPeak.bed"),
    delim = "\t", col_names = FALSE
)
3.3.1 Run CRCmapper
# K7-WT: run CRCmapper on the ROSE enhancer table (-e), the H3K27ac BAM (-b)
# and the super-enhancer subpeak BED (-s); results are named with -n and
# written to the directory given by -o.
python CRCmapper.py \
    -g HG38 \
    -f ./hg38_chr/ \
    -e /scratch/users/dgerard/SN-CRC-2018/ROSE-2018-output/19042018-K7_WT-H3K27ac-hg38-SE_AllEnhancers.table.txt \
    -b /scratch/users/dgerard/SN-CRC-2018/ROSE-BAMfiles/K7-WT-H3K27Ac_chr_sorted.bam \
    -s /scratch/users/dgerard/SN-CRC-2018/SE/19042018-K7_WT-H3K27ac-hg38-SE-subPeak.bed \
    -x 33 \
    -l 500 \
    -E 256 \
    -n 19042018-K7_WT-H3K27ac-hg38-SE-CRC \
    -o /scratch/users/dgerard/SN-CRC-2018/CRC-2018-output/

# COR-WT: run CRCmapper on the ROSE enhancer table (-e), the H3K27ac BAM (-b)
# and the super-enhancer subpeak BED (-s); results are named with -n and
# written to the directory given by -o.
python CRCmapper.py \
    -g HG38 \
    -f ./hg38_chr/ \
    -e /scratch/users/dgerard/SN-CRC-2018/ROSE-2018-output/17042018-COR_WT-H3K27ac-hg38-SE_AllEnhancers.table.txt \
    -b /scratch/users/dgerard/SN-CRC-2018/ROSE-BAMfiles/COR_WT_H3K27ac_chr_sorted.bam \
    -s /scratch/users/dgerard/SN-CRC-2018/SE/18042018-COR_WT-H3K27ac-hg38-SE-subPeak.bed \
    -x 33 \
    -l 500 \
    -E 225 \
    -n 18042018-COR_WT-H3K27ac-hg38-SE-CRC \
    -o /scratch/users/dgerard/SN-CRC-2018/CRC-2018-output/

4. References

Heinz, Sven, Christopher Benner, Nathanael Spann, Eric Bertolino, Yin C. Lin, Peter Laslo, Jason X. Cheng, Cornelis Murre, Harinder Singh, and Christopher K. Glass. 2010. “Simple Combinations of Lineage-Determining Transcription Factors Prime Cis-Regulatory Elements Required for Macrophage and B Cell Identities.” Molecular Cell 38 (4): 576–89. https://doi.org/10.1016/j.molcel.2010.05.004.

Lovén, Jakob, Heather A. Hoke, Charles Y. Lin, Ashley Lau, David A. Orlando, Christopher R. Vakoc, James E. Bradner, Tong Ihn Lee, and Richard A. Young. 2013. “Selective Inhibition of Tumor Oncogenes by Disruption of Super-Enhancers.” Cell 153 (2): 320–34. https://doi.org/10.1016/j.cell.2013.03.036.

Saint-André, Violaine, Alexander J. Federation, Charles Y. Lin, Brian J. Abraham, Jessica Reddy, Tong Ihn Lee, James E. Bradner, and Richard A. Young. 2016. “Models of Human Core Transcriptional Regulatory Circuitries.” Genome Research 26 (3): 385–96. https://doi.org/10.1101/gr.197590.115.