Commit bf1d8939 authored by Shengen's avatar Shengen
Browse files

CAD data analysis scripts

parent e9f42432
Loading
Loading
Loading
Loading
+9 −1
Original line number Diff line number Diff line
@@ -50,7 +50,15 @@ The scATAC data were generated for this study and pre-processed with 10x cellran
## 2. single cell RNA-seq (scRNA) data analysis
All the scripts for the scRNA data analysis were included in the scRNA_analysis.r. 

The scRNA data were generated in a previous study in the same system as scATAC (Wirka et al., Nat Med. 2019, GSE131778). The main propose for the scRNA data analysis is to generate a scRNA reference database for the integration analysis and label transferring analysis of the scATAC data.
The scRNA data were generated in a previous study in the same system as scATAC (Wirka et al., Nat Med. 2019, GSE131778). The main propose for the scRNA data analysis is to generate a scRNA reference database for the integration analysis and label transferring analysis of the scATAC data. The script contains following parts.
- cell and gene filtering
- dimensional reduction 
- cell clustering
- UMAP visualization
- comparing the marker genes (Wirka et al., Nat Med. 2019, GSE131778) versus cell clustering 
- cell type assignment based on marker genes
- check knowledge based marker genes versus cell type assignment


## 3. Variant effect predictions using ATAC-seq peaks
All the scripts for the variant scoring analysis were included in the SVMpipeline/ folder. 
+68 −1
Original line number Diff line number Diff line
@@ -85,9 +85,74 @@ data_UMAP_K10 <- RunUMAP(data_cluster_K10, dims = 1:10, seed.use=1)
all_marker_genes_K10 <- FindAllMarkers(data_UMAP_K10, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25, random.seed=1)
MKG_K10 <- all_marker_genes_K10[which(all_marker_genes_K10[,"avg_logFC"]>=log(2) & all_marker_genes_K10[,"p_val_adj"]<1e-5),]
data_UMAPtsne_K10 <- RunTSNE(data_UMAP_K10, dims = 1:10, seed.use=1)
all_marker_genes_K10 <- FindAllMarkers(data_UMAP_K10, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25, random.seed=1)
MKG_K10 <- all_marker_genes_K10[which(all_marker_genes_K10[,"avg_logFC"]>=log(2) & all_marker_genes_K10[,"p_val_adj"]<1e-5),]

# read in marker gene list from downloaded from Wirka et al 2019, GSE131778
Bcell_MKG <- as.vector(read.table("scRNA_GSE131778_makergene_list/Bcell.txt")[,1])
Endothelial_MKG <- as.vector(read.table("scRNA_GSE131778_makergene_list/Endothelial.txt")[,1])
Fibroblast_MKG <- as.vector(read.table("scRNA_GSE131778_makergene_list/Fibroblast.txt")[,1])
Fibroblast2_MKG <- as.vector(read.table("scRNA_GSE131778_makergene_list/Fibroblast2.txt")[,1])
Fibromyocyte_MKG <- as.vector(read.table("scRNA_GSE131778_makergene_list/Fibromyocyte.txt")[,1])
Macrophage_MKG <- as.vector(read.table("scRNA_GSE131778_makergene_list/Macrophage.txt")[,1])
Mast_MKG <- as.vector(read.table("scRNA_GSE131778_makergene_list/Mast.txt")[,1])
NKcell_MKG <- as.vector(read.table("scRNA_GSE131778_makergene_list/NKcell.txt")[,1])
Neuron_MKG <- as.vector(read.table("scRNA_GSE131778_makergene_list/Neuron.txt")[,1])
Pericyte1_MKG <- as.vector(read.table("scRNA_GSE131778_makergene_list/Pericyte1.txt")[,1])
Pericyte2_MKG <- as.vector(read.table("scRNA_GSE131778_makergene_list/Pericyte2.txt")[,1])
Plasma1_MKG <- as.vector(read.table("scRNA_GSE131778_makergene_list/Plasma1.txt")[,1])
Plasma2_MKG <- as.vector(read.table("scRNA_GSE131778_makergene_list/Plasma2.txt")[,1])
SMC_MKG <- as.vector(read.table("scRNA_GSE131778_makergene_list/SMC.txt")[,1])
Tcell_MKG <- as.vector(read.table("scRNA_GSE131778_makergene_list/Tcell.txt")[,1])
c10_MKG <- as.vector(read.table("scRNA_GSE131778_makergene_list/c10.txt")[,1])
c11_MKG <- as.vector(read.table("scRNA_GSE131778_makergene_list/c11.txt")[,1])
c12_MKG <- as.vector(read.table("scRNA_GSE131778_makergene_list/c12.txt")[,1])
c14_MKG <- as.vector(read.table("scRNA_GSE131778_makergene_list/c14.txt")[,1])
c18_MKG <- as.vector(read.table("scRNA_GSE131778_makergene_list/c18.txt")[,1])



## compare clusters with marker genes, generate confusion matrix  

compare_celltype <- function(usedata, outname, MKGdata){
    ### compare with DEG
    DEGovMAT <- matrix(rep(0, 21*length(table(usedata$seurat_clusters))),ncol=21)
    rownames(DEGovMAT) <- names(table(usedata$seurat_clusters))
    colnames(DEGovMAT) <- c("Bcell","Endothelial","Fibroblast","Fibroblast2","Fibromyocyte","Macrophage","Mast","NKcell","Neuron","Pericyte1","Pericyte2","Plasma1","Plasma2","SMC","Tcell","c10","c11","c12","c14","c18","total")
    for(i in rownames(DEGovMAT)){
        MKG_thisCluster <- MKGdata[which(MKGdata[,"cluster"]==i),"gene"]
        DEGovMAT[i,"Bcell"] = length(intersect(MKG_thisCluster, Bcell_MKG))
        DEGovMAT[i,"Endothelial"] = length(intersect(MKG_thisCluster, Endothelial_MKG))
        DEGovMAT[i,"Fibroblast"] = length(intersect(MKG_thisCluster, Fibroblast_MKG))
        DEGovMAT[i,"Fibroblast2"] = length(intersect(MKG_thisCluster, Fibroblast2_MKG))
        DEGovMAT[i,"Fibromyocyte"] = length(intersect(MKG_thisCluster, Fibromyocyte_MKG))
        DEGovMAT[i,"Macrophage"] = length(intersect(MKG_thisCluster, Macrophage_MKG))
        DEGovMAT[i,"Mast"] = length(intersect(MKG_thisCluster, Mast_MKG))
        DEGovMAT[i,"NKcell"] = length(intersect(MKG_thisCluster, NKcell_MKG))
        DEGovMAT[i,"Neuron"] = length(intersect(MKG_thisCluster, Neuron_MKG))
        DEGovMAT[i,"Pericyte1"] = length(intersect(MKG_thisCluster, Pericyte1_MKG))
        DEGovMAT[i,"Pericyte2"] = length(intersect(MKG_thisCluster, Pericyte2_MKG))
        DEGovMAT[i,"Plasma1"] = length(intersect(MKG_thisCluster, Plasma1_MKG))
        DEGovMAT[i,"Plasma2"] = length(intersect(MKG_thisCluster, Plasma2_MKG))
        DEGovMAT[i,"SMC"] = length(intersect(MKG_thisCluster, SMC_MKG))
        DEGovMAT[i,"Tcell"] = length(intersect(MKG_thisCluster, Tcell_MKG))
        DEGovMAT[i,"c10"] = length(intersect(MKG_thisCluster, c10_MKG))
        DEGovMAT[i,"c11"] = length(intersect(MKG_thisCluster, c11_MKG))
        DEGovMAT[i,"c12"] = length(intersect(MKG_thisCluster, c12_MKG))
        DEGovMAT[i,"c14"] = length(intersect(MKG_thisCluster, c14_MKG))
        DEGovMAT[i,"c18"] = length(intersect(MKG_thisCluster, c18_MKG))
        DEGovMAT[i,"total"] = length(MKG_thisCluster)
    }
    write.table(DEGovMAT, file=paste0(outname,"_ownClusterDEG_vs_paperDEG.txt"),row.names=T,col.names=T,sep="\t", quote=F)   
}

# compare with the knowledge based marker genes for cell  types
compare_celltype(data_UMAP_K10, "PC10", MKG_K10)

# assign cell types to clusters based on confusion matrix comparing marker gene assignment. 
own_celltype <- c("Fibroblast","Endothelial","Macrophage","Fibro","T/NK","SMC","Pericyte1","unknown1","Pericyte2","B","Plasma","unknown2","Neuron","unknown3","Mast")


# check with the knowledge based marker genes for cell  types
DSMCmarker <- c("MYOCD","SRF","TEAD3","TEAD4","ACTA2","MYH11","TAGLN","LMOD1","CNN1","TPM2","MYL9")
MSMCmarker <- c("TCF21","KLF4","FN1","LUM","TNFRSF11B","BGN")
Emarker <- c("KLF2","PECAM1","CLDN5","PLVAP","ACKR1","EGFL7", "NFKB1","NFKB2","VCAM1","SELE")
@@ -103,6 +168,8 @@ pdf(file="top10marker_exp_heatmap.pdf")
DoHeatmap(data_UMAP_K10, features = top10marker$gene) + NoLegend()
dev.off()



# output processed scRNA data for integration analysis with scATAC
own_cluster <- as.vector(data_UMAP_K10$seurat_clusters)
names(own_cluster) <- names(data_UMAP_K10$seurat_clusters)