Unverified Commit 0d9d9428 authored by hy298's avatar hy298 Committed by GitHub
Browse files

Delete PCA_correlation_analysis directory

parent 2e64d2f2
Loading
Loading
Loading
Loading
+0 −35
Original line number Diff line number Diff line
### H3K4me1 reads at new enhancers (ov ATAC-seq peak (cutoff20) summit +/-250bp)
library(flextable)
### replicates merged
k4me1_all <- read.delim("H3K4me1_CPM_newenhancers.csv", sep=",", header=TRUE)
nrow(k4me1_all) #41122
WT=k4me1_all$LN.H3K4me1.R1+k4me1_all$LN.H3K4me1.R2
NPM1=k4me1_all$NPM1.H3K4me1.R1+k4me1_all$NPM1.H3K4me1.R2
FLT3=k4me1_all$FLT3.H3K4me1.R1+k4me1_all$FLT3.H3K4me1.R2
DM=k4me1_all$DM.H3K4me1.R1+k4me1_all$DM.H3K4me1.R2
peaksID=paste(k4me1_all$chr,"_",k4me1_all$start,"_",k4me1_all$end,sep="")
k4me1_new = data.frame(WT,NPM1,FLT3,DM,row.names = peaksID)
colnames(k4me1_new) = c("WT","Npm1c","Flt3-ITD","DM")
#correlation heatmap
panel.cor <- function(x, y, digits=2, prefix="", cex.cor)
{
    usr <- par("usr"); on.exit(par(usr))
    par(usr = c(0, 1, 0, 1))
    r <- abs(cor(x, y))
    txt <- format(c(r, 0.123456789), digits=digits)[1]
    txt <- paste(prefix, txt, sep="")
    if(missing(cex.cor)) cex <- 0.8/strwidth(txt)
    text(0.5, 0.5, txt, cex=1.8, font=2, col="#db3236")
}
panel.sc <-function(x, y,  ...)
{
    usr <- par("usr")
    points(x, y, col="#db3236", cex=.3, pch=20)
}

png("H3K4me1_newEnh_re.scatterplot.png", width = 4.5, height = 4.5, units = 'in', res = 300)
par(bg=NA)
pairs(log2(k4me1_new), lower.panel = panel.cor,upper.panel= panel.sc,
font.labels = 2,cex.labels=1.5,font.main=2,cex.main=1.5,
main="")
dev.off()
+0 −93
Original line number Diff line number Diff line
library(ggplot2)
library(ggrepel)

##### PCA for RNA-seq, use normalised reads, filtered PC genes
data <- read.table("Normalized.filteredCounts.PC.csv", header = TRUE,sep=",")
df=data[,c(6,7,8,9,4,5,2,3)]
alldata <- df[ !rowSums(df[,colnames(df)[(1:ncol(df))]]==0)==ncol(df), ]
alldatapca <- prcomp(t(alldata), scale = TRUE)
alldatascores = as.data.frame(alldatapca$x) ### PC values
group = rownames(alldatascores)
summary(alldatapca) ### components frequencies
capture.output(summary(alldatapca), file ="RNA-seq_PCA_summary.txt", append = FALSE)
# Data design file
alldataDesign = data.frame(row.names = colnames(alldatascores),
condition = c( "WT", "WT", "NPM1", "NPM1", "FLT3", "FLT3", "DM", "DM"))
alldataDesign$condition = factor(alldataDesign$condition, levels=c("WT","NPM1","FLT3","DM"))
Sample = alldataDesign$condition
# PCA plot
bg <- ggplot(data = alldatascores, aes(x = PC1, y = PC2, label=group)) +
ggtitle("RNA-seq") + xlab(paste("PC1","(56.7%)")) + ylab(paste("PC2","(15.6%)")) +
geom_point(aes(color = Sample), size = 5) +
scale_color_manual(values=c("#AAA9AD","#0078D7","#3CAEA3","#ED553B")) +
theme_bw() + theme_classic(base_size = 12, base_family = "") +
theme(axis.text = element_text(color="black", size=12, face="bold"),
axis.ticks = element_line(size = 1), axis.ticks.length = unit(.25, "cm"),
axis.title = element_text(color="black", size=14, face="bold"),
plot.title = element_text(color="black", size=14, face="bold",,hjust=0.5),
legend.title = element_text(color="black", size=14, face="bold",hjust=0.5),
legend.text = element_text(size=12, face="bold"),
panel.border = element_rect(linetype = "solid",size = 1,fill = NA))
ggsave("RNA-seq_RPKM.PCA.pdf", width = 4.5, height = 3.5)


### PCA for ATAC-seq data
dataFile=read.delim("ATAC_consensus.cpm.csv", header = TRUE, sep =",",row.names=1)
data=dataFile[,c(1,2,3,4,5,6,7,8)]
secondlowest=min( data[data!=min(data)] ) ### find the 2nd lowest value
data[data==0]=secondlowest
colnames(data)=c("WT.R1","WT.R2","NPM1.R1","NPM1.R2","FLT3.R1","FLT3.R2","DM.R1","DM.R2")
alldata=log(data,2)
alldatapca <- prcomp(t(alldata), scale = TRUE)
alldatascores = as.data.frame(alldatapca$x) ### PC values
group = rownames(alldatascores)
summary(alldatapca) ### components frequencies
capture.output(summary(alldatapca), file ="ATAC_PCA_summary.txt", append = FALSE)
# Data design file
alldataDesign = data.frame(row.names = colnames(alldatascores),
condition = c( "WT", "WT", "NPM1", "NPM1", "FLT3", "FLT3", "DM", "DM"))
alldataDesign$condition = factor(alldataDesign$condition, levels=c("WT","NPM1","FLT3","DM"))
Sample = alldataDesign$condition
# PCA plot
bg <- ggplot(data = alldatascores, aes(x = PC1, y = PC2, label=group)) +
ggtitle("ATAC-seq") + xlab(paste("PC1","(54.6%)")) + ylab(paste("PC2","(29.7%)")) +
geom_point(aes(color = Sample), size = 5) +
scale_color_manual(values=c("#AAA9AD","#0078D7","#3CAEA3","#ED553B")) +
theme_bw() + theme_classic(base_size = 12, base_family = "") +
theme(axis.text = element_text(color="black", size=12, face="bold"),
axis.ticks = element_line(size = 1), axis.ticks.length = unit(.25, "cm"),
axis.title = element_text(color="black", size=14, face="bold"),
plot.title = element_text(color="black", size=14, face="bold",,hjust=0.5),
legend.title = element_text(color="black", size=14, face="bold",hjust=0.5),
legend.text = element_text(size=12, face="bold"),
panel.border = element_rect(linetype = "solid",size = 1,fill = NA))
ggsave("ATAC_PCA.pdf", width = 4.5, height = 3.5)


### pCHiC PCA
df <- read.table("PCHiC_scores_rep.overlap.all.noNE.txt", header = TRUE, row.names = 1)
alldata <- df[ !rowSums(df[,colnames(df)[(1:ncol(df))]]==0)==ncol(df), ]
alldatapca <- prcomp(t(alldata), scale = TRUE)
alldatascores = as.data.frame(alldatapca$x) ### PC values
group = rownames(alldatascores)
summary(alldatapca) ### components frequencies
capture.output(summary(alldatapca), file ="pCHiC_PCA_summary.txt", append = FALSE)
# Data design file
alldataDesign = data.frame(row.names = colnames(alldatascores),
condition = c( "WT", "WT", "NPM1", "NPM1", "FLT3", "FLT3", "DM", "DM"))
alldataDesign$condition = factor(alldataDesign$condition, levels=c("WT","NPM1","FLT3","DM"))
Sample = alldataDesign$condition
bg <- ggplot(data = alldatascores, aes(x = PC1, y = PC2, label=group)) +
ggtitle("pCHiC HC interaction scores") + xlab(paste("PC1","(25.4%)")) + ylab(paste("PC2","(19.1%)")) +
geom_point(aes(color = Sample), size = 5) +
scale_color_manual(values=c("#AAA9AD","#0078D7","#3CAEA3","#ED553B")) +
theme_bw() + theme_classic(base_size = 12, base_family = "") +
theme(axis.text = element_text(color="black", size=12, face="bold"),
axis.ticks = element_line(size = 1), axis.ticks.length = unit(.25, "cm"),
axis.title = element_text(color="black", size=14, face="bold"),
plot.title = element_text(color="black", size=14, face="bold",,hjust=0.5),
legend.title = element_text(color="black", size=14, face="bold",hjust=0.5),
legend.text = element_text(size=12, face="bold"),
panel.border = element_rect(linetype = "solid",size = 1,fill = NA))
ggsave("pCHiC_CHiCAGOscore_PCA.pdf", width = 4.5, height = 3.5)
+0 −14
Original line number Diff line number Diff line
## PCA of RNA-seq (protein-coding genes), ATAC-seq and pCHiC data
PCA_analysis.R
 
## correlation of H3K4me1 profiles of WT and mutant cellular states
H3K4me1_correlation.R