English | 中文版 | 手機(jī)版 企業(yè)登錄 | 個(gè)人登錄 | 郵件訂閱
生物器材網(wǎng) logo
生物儀器 試劑 耗材
當(dāng)前位置 > 首頁 > 技術(shù)文章 > 代碼分享:基于TCGA甲基化數(shù)據(jù)挖掘驗(yàn)證分子表型代碼分享(第三期)

代碼分享:基于TCGA甲基化數(shù)據(jù)挖掘驗(yàn)證分子表型代碼分享(第三期)

瀏覽次數(shù):171 發(fā)布日期:2026-3-10  來源:本站 僅供參考,謝絕轉(zhuǎn)載,否則責(zé)任自負(fù)
DNA甲基化作為表觀遺傳調(diào)控的重要機(jī)制,在腫瘤發(fā)生、發(fā)展及預(yù)后中發(fā)揮關(guān)鍵作用。前兩期已經(jīng)完成了甲基化數(shù)據(jù)挖掘的基礎(chǔ)分析,這一期將立足于解釋具體的生物學(xué)問題來展開。
8.每個(gè)基因的甲基化信號值的比較 --------------------------------------------------------
# 8.每個(gè)基因的甲基化信號值的比較 --------------------------------------------------------
##對膀胱癌中差異上調(diào)的cg進(jìn)行繪制均值的折線圖
load('./00.data//pangguangai_21paired_meLoad_data_412481.Rdata')
# exp_count_gene_name =myLoad$beta
sample= pd
normalized_counts =myNorm
 
# exp_count_gene_name = exp_count_gene_name[,c(1:10,13:18,21:24,27:30,33:42)]
sample=sample[c(1:10,13:18,21:24,27:30,33:42),]
normalized_counts=normalized_counts[,c(1:10,13:18,21:24,27:30,33:42)]
colnames(marker_plot)
marker_1 = marker[,c(1,8,9,15,25)]
marker_plot <- reshape2::melt(marker_1, id.vars=c("rowname","gene",'change'), variable.name="group",value.name="AVG")
color <- c("#E63863","#0228A5")
gene <- "IFFO1"
marker_plot_sub = marker_plot[marker_plot$gene %in% gene,]
ggplot(marker_plot_sub, aes(x = rowname, y = AVG, group = group, colour=group, shape= group, fill = group)) +
  geom_line(size=0.5)+
  geom_point(size=3)+
  geom_text(aes(label=AVG),position = position_dodge(.9),vjust =-1,hjust=1.3)+
  scale_shape_manual(values = c(24,22)) +
  scale_fill_manual(values =color)+
  scale_color_manual(values = color)+
  labs(x="",y="β value", title = "IFFO1", subtitle = "Padj < 0.00001 & |logFC| >= 0.4") +
  # ylim(0,3200) +
  theme(
    #legend.position="none",
    axis.text.x = element_text(size = 12,colour="black"),
    #axis.text.x = element_text(size = 12,colour="black", angle = 45,hjust = 1),
    axis.text.y = element_text(size = 12,colour="black"),
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 12),
    #axis.ticks = element_blank(),
    plot.title = element_text(hjust = 0.5, vjust=1),
    #panel.background = element_rect(fill="white",color = "black"),
    panel.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black"),
    panel.border = element_rect(colour = "black", fill=NA))
 
ggsave(paste0("../03.plot/", "line_βval_IFFO1",".pdf"), width = 6*1.5, height = 6*1.5)
ggsave(paste0("../03.plot/", "line_βval_IFFO1",".png"), width = 6*1.5, height = 6*1.5)

結(jié)果如下:
gene <- "TMEM106A"
marker_plot_sub = marker_plot[marker_plot$gene %in% gene,]
 
ggplot(marker_plot_sub, aes(x = rowname, y = AVG, group = group, colour=group, shape= group, fill = group)) +
  geom_line(size=0.5)+
  geom_point(size=3)+
  geom_text(aes(label=AVG),position = position_dodge(.9),vjust =-1,hjust=1.3)+
  scale_shape_manual(values = c(24,22)) +
  scale_fill_manual(values =color)+
  scale_color_manual(values = color)+
  labs(x="",y="β value", title = "TMEM106A", subtitle = "Padj < 0.00001 & |logFC| >= 0.4") +
  # ylim(0,3200) +
  theme(
    #legend.position="none",
    axis.text.x = element_text(size = 12,colour="black"),
    #axis.text.x = element_text(size = 12,colour="black", angle = 45,hjust = 1),
    axis.text.y = element_text(size = 12,colour="black"),
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 12),
    #axis.ticks = element_blank(),
    plot.title = element_text(hjust = 0.5, vjust=1),
    #panel.background = element_rect(fill="white",color = "black"),
    panel.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black"),
    panel.border = element_rect(colour = "black", fill=NA))
 
ggsave(paste0("../03.plot/", "line_βval_TMEM106A",".pdf"), width = 6*1.5, height = 6*1.5)
ggsave(paste0("../03.plot/", "line_βval_TMEM106A",".png"), width = 6*1.5, height = 6*1.5)

 
結(jié)果如下:
gene <- "SHANK2"
marker_plot_sub = marker_plot[marker_plot$gene %in% gene,]
 
ggplot(marker_plot_sub, aes(x = rowname, y = AVG, group = group, colour=group, shape= group, fill = group)) +
  geom_line(size=0.5)+
  geom_point(size=3)+
  geom_text(aes(label=AVG),position = position_dodge(.9),vjust =-1,hjust=1.3)+
  scale_shape_manual(values = c(24,22)) +
  scale_fill_manual(values =color)+
  scale_color_manual(values = color)+
  labs(x="",y="β value", title = "SHANK2", subtitle = "Padj < 0.00001 & |logFC| >= 0.4") +
  # ylim(0,3200) +
  theme(
    #legend.position="none",
    axis.text.x = element_text(size = 12,colour="black"),
    #axis.text.x = element_text(size = 12,colour="black", angle = 45,hjust = 1),
    axis.text.y = element_text(size = 12,colour="black"),
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 12),
    #axis.ticks = element_blank(),
    plot.title = element_text(hjust = 0.5, vjust=1),
    #panel.background = element_rect(fill="white",color = "black"),
    panel.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black"),
    panel.border = element_rect(colour = "black", fill=NA))
 
ggsave(paste0("../03.plot/", "line_βval_SHANK2",".pdf"), width = 6*1.5, height = 6*1.5)
ggsave(paste0("../03.plot/", "line_βval_SHANK2",".png"), width = 6*1.5, height = 6*1.5)

結(jié)果如下:
gene <- "IFFO1"
marker_plot_sub = marker_plot[marker_plot$gene %in% gene,]
 
ggplot(marker_plot_sub, aes(x = rowname, y = AVG, group = group, colour=group, shape= group, fill = group)) +
  geom_bar(stat="identity", color="black",
           position=position_dodge()) +
  geom_text(aes(label=AVG),position = position_dodge(.9),vjust =-1,hjust=1.3)+
  scale_shape_manual(values = c(24,22)) +
  scale_fill_manual(values =color)+
  scale_color_manual(values = color)+
  labs(x="",y="β value", title = "IFFO1", subtitle = "Padj < 0.00001 & |logFC| >= 0.4") +
  # ylim(0,3200) +
  theme(
    #legend.position="none",
    axis.text.x = element_text(size = 12,colour="black"),
    #axis.text.x = element_text(size = 12,colour="black", angle = 45,hjust = 1),
    axis.text.y = element_text(size = 12,colour="black"),
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 12),
    #axis.ticks = element_blank(),
    plot.title = element_text(hjust = 0.5, vjust=1),
    #panel.background = element_rect(fill="white",color = "black"),
    panel.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black"),
    panel.border = element_rect(colour = "black", fill=NA))
 
ggsave(paste0("../03.plot/", "barplot_βval_IFFO1",".pdf"), width = 6*1.5, height = 6*1.5)
ggsave(paste0("../03.plot/", "barplot_βval_IFFO1",".png"), width = 6*1.5, height = 6*1.5)

結(jié)果如下:
gene <- "TMEM106A"
marker_plot_sub = marker_plot[marker_plot$gene %in% gene,]
 
ggplot(marker_plot_sub, aes(x = rowname, y = AVG, group = group, colour=group, shape= group, fill = group)) +
  geom_bar(stat="identity", color="black",
           position=position_dodge()) +
  geom_text(aes(label=AVG),position = position_dodge(.9),vjust =-1,hjust=1.3)+
  scale_shape_manual(values = c(24,22)) +
  scale_fill_manual(values =color)+
  scale_color_manual(values = color)+
  labs(x="",y="β value", title = "TMEM106A", subtitle = "Padj < 0.00001 & |logFC| >= 0.4") +
  # ylim(0,3200) +
  theme(
    #legend.position="none",
    axis.text.x = element_text(size = 12,colour="black"),
    #axis.text.x = element_text(size = 12,colour="black", angle = 45,hjust = 1),
    axis.text.y = element_text(size = 12,colour="black"),
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 12),
    #axis.ticks = element_blank(),
    plot.title = element_text(hjust = 0.5, vjust=1),
    #panel.background = element_rect(fill="white",color = "black"),
    panel.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black"),
    panel.border = element_rect(colour = "black", fill=NA))
 
ggsave(paste0("../03.plot/", "barplot_βval_TMEM106A",".pdf"), width = 6*1.5, height = 6*1.5)
ggsave(paste0("../03.plot/", "barplot_βval_TMEM106A",".png"), width = 6*1.5, height = 6*1.5)

結(jié)果如下:
gene <- "SHANK2"
marker_plot_sub = marker_plot[marker_plot$gene %in% gene,]
 
ggplot(marker_plot_sub, aes(x = rowname, y = AVG, group = group, colour=group, shape= group, fill = group)) +
  geom_bar(stat="identity", color="black",
           position=position_dodge()) +
  geom_text(aes(label=AVG),position = position_dodge(.9),vjust =-1,hjust=1.3)+
  scale_shape_manual(values = c(24,22)) +
  scale_fill_manual(values =color)+
  scale_color_manual(values = color)+
  labs(x="",y="β value", title = "SHANK2", subtitle = "Padj < 0.00001 & |logFC| >= 0.4") +
  # ylim(0,3200) +
  theme(
    #legend.position="none",
    axis.text.x = element_text(size = 12,colour="black"),
    #axis.text.x = element_text(size = 12,colour="black", angle = 45,hjust = 1),
    axis.text.y = element_text(size = 12,colour="black"),
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 12),
    #axis.ticks = element_blank(),
    plot.title = element_text(hjust = 0.5, vjust=1),
    #panel.background = element_rect(fill="white",color = "black"),
    panel.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black"),
    panel.border = element_rect(colour = "black", fill=NA))
 
 
ggsave(paste0("../03.plot/", "barplot_βval_SHANK2",".pdf"), width = 6*1.5, height = 6*1.5)
ggsave(paste0("../03.plot/", "barplot_βval_SHANK2",".png"), width = 6*1.5, height = 6*1.5)

結(jié)果如下:
gene <- c("IFFO1","TMEM106A","SHANK2")
marker_plot_sub = marker_plot[marker_plot$gene %in% gene,]
marker_plot_sub
marker_plot_sub$rowname <- factor(marker_plot_sub$rowname, levels = c("cg08875705","cg01979888", "cg01493517", "cg00983904",  "cg23737737" ,
                                                                      "cg18222083", "cg19548479" ,
                                                                      "cg20170028"
))
 
ggplot(marker_plot_sub, aes(x = rowname, y = AVG, group = group, colour=group, shape= group, fill = group)) +
  geom_line(size=0.5)+
  geom_point(size=3)+
  geom_text(aes(label=AVG),position = position_dodge(.9),vjust =-1,hjust=1.3)+
  scale_shape_manual(values = c(24,22)) +
  scale_fill_manual(values =color)+
  scale_color_manual(values = color)+
  labs(x="",y="β value", title = "TCGA BLCA", subtitle = "Padj < 0.00001 & |logFC| >= 0.4") +
  # ylim(0,3200) +
  theme(
    #legend.position="none",
    axis.text.x = element_text(size = 12,colour="black"),
    #axis.text.x = element_text(size = 12,colour="black", angle = 45,hjust = 1),
    axis.text.y = element_text(size = 12,colour="black"),
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 12),
    #axis.ticks = element_blank(),
    plot.title = element_text(hjust = 0.5, vjust=1),
    #panel.background = element_rect(fill="white",color = "black"),
    panel.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black"),
    panel.border = element_rect(colour = "black", fill=NA))
 
ggsave(paste0("../03.plot/", "line_βval_all",".pdf"), width = 6*1.5, height = 6*1.5)
ggsave(paste0("../03.plot/", "line_βval_all",".png"), width = 6*1.5, height = 6*1.5)

結(jié)果如下:
color <- c("#EE9B7E","#D9D7BC")
ggplot(marker_plot_sub, aes(x = rowname, y = AVG, group = group, colour=group, shape= group, fill = group)) +
  geom_bar(stat="identity", color="black",
           position=position_dodge()) +
  geom_text(aes(label=AVG),position = position_dodge(.9),vjust =-1,hjust=1.3)+
  scale_shape_manual(values = c(24,22)) +
  scale_fill_manual(values =color)+
  scale_color_manual(values = color)+
  labs(x="",y="β value", title = "TCGA BLCA", subtitle = "Padj < 0.00001 & |logFC| >= 0.4") +
  # ylim(0,3200) +
  theme(
    #legend.position="none",
    axis.text.x = element_text(size = 12,colour="black"),
    #axis.text.x = element_text(size = 12,colour="black", angle = 45,hjust = 1),
    axis.text.y = element_text(size = 12,colour="black"),
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 12),
    #axis.ticks = element_blank(),
    plot.title = element_text(hjust = 0.5, vjust=1),
    #panel.background = element_rect(fill="white",color = "black"),
    panel.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black"),
    panel.border = element_rect(colour = "black", fill=NA))
 
 
ggsave(paste0("../03.plot/", "barplot_βval_all",".pdf"), width = 6*1.5, height = 6*1.5)
ggsave(paste0("../03.plot/", "barplot_βval_all",".png"), width = 6*1.5, height = 6*1.5)

結(jié)果如下:
#上面的結(jié)果再加上對應(yīng)的igv的圖這里我選擇hg19
9.補(bǔ)充生存分析以及AUC分析 ----------------------------------------------
# 輸入文件
dd =read.delim('../00.data/clinical.tsv')
# clinical.tsv,包含生存數(shù)據(jù)和臨床亞組信息。
# dd <- data.table::fread("easy_input.csv",data.table = F)
dd[1:3,]
#處理數(shù)據(jù)為生存分析需要的格式
meta = dd
# 生存時(shí)間需要為月份
# meta[meta==""]=NA
meta$time = ifelse(meta$vital_status == 'Alive',meta$days_to_last_follow_up,meta$days_to_death)
meta$time = as.numeric(meta$time)/30
# 1.2生死定義
meta$event = ifelse(meta$vital_status== 'Alive',0,1)
table(meta$event)
 
# 區(qū)分年齡
meta$age = ceiling(abs(as.numeric(meta$days_to_birth))/365)
meta$age_group=ifelse(meta$age>median(meta$age,na.rm = T),'older','younger')
table(meta$age_group)                 
 
## 自定義顏色
orange <- "#EB736B"
blue <- "#57B5AB"
library(stringr)
# 1.4提取分期信息
b= meta$ajcc_pathologic_stage
a= str_extract_all(meta$ajcc_pathologic_stage,"I|V");head(a)#提取分期信息
b= sapply(a,paste,collapse = '')
head(b)
 
meta$stage = b
#去掉生存信息不全或者生存時(shí)間小于0.1(月)的病人,樣本納排標(biāo)準(zhǔn)不唯一,且差別很大
# k1 = meta$time>=0.1;table(k1)
# # 8
# k2 =!(is.na(meta$time)|is.na(meta$event));table(k2)
# 6
##只先看目前的數(shù)據(jù)
#注意生存分析只需要腫瘤的病人
head(normalized_counts)
# nm_tumor = normalized_counts[]
sample$sample_type_fix[sample$sample_type_fix=='Primary Tumor']='Tumor'
 
colnames(sample)
 
 
exp = normalized_counts[,sample$sample_type_fix %in% "Tumor"]
dim(exp)#412481     17 只存貯這17個(gè)
meta$case_submitter_id<- str_replace_all(meta$case_submitter_id ,"-", ".")
# meta$case_submitter_id =
# meta = meta[meta$case_submitter_id %in% str_sub(colnames(exp),1,12),]
meta = meta[meta$case_id %in% sample_tumor$case_id,]
sample_tumor = sample[sample$sample_type_fix %in% c("Tumor"),]
dim(sample_tumor)  #34 215
dim(meta)  #34 215
 
meta <- meta[!duplicated(meta$case_id), ]
dim(meta)  #17 215
# exp = exp[,k1&k2]
# meta = meta[k1&k2,]
# rownames(meta)
# colnames(meta)
save(meta,exp,sample_tumor,file = './/surve_model_tumor.Rdata')
# load('./00.data//surve_model_tumor.Rdata')
 
## 提取數(shù)據(jù)
# rt <- meta
# colnames(rt)
## 生存分析
# rt$days_to_death =as.numeric(rt$days_to_death)
fit <- survfit(Surv(time,event) ~age_group, data = meta)
 
## 作圖
ggsurvplot(fit,
           pval = TRUE,
           palette = c(orange, blue),
           title="age_group",
           xlab="Time")
orange <- "#EB736B"
blue <- "#57B5AB"
ggsurvplot(fit,
           palette =c("#E7B800","#2E9FDF"),
           risk.table =TRUE,pVal =TRUE,
           conf.int =TRUE,xlab ="Time in months",
           ggtheme =theme_light(),
           ncensor.plot = TRUE)
 
ggsurvplot(fit,
           palette =c("#EB736B","#57B5AB"),
           risk.table =TRUE,pVal =TRUE,
           conf.int =TRUE,xlab ="Time in months",
           ggtheme =theme_light(),
           ncensor.plot = TRUE)
 
 
#性別年齡
sfit1=survfit(Surv(time, event)~gender, data=meta)
sfit2=survfit(Surv(time, event)~age_group, data=meta)
splots<- list()
splots[[1]]<- ggsurvplot(sfit1,pval =TRUE,data = meta,risk.table = TRUE)
splots[[2]]<- ggsurvplot(sfit2,pval =TRUE,data = meta,risk.table = TRUE)
arrange_ggsurvplots(splots, print = TRUE,
                    ncol=2,nrow=1,risk.table.height=0.4)
dev.off()
 
#單個(gè)基因(選擇對應(yīng)的cg展示他們的風(fēng)險(xiǎn))
g=rownames(exp)[1]
meta$gene = ifelse(exp[g,]> median(exp[g,]),'high','low')
sfit1=survfit(Surv(time, event)~gene, data=meta)
ggsurvplot(sfit1,pval =TRUE, data = meta, risk.table = TRUE)
#多個(gè)基因
gs=rownames(exp)[1:4]
splots <- lapply(gs,function(g){
  meta$gene = ifelse(exp[g,]> median(exp[g,]),'high','low')
  sfit1=survfit(Surv(time, event)~gene,data=meta)
  ggsurvplot(sfit1,pval =TRUE, data = meta, risk.table = TRUE)
})
arrange_ggsurvplots(splots, print = TRUE,ncol=2,nrow=2,risk.table.height=0.4)
 
##log_rank_p
#加上KM生存曲線即可
mySurv <- with(meta, Surv(time, event == "1"))  # 創(chuàng)建生存對象,event == 1表示“1為結(jié)局時(shí)間發(fā)生“
log_rank_p <- pbapply(exp, 1, function(gene){
  # gene <- exp[1,]
  meta$group <- ifelse(gene > median(gene), "high", "low")
  data.survdiff <- survdiff(mySurv ~ group, data = meta)
  # survdiff()用于檢驗(yàn)兩條或多條生存曲線之間是否存在差異,
  # 或者針對已知替代項(xiàng)檢驗(yàn)單條曲線,也適用于分組資料以及多組間生存曲線的比較;
  p.val <- 1 - pchisq(data.survdiff$chisq, length(data.survdiff$n) - 1)  # 計(jì)算log-rank檢驗(yàn)的p值
  return(p.val)
})
log_rank_p <- sort(log_rank_p)  # 取出每一個(gè)基因生存分析的P值,形成表
table(log_rank_p < 0.01)  # 哪些是P<0.01的
table(log_rank_p < 0.05)  # 哪些是P<0.05的
 
log_rank <- names(log_rank_p)[log_rank_p < 0.05]  # log-rank檢驗(yàn)篩選的差異基因 0.05
log_rank_p
 
 
# Cox比例風(fēng)險(xiǎn)回歸(HR)
mySurv <- with(meta, Surv(time, event == "1"))  # 創(chuàng)建生存對象,event = 1表示“1為結(jié)局事件發(fā)生”
unicox_results <- pbapply(exp, 1, function(gene){  # 對表達(dá)矩陣exp中的每一行(基因)執(zhí)行函數(shù)function
  # gene = as.numeric(exp[1,]) # 舉個(gè)例子
  group <- ifelse(gene > median(gene), "high","low")  # 標(biāo)記不同樣本中該基因的上下調(diào),將表達(dá)量轉(zhuǎn)換為二分類變量
  survival_dat <- data.frame(group = group,
                             stage = meta$stage,
                             age = meta$age,
                             gender = meta$gender,
                             gene = gene,  # 基因表達(dá)量
                             stringsAsFactors = F)  # 選擇用于生存分析的數(shù)據(jù)
  # 擬合Cox比例風(fēng)險(xiǎn)回歸模型 m :
  m <- coxph(mySurv ~ gender + age + stage + group, data = survival_dat)  # 表達(dá)量用二分類變量group
  # m <- coxph(mySurv ~ group, data = survival_dat)  # 表達(dá)量用二分類變量group
  # m <- coxph(mySurv ~ gene, data =  survival_dat)  # 表達(dá)量也可直接使用連續(xù)型變量gene
  # sm <- summary(m)  # 看擬合出的Cox比例風(fēng)險(xiǎn)回歸模型的參數(shù)
  # n:總?cè)藬?shù)。
  # number of events:發(fā)生結(jié)局事件的人數(shù),即死亡人數(shù)
  # coef(coefficients,回歸系數(shù)):結(jié)果為正(負(fù)),表示男性(genderMALE)死亡風(fēng)險(xiǎn)高(低)于女性,或者說有較低(高)的生存概率。
  # exp(coef):也稱為風(fēng)險(xiǎn)比(HR)。HR = 1: 無影響,HR < 1: 降低風(fēng)險(xiǎn),HR > 1: 增加風(fēng)險(xiǎn)。
  # se(coef):coef的標(biāo)準(zhǔn)誤
  # z:Wald檢驗(yàn)的統(tǒng)計(jì)量,z = coef/se(coef),是否具有統(tǒng)計(jì)學(xué)意義看p值
  # Pr(>|z|):p值
  # low.95與upper.95表示HR值的95%置信度區(qū)間CI,如果95%CI跨越了1,一般就不認(rèn)為該因素對生存有顯著影響。
  beta <- coef(m)
  se <- sqrt(diag(vcov(m)))
  tmp <- round(cbind(coef = beta,  # 模型的偏回歸系數(shù)β
                     se = se,  # 偏回歸系數(shù)β的標(biāo)準(zhǔn)誤
                     z = beta/se,  # Wald檢驗(yàn)的統(tǒng)計(jì)量
                     p = 1 - pchisq((beta/se)^2, 1),  # p值,差異的統(tǒng)計(jì)學(xué)意義
                     HR = exp(beta),  # 風(fēng)險(xiǎn)值,計(jì)算公式為exp(coef)
                     HRCILL = exp(beta - qnorm(.975, 0, 1) * se),  # HR的95%置信區(qū)間低點(diǎn)
                     HRCIUL = exp(beta + qnorm(.975, 0, 1) * se)), 3)  # HR的95%置信區(qū)間高點(diǎn)
  return(tmp["grouplow",])  # 分類變量
  # return(tmp["gene",])  # 連續(xù)型變量
})
unicox_results <- as.data.frame(t(unicox_results))
table(unicox_results[,4] < 0.01)  # p值<0.01的有多少
table(unicox_results[,4] < 0.05)  # p值<0.05的有多少
 
unicox <- rownames(unicox_results)[unicox_results[,4] < 0.05]  # Cox單因素分析篩選的差異基因
save(log_rank_p,unicox_results,file = './/surve_cox_log_rank_p.Rdata')
 
 
#多個(gè)基因
#  c("cg08875705","cg01979888", "cg01493517", "cg00983904",  "cg23737737" ,"cg18222083", "cg19548479" ,"cg20170028"
 
gs=c("cg08875705","cg01979888", "cg01493517", "cg00983904",  "cg23737737" ,"cg18222083", "cg19548479" ,"cg20170028")
splots <- lapply(gs,function(g){
  meta$gene = ifelse(exp[g,]> median(exp[g,]),'high','low')
  sfit1=survfit(Surv(time, event)~gene,data=meta)
  ggsurvplot(sfit1,pval =TRUE, data = meta, risk.table = TRUE)
})
arrange_ggsurvplots(splots, print = TRUE,ncol=4,nrow=2,risk.table.height=0.4)
 
load('./00.data//surve_cox_log_rank_p.Rdata')
 
# 取交集
cd =intersect(log_rank,gs)  #0
cd =intersect(unicox,gs)  #0
 
 
gs=c("cg08875705","cg01979888", "cg01493517", "cg00983904",  "cg23737737" ,"cg18222083", "cg19548479" ,"cg20170028")
splots <- lapply(gs,function(g){
  meta$gene = ifelse(exp[g,]> mean(exp[g,]),'high','low')
  sfit1=survfit(Surv(time, event)~gene,data=meta)
  ggsurvplot(sfit1,pval =TRUE, data = meta, risk.table = TRUE)
})
arrange_ggsurvplots(splots, print = TRUE,ncol=4,nrow=2,risk.table.height=0.4)
#ggsave(paste0("../03.plot/", "volcano","_logFC_0.4_P.Value_10_5.pdf"), width = 6*1.2, height = 6*1)
 
##log_rank_p
# KM生存曲線即可
mySurv <- with(meta, Surv(time, event == "1"))  # 創(chuàng)建生存對象,event == 1表示“1為結(jié)局時(shí)間發(fā)生“
log_rank_p <- pbapply(exp, 1, function(gene){
  # gene <- exp[1,]
  meta$group <- ifelse(gene > mean(gene), "high", "low")
  data.survdiff <- survdiff(mySurv ~ group, data = meta)
  # survdiff()用于檢驗(yàn)兩條或多條生存曲線之間是否存在差異,
  # 或者針對已知替代項(xiàng)檢驗(yàn)單條曲線,也適用于分組資料以及多組間生存曲線的比較;
  p.val <- 1 - pchisq(data.survdiff$chisq, length(data.survdiff$n) - 1)  # 計(jì)算log-rank檢驗(yàn)的p值
  return(p.val)
})
log_rank_p <- sort(log_rank_p)  # 取出每一個(gè)基因生存分析的P值,形成表
table(log_rank_p < 0.01)  # 哪些是P<0.01的
table(log_rank_p < 0.05)  # 哪些是P<0.05的
 
log_rank <- names(log_rank_p)[log_rank_p < 0.05]  # log-rank檢驗(yàn)篩選的差異基因 0.05
log_rank_p
 
 
# Cox比例風(fēng)險(xiǎn)回歸(HR)
mySurv <- with(meta, Surv(time, event == "1"))  # 創(chuàng)建生存對象,event = 1表示“1為結(jié)局事件發(fā)生”
unicox_results <- pbapply(exp, 1, function(gene){  # 對表達(dá)矩陣exp中的每一行(基因)執(zhí)行函數(shù)function
  # gene = as.numeric(exp[1,]) # 舉個(gè)例子
  group <- ifelse(gene > mean(gene), "high","low")  # 標(biāo)記不同樣本中該基因的上下調(diào),將表達(dá)量轉(zhuǎn)換為二分類變量
  survival_dat <- data.frame(group = group,
                             stage = meta$stage,
                             age = meta$age,
                             gender = meta$gender,
                             gene = gene,  # 基因表達(dá)量
                             stringsAsFactors = F)  # 選擇用于生存分析的數(shù)據(jù)
  # 擬合Cox比例風(fēng)險(xiǎn)回歸模型 m :
  m <- coxph(mySurv ~ gender + age + stage + group, data = survival_dat)  # 表達(dá)量用二分類變量group
  # m <- coxph(mySurv ~ group, data = survival_dat)  # 表達(dá)量用二分類變量group
  # m <- coxph(mySurv ~ gene, data =  survival_dat)  # 表達(dá)量也可直接使用連續(xù)型變量gene
  # sm <- summary(m)  # 看擬合出的Cox比例風(fēng)險(xiǎn)回歸模型的參數(shù)
  # n:總?cè)藬?shù)。
  # number of events:發(fā)生結(jié)局事件的人數(shù),即死亡人數(shù)
  # coef(coefficients,回歸系數(shù)):結(jié)果為正(負(fù)),表示男性(genderMALE)死亡風(fēng)險(xiǎn)高(低)于女性,或者說有較低(高)的生存概率。
  # exp(coef):也稱為風(fēng)險(xiǎn)比(HR)。HR = 1: 無影響,HR < 1: 降低風(fēng)險(xiǎn),HR > 1: 增加風(fēng)險(xiǎn)。
  # se(coef):coef的標(biāo)準(zhǔn)誤
  # z:Wald檢驗(yàn)的統(tǒng)計(jì)量,z = coef/se(coef),是否具有統(tǒng)計(jì)學(xué)意義看p值
  # Pr(>|z|):p值
  # low.95與upper.95表示HR值的95%置信度區(qū)間CI,如果95%CI跨越了1,一般就不認(rèn)為該因素對生存有顯著影響。
  beta <- coef(m)
  se <- sqrt(diag(vcov(m)))
  tmp <- round(cbind(coef = beta,  # 模型的偏回歸系數(shù)β
                     se = se,  # 偏回歸系數(shù)β的標(biāo)準(zhǔn)誤
                     z = beta/se,  # Wald檢驗(yàn)的統(tǒng)計(jì)量
                     p = 1 - pchisq((beta/se)^2, 1),  # p值,差異的統(tǒng)計(jì)學(xué)意義
                     HR = exp(beta),  # 風(fēng)險(xiǎn)值,計(jì)算公式為exp(coef)
                     HRCILL = exp(beta - qnorm(.975, 0, 1) * se),  # HR的95%置信區(qū)間低點(diǎn)
                     HRCIUL = exp(beta + qnorm(.975, 0, 1) * se)), 3)  # HR的95%置信區(qū)間高點(diǎn)
  return(tmp["grouplow",])  # 分類變量
  # return(tmp["gene",])  # 連續(xù)型變量
})
unicox_results <- as.data.frame(t(unicox_results))
table(unicox_results[,4] < 0.01)  # p值<0.01的有多少
table(unicox_results[,4] < 0.05)  # p值<0.05的有多少
 
unicox <- rownames(unicox_results)[unicox_results[,4] < 0.05]  # Cox單因素分析篩選的差異基因
save(log_rank_p,unicox_results,file = './00.data//surve_cox_log_rank_p_mean.Rdata')
 
cd =intersect(log_rank,gs)  #0
cd =intersect(unicox,gs)  #0
#輸出均值的結(jié)果
# cg19548479
write.csv(log_rank_p,'./00.data//log_rank_p_mean.csv')
write.csv(unicox_results,'./00.data//unicox_results_mean.csv')
 
# cg19548479符合后面的幾個(gè)檢驗(yàn)
# gs=c("cg08875705","cg01979888", "cg01493517", "cg00983904",  "cg23737737" ,"cg18222083", "cg19548479" ,"cg20170028")
g='cg19548479'
meta$gene = ifelse(exp[g,]> median(exp[g,]),'high','low')
sfit1=survfit(Surv(time, event)~gene, data=meta)
ggsurvplot(sfit1,pval =TRUE, data = meta, risk.table = TRUE)
arrange_ggsurvplots(splots, print = TRUE,ncol=4,nrow=2,risk.table.height=0.4)

最后出圖結(jié)果如下:
Fig1樣本質(zhì)控
Fig2樣本篩選
Fig3-標(biāo)志物篩選
本文所使用的數(shù)據(jù)均已公開,歡迎各位老師獲取數(shù)據(jù)下載鏈接。
發(fā)布者:深圳市易基因科技有限公司
聯(lián)系電話:0755-28317900
E-mail:wuhuanhuan@e-gene.cn

標(biāo)簽: 代碼 甲基化 分子表型
用戶名: 密碼: 匿名 快速注冊 忘記密碼
評論只代表網(wǎng)友觀點(diǎn),不代表本站觀點(diǎn)。 請輸入驗(yàn)證碼: 8795
Copyright(C) 1998-2026 生物器材網(wǎng) 電話:021-64166852;13621656896 E-mail:info@bio-equip.com