24  关联分析

在成功获取核心特征集之后,我们计划深入地探究这些特征与免疫浸润细胞之间的关联性,这是因为免疫浸润细胞在癌症的进程中扮演着至关重要的角色。免疫浸润细胞,如T细胞、B细胞、巨噬细胞等,是肿瘤微环境中的重要组成部分,它们通过直接杀伤肿瘤细胞、释放细胞因子、激活免疫应答等多种机制参与抗肿瘤免疫过程。

24.1 加载R包

使用rm(list = ls())来清空环境中的所有变量。

library(tidyverse)
library(data.table)
library(Biobase)
library(ggpmisc)
library(ggpubr)
library(ggExtra)
library(cowplot)
library(pheatmap)

rm(list = ls())
options(stringsAsFactors = F)
options(future.globals.maxSize = 10000 * 1024^2)

grp_names <- c("Early Stage", "Late Stage")
grp_colors <- c("#8AC786", "#B897CA")
grp_shapes <- c(15, 16)

24.2 导入数据


ImmuneCell <- read.csv("./data/result/ImmuneCell/HCC_ImmuneCell_ImmuCellAI.csv")
common_feature <- read.csv("./data/result/Biomarker/Biomarker_LR_RF_SVM_validation.csv")
ExprSet <- readRDS("./data/result/ExpSetObject/MergeExpSet_VoomSNM_VoomSNM_LIRI-JP_TCGA-LIHC.RDS")

24.3 函数


get_cor <- function(
    object,
    genelist,
    immunescore) {

  metadata <- pData(object)
  profile <- exprs(object) %>%
    data.frame() 
  
  Iscore <- immunescore %>%
    dplyr::select(-c(2:8)) %>%
    tibble::column_to_rownames("SampleID") %>%
    dplyr::select(-all_of(c("Effector_memory", "Gamma_delta", 
                            "Central_memory")))
  Iscore <- Iscore[, colSums(Iscore) > 0]
  
  sid <- intersect(colnames(profile), rownames(Iscore))
  profile_final <- profile[rownames(profile) %in% genelist$FeatureID, 
                           pmatch(sid, colnames(profile))]
  Iscore_final <- Iscore[pmatch(sid, rownames(Iscore)), ]
  
  if (!all(colnames(profile_final) == rownames(Iscore_final))) {
    message("wrong order bettween profile and Iscore")
  } else {
    profile_final <- profile_final[, pmatch(rownames(Iscore_final), 
                                            colnames(profile_final))]
  }
  
  # calculate the association by stats::cor.test
  mat_rho <- datRho
  mat_signif <- as.matrix(datPva)
  mat_signif[mat_signif > 0.05] <- ""
  mat_signif[mat_signif > 0.01 & mat_signif != ""] <- "*"
  mat_signif[mat_signif <= 0.01 & mat_signif != "" & mat_signif != "*"] <- "+"
  
  pl <- pheatmap::pheatmap(
      mat = t(mat_rho),
      color = colorRampPalette(c("green", "black", "red"))(100), # "green", "black", "red"    
      cluster_rows = T,
      cluster_cols = F,
      display_numbers = t(mat_signif),
      number_color = "white",      
      fontsize_number = 15,
      fontsize_row = 8,
      fontsize_col = 8, 
      fontfamily = "serif")
  
  res <- list(Rho = datRho,
              Pvalue = datPva,
              pl = pl)
  
  return(res)
}


get_lollipop <- function(
    dat_rho,
    dat_pva,
    biomarker) {
  
  plotdata <- rho %>%
    dplyr::inner_join(pval, by = "Cell") %>%
    dplyr::arrange(Rho)
  plotdata$Cell <- factor(plotdata$Cell, levels = unique(plotdata$Cell))
  
  p1 <- ggplot(plotdata, aes(x = Rho, y = Cell)) +
    scale_color_manual(name = "pvalue",
                       values = c("#E69F00", "#56B4E9", "#009E73", "gray")) +
    geom_segment(aes(x = 0, y = Cell, xend = Rho, yend = Cell), size = 1) +
    geom_point(aes(size = Rho_label, color = Pval_label)) +
    labs(title = biomarker,
         x = "Spearman Correrlation Coefficient", y = "") +
    guides(size = guide_legend(order = 1, title = "abs(cor)"),
           color = guide_legend(order = 2, title = "pvalue")) +
    theme_bw() +
    theme(plot.title = element_text(size = 12, color = "black", hjust = 0.5, face = "bold"),
          axis.title = element_text(size = 11, color = "black", face = "bold"), 
          axis.text.y = element_text(size = 10, color = "black"),
          legend.position = "right",
          text = element_text(family = "serif"))  
  
  p2 <- ggplot() +
    geom_text(plotdata, mapping = aes(x = 0, y = Cell, 
                       color = Pval_label2, 
                       label = Pval)) +
    scale_color_manual(name = "",
                       values = c("red", "black", ""))+
    theme_void() +
    guides(color = "none")
  
  pl <- cowplot::plot_grid(
    p1, p2, ncol = 2,
    align = "hv",
    rel_widths = c(1, 0.1)
  )
  
  return(pl)
}

get_cor_scatterplot <- function(
    object,
    genelist,
    immunescore,
    biomarker_gene,
    biomarker_cell,
    group_names = grp_names,
    group_colors = grp_colors) {
  
  mdat <- Iscore %>%
    dplyr::select(all_of(c("Group", biomarker_cell))) %>%
    tibble::rownames_to_column("SampleID") %>%
    dplyr::inner_join(profile %>%
                        t() %>% as.data.frame() %>%
                        dplyr::select(biomarker_gene) %>%
                        tibble::rownames_to_column("SampleID"),
                      by = "SampleID") 
  plotdata <- mdat
  colnames(plotdata)[3:4] <- c("Cell", "Gene")
  plotdata$Group <- factor(plotdata$Group, levels = group_names)
  

  xlab <- paste(biomarker_gene, "expression") 
  ylab <- biomarker_cell 
  
  pl <- ggExtra::ggMarginal(
    pl_temp, 
    type = "density",
    groupFill = TRUE)
  
  return(pl)
}

24.4 重要特征与免疫细胞的相关热图

所有重要特征和免疫浸润细胞的相关性分析,解析特征和免疫细胞之间的相关性也即是特征对癌症的免疫微环境的关联情况。这种关联可以是正相关(一个变量增加时,另一个也增加),也可以是负相关(一个变量增加时,另一个减少)。相关性的知识点:

  • 关系的强度和方向:相关系数的值(如皮尔逊相关系数)可以表示两个变量之间关系的强度和方向。系数的绝对值越接近1,关系越强;接近0则关系越弱。正数表示正相关,负数表示负相关。

  • 是否存在依赖关系:虽然相关性不等于因果性(即一个变量导致另一个变量变化),但它可以指示两个变量之间是否存在某种依赖关系。这种依赖关系可能是直接的,也可能是通过其他变量间接的。

  • 预测能力:高度相关的变量可能具有预测另一个变量的能力。例如,如果两个变量高度正相关,那么知道一个变量的值可能有助于预测另一个变量的值。

  • 数据模式:相关性分析可以帮助我们识别数据中的模式或趋势,这有助于理解数据的性质和行为。


cell_cor <- get_cor(
  object = ExprSet,
  genelist = common_feature, 
  immunescore = ImmuneCell)

cell_cor$pl

24.5 SLC6A8关联图

综合先前验证结果 小节 23.6 和 上述相关热图结果 小节 24.4,对SLC6A8单独做相关性分析和可视化进一步了解其特性。


SLC6A8_pl <- get_lollipop(
  dat_rho = cell_cor$Rho,
  dat_pva = cell_cor$Pvalue,
  biomarker = "SLC6A8")

SLC6A8_pl

24.6 SLC6A8与特定免疫细胞

针对上述相关结果 小节 24.5,为了进一步探索SLC6A8Bcell(树突状细胞)之间的具体相关情况,我们进行了散点图的可视化分析。


SLC6A8_Bcell <- get_cor_scatterplot(
  object = ExprSet,
  genelist = common_feature,
  immunescore = ImmuneCell,
  biomarker_gene = "SLC6A8",
  biomarker_cell = "Bcell")

SLC6A8_Bcell

针对上述相关结果 小节 24.5,为了进一步探索SLC6A8CD8_naive(未成熟的CD8+ T淋巴细胞)之间的具体相关情况,我们进行了散点图的可视化分析。


SLC6A8_CD8_naive <- get_cor_scatterplot(
  object = ExprSet,
  genelist = common_feature,
  immunescore = ImmuneCell,
  biomarker_gene = "SLC6A8",
  biomarker_cell = "CD8_naive")

SLC6A8_CD8_naive

24.7 SLC6A8与其他免疫细胞

除了上述两种免疫细胞外,SLC6A8与其他细胞的结果可以作为附图保存。


IC_names <- colnames(ImmuneCell)[9:32]
IC_names <- IC_names[!IC_names %in% 
                       c("Effector_memory", "Gamma_delta", 
                         "Central_memory", "CD4_T", "Tfh", 
                         "Th1", "MAIT", "iTreg", "Th2", 
                         "Macrophage", "CD8_naive", "Bcell")]

SLC6A8_ImmuneCell_list <- list()
for (j in 1:length(IC_names)) {

  IC <- IC_names[j]
  temp_plot <- get_cor_scatterplot(
    object = ExprSet,
    genelist = common_feature,
    immunescore = ImmuneCell,
    biomarker_gene = "SLC6A8",
    biomarker_cell = IC)
  
  SLC6A8_ImmuneCell_list[[j]] <- temp_plot
}

names(SLC6A8_ImmuneCell_list) <- IC_names

合并上述画图对象


SLC6A8_ImmuneCell_pl <- cowplot::plot_grid(
  SLC6A8_ImmuneCell_list$CD4_naive, SLC6A8_ImmuneCell_list$Cytotoxic,
  SLC6A8_ImmuneCell_list$Exhausted, SLC6A8_ImmuneCell_list$Tr1,
  SLC6A8_ImmuneCell_list$nTreg, SLC6A8_ImmuneCell_list$Th17,
  SLC6A8_ImmuneCell_list$NKT, SLC6A8_ImmuneCell_list$DC,
  SLC6A8_ImmuneCell_list$Monocyte, SLC6A8_ImmuneCell_list$NK,
  SLC6A8_ImmuneCell_list$Neutrophil, SLC6A8_ImmuneCell_list$CD8_T,  
  ncol = 4, align = "hv",
  labels = LETTERS[1:length(IC_names)])

SLC6A8_ImmuneCell_pl

24.8 输出结果


if (!dir.exists("./data/result/Figure/")) {
  dir.create("./data/result/Figure/", recursive = TRUE)
}

ggsave("./data/result/Figure/Fig6-A.pdf", cell_cor$pl, width = 8, height = 5, dpi = 600) 
ggsave("./data/result/Figure/Fig6-B.pdf", SLC6A8_pl, width = 6, height = 4, dpi = 600)
ggsave("./data/result/Figure/Fig6-C.pdf", SLC6A8_Bcell, width = 6, height = 4, dpi = 600)
ggsave("./data/result/Figure/Fig6-D.pdf", SLC6A8_CD8_naive, width = 6, height = 4, dpi = 600)

ggsave("./data/result/Figure/SFig6.pdf", SLC6A8_ImmuneCell_pl, width = 11, height = 9, dpi = 600)

24.9 总结

经过对关键特征的细致筛选与深入分析,我们针对肿瘤微环境中的免疫浸润细胞进行了详尽的相关性研究。在此过程中,SLC6A8的显著重要性得以进一步凸显,特别是在肝细胞癌(HCC)晚期的显著富集 小节 23.6,为我们揭示了其在癌症进程中的潜在作用。

具体而言,我们的研究观察到SLC6A8Bcell(B淋巴细胞)CD8_naive(未成熟的CD8+ T淋巴细胞)之间存在显著的相关性。一方面,SLC6A8Bcell呈显著正相关,这一发现表明SLC6A8可能与B细胞的活化、增殖或功能执行存在紧密联系,进而在肿瘤免疫应答中发挥关键作用。另一方面,SLC6A8CD8_naive则呈显著负相关,这一结果可能揭示了SLC6A8在调节CD8+ T淋巴细胞成熟和活化过程中的潜在抑制作用。

B细胞CD8+ T淋巴细胞作为免疫系统的关键组成部分,在癌症的演变和进展过程中扮演着至关重要的角色。B细胞通过产生抗体参与体液免疫应答,而CD8+ T淋巴细胞则主要执行细胞毒性功能,直接杀伤被感染的细胞或肿瘤细胞。

此外,根据我们之前的免疫浸润分析 小节 18.5,我们发现B细胞显著富集在HCC晚期分组,而CD8_naive则显著富集在HCC早期分组。这一发现进一步强调了SLC6A8与这两种免疫细胞之间关系的重要性,并可能为我们理解HCC进展的免疫机制提供新的线索。

综上所述,SLC6A8B细胞CD8_naive之间的显著相关性,以及其在HCC早晚期的显著富集,为我们揭示了该基因在肿瘤免疫应答中的潜在作用,并为进一步的研究和临床应用提供了有价值的参考。

系统信息
sessionInfo()
R version 4.3.3 (2024-02-29)
Platform: aarch64-apple-darwin20 (64-bit)
Running under: macOS Sonoma 14.2

Matrix products: default
BLAS:   /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib 
LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

time zone: Asia/Shanghai
tzcode source: internal

attached base packages:
[1] stats     graphics  grDevices datasets  utils     methods   base     

other attached packages:
 [1] pheatmap_1.0.12     cowplot_1.1.3       ggExtra_0.10.1     
 [4] ggpubr_0.6.0        ggpmisc_0.5.6       ggpp_0.5.7         
 [7] Biobase_2.62.0      BiocGenerics_0.48.1 data.table_1.15.4  
[10] lubridate_1.9.3     forcats_1.0.0       stringr_1.5.1      
[13] dplyr_1.1.4         purrr_1.0.2         readr_2.1.5        
[16] tidyr_1.3.1         tibble_3.2.1        ggplot2_3.5.1      
[19] tidyverse_2.0.0    

loaded via a namespace (and not attached):
 [1] gtable_0.3.5        xfun_0.43           htmlwidgets_1.6.4  
 [4] rstatix_0.7.2       lattice_0.22-6      tzdb_0.4.0         
 [7] vctrs_0.6.5         tools_4.3.3         generics_0.1.3     
[10] fansi_1.0.6         pkgconfig_2.0.3     Matrix_1.6-5       
[13] RColorBrewer_1.1-3  lifecycle_1.0.4     compiler_4.3.3     
[16] MatrixModels_0.5-3  munsell_0.5.1       carData_3.0-5      
[19] SparseM_1.81        httpuv_1.6.15       quantreg_5.97      
[22] htmltools_0.5.8.1   yaml_2.3.8          later_1.3.2        
[25] pillar_1.9.0        car_3.1-2           MASS_7.3-60.0.1    
[28] abind_1.4-5         mime_0.12           tidyselect_1.2.1   
[31] digest_0.6.35       stringi_1.8.4       splines_4.3.3      
[34] fastmap_1.1.1       grid_4.3.3          colorspace_2.1-0   
[37] cli_3.6.2           magrittr_2.0.3      survival_3.7-0     
[40] utf8_1.2.4          broom_1.0.5         withr_3.0.0        
[43] promises_1.3.0      scales_1.3.0        backports_1.4.1    
[46] timechange_0.3.0    rmarkdown_2.26      ggsignif_0.6.4     
[49] hms_1.1.3           shiny_1.8.1.1       evaluate_0.23      
[52] knitr_1.46          miniUI_0.1.1.1      rlang_1.1.3        
[55] Rcpp_1.0.12         xtable_1.8-4        glue_1.7.0         
[58] polynom_1.4-1       BiocManager_1.30.23 renv_1.0.0         
[61] rstudioapi_0.16.0   jsonlite_1.8.8      R6_2.5.1