你的位置:首页 > 信息动态 > 新闻中心
信息动态
联系我们

SingleCellExperiment类使用

2021/11/27 17:44:02

SingleCellExperience类用于表示单细胞测序数据。它继承自RangedSummarizedExperiment类,并以相同的方式使用。此外,该类还支持通过reducedDims存储降维结果(如PCA、t-SNE),并通过altExps存储替代特征类型(如spike-ins)。

Usage

SingleCellExperiment(
  ...,
  reducedDims = list(),
  altExps = list(),
  rowPairs = list(),
  colPairs = list(),
  mainExpName = NULL
)

1. 生成SingleCellExperiment对象

# if (!requireNamespace("BiocManager", quietly = TRUE))
#   install.packages("BiocManager")
# 
# BiocManager::install("SingleCellExperiment")

library("SingleCellExperiment") 
ls("package:SingleCellExperiment")
# 只有表达矩阵
counts <- matrix(rpois(100, lambda = 10), ncol=10, nrow=10)
sce <- SingleCellExperiment(counts)
sce <- SingleCellExperiment(list(counts=counts))

# 含有降纬数据
ncells <- 100
u <- matrix(rpois(20000, 5), ncol=ncells)
v <- log2(u + 1)

pca <- matrix(runif(ncells*5), ncells)
tsne <- matrix(rnorm(ncells*2), ncells)

sce <- SingleCellExperiment(assays=list(counts=u, logcounts=v),
                            reducedDims=SimpleList(PCA=pca, tSNE=tsne))

# 有SummarizedExperiment对象转化生成
se <- SummarizedExperiment(assays=list(counts=u, logcounts=v))
as(se, "SingleCellExperiment")

# 含有colData,rowData 以及metadata
pretend.cell.labels <- sample(letters, ncol(counts), replace=TRUE)
pretend.gene.lengths <- sample(10000, nrow(counts))

sce <- SingleCellExperiment(list(counts=counts),
                            colData=DataFrame(label=pretend.cell.labels),
                            rowData=DataFrame(length=pretend.gene.lengths),
                            metadata=list(study="GSE111111"))

2. 提取行列信息,表达矩阵等

library(scRNAseq)
sce <- ReprocessedAllenData("tophat_counts")
# 行:特征,列:单细胞名
dim(assay(sce))
dim(sce)
# 表达矩阵
assay(sce,1)
assays(sce)[[1]]
colnames(colData(sce))
rownames(rowData(sce))
# 元数据
metadata(sce)

# Alternative Experiment methods
altExpNames(sce)
altExp(sce, "ERCC")
altExp(sce, 1)
assay(altExp(sce, "ERCC"))

3. 取子集

library(scRNAseq)
sce <- MuraroPancreasData()

sce[1:2,1:3]
sce["A1BG__chr19",]
sce[,c("D28-1_1", "D28-1_2")]

# 也可以根据rowData(sce)以及colData(sce)进行选择
# 如果有NA,要去除
sce[,sce$donore=="D28"]

4. 其他

library(scRNAseq)
sce <- ReprocessedAllenData("tophat_counts")

## 1. 增加logcounts数据 norm + log
counts <- assay(sce, "tophat_counts")
libsizes <- colSums(counts)
size.factors <- libsizes/mean(libsizes)
logcounts(sce) <- log2(t(t(counts)/size.factors) + 1)
assayNames(sce)

## 2. 获得logcounts 表达谱数据 matrix
assay(sce, "logcounts")
logcounts(sce)

## 3. 增加PCA,TSNE数据
pca_data <- prcomp(t(logcounts(sce)), rank=50)
library(Rtsne)
set.seed(5252)
tsne_data <- Rtsne(pca_data$x[,1:50], pca = FALSE)
reducedDims(sce) <- list(PCA=pca_data$x, TSNE=tsne_data$Y)
sce

## 4. getting降纬数据
educedDim(sce, "PCA")
reducedDim(sce, "tSNE")


## 5. 增加Alternative Experiment 数据
## ----options, include=FALSE, echo=FALSE---------------------------------------
library(BiocStyle)
knitr::opts_chunk$set(warning=FALSE, error=FALSE, message=FALSE)

library(SingleCellExperiment)
counts <- matrix(rpois(100, lambda = 10), ncol=10, nrow=10)
sce <- SingleCellExperiment(counts)
# the same columns
altExp(sce, "Spike") <- SingleCellExperiment(matrix(rpois(20, lambda = 5), ncol=10, nrow=2))
altExp(sce, "Protein") <- SingleCellExperiment(matrix(rpois(50, lambda = 100), ncol=10, nrow=5))
altExp(sce, "CRISPR") <- SingleCellExperiment(matrix(rbinom(80, p=0.1, 1), ncol=10, nrow=8))

sce


## 6. 函数操作
## -----------------------------------------------------------------------------
totalCount <- function(x, i=1, multiplier=1, subset.row=NULL) {
  mat <- assay(x, i)
  if (!is.null(subset.row)) {
    mat <- mat[subset.row,,drop=FALSE]
  }
  colSums(mat) * multiplier
}

## -----------------------------------------------------------------------------
totals <- applySCE(sce, FUN=totalCount)
totals

## -----------------------------------------------------------------------------
totals.manual <- list( 
  totalCount(sce),
  Spike=totalCount(altExp(sce, "Spike")),
  Protein=totalCount(altExp(sce, "Protein")),
  CRISPR=totalCount(altExp(sce, "CRISPR"))
)
stopifnot(identical(totals, totals.manual))

## -----------------------------------------------------------------------------
totals10.manual <- list( 
  totalCount(sce, multiplier=10),
  Spike=totalCount(altExp(sce, "Spike"), multiplier=10),
  Protein=totalCount(altExp(sce, "Protein"), multiplier=10),
  CRISPR=totalCount(altExp(sce, "CRISPR"), multiplier=10)
)

## -----------------------------------------------------------------------------
totals10.apply <- applySCE(sce, FUN=totalCount, multiplier=10)
stopifnot(identical(totals10.apply, totals10.manual))

## -----------------------------------------------------------------------------
totals10.lapply <- lapply(c(List(sce), altExps(sce)),
                          FUN=totalCount, multiplier=10)
stopifnot(identical(totals10.apply, totals10.lapply))

## -----------------------------------------------------------------------------
totals.custom <- applySCE(sce, FUN=totalCount, multiplier=10, 
                          ALT.ARGS=list(Spike=list(subset.row=2), 
                                        Protein=list(subset.row=3:5)))
totals.custom

## -----------------------------------------------------------------------------
head.sce <- applySCE(sce, FUN=head, n=5)
head.sce

## -----------------------------------------------------------------------------
altExp(head.sce)
altExp(head.sce, "Protein")
altExp(head.sce, "CRISPR")

## -----------------------------------------------------------------------------
head.sce.list <- applySCE(sce, FUN=head, n=5, SIMPLIFY=FALSE) 
head.sce.list

## -----------------------------------------------------------------------------
manual.head <- head(sce, n=5)
altExp(manual.head, "Spike") <- head(altExp(sce, "Spike"), n=5)
altExp(manual.head, "Protein") <- head(altExp(sce, "Protein"), n=5)
altExp(manual.head, "CRISPR") <- head(altExp(sce, "CRISPR"), n=5)
manual.head