R/single_cell_simulation.R
make_bulk_eset.Rd
Make a random expression set from a single-cell dataset
make_bulk_eset(eset, cell_fractions, n_cells = 500, combine = mean)
Biobase::ExpressionSet with a cell_type column in pData.
n x n_cell_types data frame of cell-type fractions, with one row per sample and one column per cell type.
number of single cells to use in each sample
callback function used to aggregate the counts.
matrix with genes in rows and samples in columns. The column sum is scaled to 1 million (TPM).
make_random_bulk
suppressPackageStartupMessages(library(Biobase))
suppressPackageStartupMessages(library(tibble))

# Toy single-cell dataset: three marker genes and three cell types with
# 300 cells each. Every cell expresses only the marker of its own type,
# so the expression matrix is the 3 x 3 identity with each column
# repeated 300 times (genes in rows, cells in columns).
marker_genes <- c("CD8A", "CD4", "CD19")
cells_per_type <- 300
marker_expr <- diag(3)[, rep(seq_len(3), each = cells_per_type)]
rownames(marker_expr) <- marker_genes

# phenoData carries the `cell_type` annotation (one row per cell);
# featureData must use the same row names as the expression matrix.
pheno <- data.frame(
  cell_type = rep(
    c("T cell CD8+", "T cell CD4+", "B cell"),
    each = cells_per_type
  )
)
features <- data.frame(gene_symbol = marker_genes, row.names = marker_genes)

# Bundle expression values and both annotation tables into one ExpressionSet.
sc_eset <- ExpressionSet(
  marker_expr,
  phenoData = as(pheno, "AnnotatedDataFrame"),
  featureData = as(features, "AnnotatedDataFrame")
)

# Requested composition of the simulated bulk samples:
# one row per sample, one column per cell type.
fractions <- tibble(
  "T cell CD8+" = c(0.1, 0.2, 0.3),
  "T cell CD4+" = c(0.9, 0.7, 0.5),
  "B cell" = c(0, 0.1, 0.2)
)

bulk_eset <- make_bulk_eset(sc_eset, fractions, n_cells = 500)
#> New names:
#> • `value` -> `value...1`
#> • `value` -> `value...2`
#> • `value` -> `value...3`
exprs(bulk_eset)
#> value...1 value...2 value...3
#> CD8A 1e+05 2e+05 3e+05
#> CD4 9e+05 7e+05 5e+05
#> CD19 0e+00 1e+05 2e+05