--- title: "Creating The Example Data Set" author: "Frederik Ziebell" output: rmarkdown::html_vignette: df_print: paged link-citations: TRUE vignette: > %\VignetteIndexEntry{Creating The Example Data Set} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- Here we quickly show how the data objects of the example data set can be reproduced. # Load Packages ```{r eval=FALSE} library("recount3") library("DESeq2") library("dplyr") library("stringr") library("SummarizedExperiment") library("tidyr") library("tibble") library("magrittr") ``` # Download GSE89888 via recount3 and format data ```{r eval=FALSE} # download data recount3 proj_info <- available_projects() %>% filter(project == "SRP093386") se <- create_rse(proj_info) count_mat <- compute_read_counts(se) rownames(count_mat) <- str_replace(rownames(count_mat), "\\.[:number:]+$", "") meta <- colData(se) %>% as_tibble() %>% separate(sra.sample_title, into = c("cell_line", "treatment", "mutation", "replicate"), sep = "-") %>% select(cell_line, treatment, mutation, replicate) colnames(count_mat) <- paste0(meta$treatment, "_", meta$mutation, "_", meta$replicate) # subset to cell line T47D count_mat <- count_mat[, meta$cell_line == "T47D"] meta <- meta[meta$cell_line == "T47D", c("treatment", "mutation", "replicate")] T47D <- make_dds(count_mat, meta, ah_record = "AH89426") T47D <- T47D[rowSums(assay(T47D))>0,] # round some numeric data to reduce the size of the data object rowData(T47D)$gc_content <- round(rowData(T47D)$gc_content,1) ``` # Differential testing ```{r eval=FALSE} dds <- T47D dds <- filter_genes(dds, min_count = 5, min_rep = 4) dds$mutation <- as.factor(dds$mutation) dds$treatment <- as.factor(dds$treatment) design(dds) <- ~ mutation + treatment # to not run DESeq2 in the main vignette, # wo pre-compute the dispersion plot and diff testing results dds <- DESeq(dds, parallel=T) png(filename="disp_ests.png", width=7, height=5, units="in", res=200) plotDispEsts(dds) dev.off() T47D_diff_testing <- lfcShrink(dds, coef = "mutation_WT_vs_D538G", lfcThreshold = log2(1.5), type = "normal", parallel = TRUE) T47D_diff_testing$stat <- NULL T47D_diff_testing$lfcSE <- NULL T47D_diff_testing$pvalue <- NULL ```