1 Required packages

library(blockmodels)
library(tidyverse)
library(huge)
library(QUIC)
library(stabs)
library(limma)
library(igraph)
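
If some of these packages are not installed, the following sketch shows one way to install them; limma is distributed through Bioconductor, and the exact commands may need adjusting (for instance, QUIC has at times been archived from CRAN):

# CRAN packages used in this document (FactoMineR is loaded later for the PCA)
install.packages(c("blockmodels", "tidyverse", "huge", "QUIC", "stabs", "igraph", "FactoMineR"))
# limma comes from Bioconductor
if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager")
BiocManager::install("limma")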

2 Data preparation

2.1 Data importation

covariates  <- read_delim("Covariates.txt" , delim = "\t") %>% dplyr::select(-subjectidp1)
transcripts <- readRDS("Transcripts.rds") %>% as_tibble(rownames = "subjectidp")
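
A quick look at the dimensions confirms that both tables were read as expected, with one row per subject:

dim(covariates)
dim(transcripts)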

2.2 Merging tables

Restrict the covariates and transcripts to the subjects present in both tables, then extract the city variable.

expr <- semi_join(transcripts, covariates, by = "subjectidp") %>% dplyr::select(-subjectidp)
city <- semi_join(covariates, transcripts, by = "subjectidp") %>% pull(city)
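
Note that semi_join() keeps the row order of its first argument, so expr follows the order of Transcripts.rds while city follows the order of Covariates.txt. If the two files are not sorted identically, it is safer to align both objects explicitly; a sketch, assuming subjectidp is a unique key in both tables:

# Defensive alternative: align both objects on a common, ordered set of ids
ids  <- intersect(transcripts$subjectidp, covariates$subjectidp)
expr <- transcripts %>% filter(subjectidp %in% ids) %>%
  arrange(subjectidp) %>% dplyr::select(-subjectidp)
city <- covariates %>% filter(subjectidp %in% ids) %>%
  arrange(subjectidp) %>% pull(city)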

2.3 Selecting cities

The distribution of subjects across cities is as follows:

table(city)
## city
##   Basel Norwich Piscina   Turin  Utrect 
##      67      39      33      76      62

We keep only Basel, Turin and Utrect for this example.

keep <- city %in% c("Basel", "Turin", "Utrect")  # shared index so expr and city stay aligned
expr <- expr[keep, ]
city <- city[keep]

3 Principal Component Analysis

A basic PCA indicates that the city variable has a strong effect:

library(FactoMineR)
pca_expr   <- PCA(expr, graph = FALSE)
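
As a minimal visual check (assuming the rows of expr are aligned with the city vector), the individuals can be plotted on the first two principal components and coloured by city:

# First two principal components, coloured by city
coords <- pca_expr$ind$coord
cities <- factor(city)
plot(coords[, 1], coords[, 2], col = as.integer(cities), pch = 19,
     xlab = "Dim 1", ylab = "Dim 2", main = "Individuals factor map")
legend("topright", legend = levels(cities), col = seq_along(levels(cities)), pch = 19)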

4 Variable screening: differential analysis with Limma

4.1 Differential analysis with Limma

design <- model.matrix(~ 0 + city)              # one indicator column per city, no intercept
colnames(design) <- c("Basel", "Turin", "Utrect")
fit <- lmFit(t(expr), design)                   # limma expects transcripts in rows, hence t(expr)
contrast.matrix <- makeContrasts(
  Basel   - Turin  ,
  Basel   - Utrect ,
  Turin   - Utrect ,
  levels = design)
fit2 <- contrasts.fit(fit, contrast.matrix)
fit2 <- eBayes(fit2)
topTable(fit2, coef=1, adjust="BH")
##                    logFC   AveExpr         t      P.Value    adj.P.Val        B
## A_24_P56689     4.232290  9.131715  31.25779 4.399099e-80 1.036296e-75 172.1123
## A_33_P3282364   2.276584  5.065083  28.78363 4.429614e-74 5.217420e-70 158.3896
## A_33_P3353906   3.355395  9.307358  28.70862 6.812310e-74 5.349253e-70 157.9620
## A_33_P3379535   2.617897  4.983144  27.97332 4.799348e-72 2.758292e-68 153.7343
## A_21_P0010155   2.613143  5.198017  27.93926 5.854505e-72 2.758292e-68 153.5368
## A_23_P140527    4.061183 10.212512  27.79182 1.385923e-71 5.441365e-68 152.6804
## A_23_P29851    -2.130031  9.717812 -27.18302 5.004018e-70 1.683995e-66 149.1154
## A_19_P00332515  3.044734  6.771544  26.91223 2.503191e-69 7.370959e-66 147.5149
## A_24_P118376    2.840383  6.244197  26.68394 9.795361e-69 2.563881e-65 146.1583
## A_19_P00320614  2.465980  5.468292  26.37280 6.355291e-68 1.497116e-64 144.2988
results <- decideTests(fit2, p.value = 1e-5)    # -1 / 0 / 1 per transcript and contrast
vennDiagram(results)

There are still too many transcripts to perform network inference properly.
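
As a quick check, the transcripts declared differentially expressed in all three contrasts can be counted directly from the decideTests() output:

# Number of transcripts significant in all three contrasts
sum(rowSums(abs(results)) == 3)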

We therefore keep only the 50 most variable transcripts among those differentially expressed in all three contrasts.

expr_DF  <- expr[, which(rowSums(abs(results)) == 3)]                           # DE in all three contrasts
expr_sub <- expr_DF[, order(apply(expr_DF, 2, var), decreasing = TRUE)[1:50]]   # 50 most variable
heatmap(as.matrix(expr_sub))
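
To make any city structure easier to see, one possibility (a sketch, not part of the original pipeline) is to scale each transcript and annotate the subject rows by city:

# Scale each transcript and colour the subject rows by city
city_cols <- c(Basel = "tomato", Turin = "steelblue", Utrect = "goldenrod")
heatmap(scale(as.matrix(expr_sub)), scale = "none",
        RowSideColors = city_cols[city],
        main = "50 most variable differentially expressed transcripts")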