vect_COG_category_long <- c("A" = "A: RNA processing and modification",
"B" = "B: Chromatin Structure and dynamics",
"C" = "C: Energy production and conversion",
"D" = "D: Cell cycle control, cell division, chromosome partitioning",
"E" = "E: Amino Acid transport and metabolism",
"F" = "F: Nucleotide transport and metabolism",
"G" = "G: Carbohydrate transport and metabolism",
"H" = "H: Coenzyme transport and metabolism",
"I" = "I: Lipid transport and metabolism",
"J" = "J: Translation, ribosomal structure and biogenesis",
"K" = "K: Transcription",
"L" = "L: Replication, recombination and repair",
"M" = "M: Cell wall/membrane/envelope biogenesis",
"N" = "N: Cell motility",
"O" = "O: Post-translational modification, protein turnover, chaperones",
"P" = "P: Inorganic ion transport and metabolism",
"Q" = "Q: Secondary metabolites biosynthesis, transport and catabolism",
"T" = "T: Signal Transduction mechanisms",
"U" = "U: Intracellular trafficking, secretion and vesicular transport",
"V" = "V: Defense mechanisms",
"W" = "W: Extracellular structures",
"X" = "X: Mobilome: prophages, transposons",
"Y" = "Y: Nuclear structure",
"Z" = "Z: Cytoskeleton",
"R" = "R: General functional prediction only",
"S" = "S: Function unknown" #,
# "-" = "-: Other"
)
COG_colours <- hue_pal()(length(vect_COG_category_long))
names(COG_colours) <- vect_COG_category_long
species_colours <- c("D.hungarica" = "#e66101", "P.graminis" = "#fdb863",
"P.syringae" = "#b2abd2", "R.enclensis" = "#5e3c99")
compute_deseq2_analysis = function(myData, sample_data, subset_var=FALSE, select=FALSE,
contrast_col, ref, tested){
data <- myData
sample <- sample_data
if ((subset_var != FALSE) & (select != FALSE)){
sample %>% filter(!!as.name(subset_var) == select) -> sample
}
sample[[contrast_col]] <- factor(sample[[contrast_col]])
data <- data[row.names(sample)]
data_matrix <- round(as.matrix(data))
d = formula(paste("~", " ", contrast_col))
dds <- DESeqDataSetFromMatrix(countData = data_matrix,
colData = sample,
design = d)
dds <- estimateSizeFactors(dds)
dds <- DESeq(dds)
resultsNames(dds)
resDESeq <- results(dds, contrast = c(contrast_col, tested, ref),
independentFiltering = TRUE, alpha=0.1)
resDESeq <- resDESeq[order(resDESeq$padj),]
res <- data.frame(resDESeq)
res$gene = row.names(res)
res$condition = select
res$SAMPLE_COMPARISON = paste0(tested, "_VS_", ref)
return(res)
}
Comparison of an artificial microbial assemblage gene expression under two cloud-like conditions:
summer day (SD): light, 250µM H2O2, 17°C
winter night (WN): dark, no added H2O2, 5°C
The metatranscriptomics data was processed using a custom made Snakemake workflow.
Two sequencing runs were performed by Genoscreen (Lille, France) as the first run produced relatively low quality reads. Both runs are added into a single dataset in our analyses.
After quality control steps and read cleaning (report) remaining rRNA (not depleted prior to sequencing) were removed with sortmeRNA. As fungal rRNA were not depleted in our libraries, the vast majority of our reads correspond to Dioszegia hungarica rRNAs.
Non ribosomal and ribosomal RNAs were then mapped separately on our reference genomes using STAR.
For non rRNA reads: ~90-95% of reads mapped in biological samples. In blank samples, around 40% of reads were mapped, and only partially.
Few reads were mapped on P. graminis PDD-13b-3 genome. It will be difficult to get significant results for this species.
Lastly, for each gene of the assemblage, read counts were obtained with featureCounts.’-M’ and ‘–fraction’ options were used to count multi-mapping reads fractionally (if a read maps on x features: each feature gets 1/x counts).
counts_table <- read.csv("../results/all_counts_community_artificial_no_rRNA.tsv", sep = "\t", row.names = "Geneid")
metadata_table <- read.csv("../data/metadata.txt", sep = "\t", row.names = "name") # row.names = "sample"
counts_table %>% rename("WN_TF_1" = "S_5C1",
"WN_TF_2" = "S_5C2",
"WN_TF_3" = "S_5C3",
"WN_BLK" = "S_5BLK",
"SD_TF_1" = "S_17C1",
"SD_TF_2" = "S_17C2",
"SD_TF_3" = "S_17C3",
"SD_BLK" = "S_17BLK") -> counts_table
annotation_table <- read.csv("../data/annotations_final_community_updated.tsv", sep="\t", row.names = "Geneid")
annotation_all <- read.csv("../data/annotations_final_community_updated.tsv", sep="\t", row.names = "Geneid")
annotation_table_long <- read.csv("../data/annotations_final_community_long2.tsv", sep="\t", row.names = "Geneid")
## added july 2024
counts_table <- counts_table[intersect(rownames(counts_table), rownames(annotation_all)),]
annotation_diohu <- read.csv("../data/dioszegia_kegg.tsv", sep="\t", row.names = "Geneid")
annotation_psegr <- read.csv("../data/pseudomonas_graminis_kegg.tsv", sep="\t", row.names = "Geneid")
annotation_psesy <- read.csv("../data/pseudomonas_syringae_kegg.tsv", sep="\t", row.names = "Geneid")
annotation_rhoen <- read.csv("../data/rhodococcus_kegg.tsv", sep="\t", row.names = "Geneid")
chem_data_metaT <- read.csv("../data/formaldehyde_evolution_transcriptomics.txt", sep="\t")
chem_data_metaB <- read.csv("../data/formaldehyde_evolution_metabolomics.txt", sep="\t")
## Data for figure
annotation_table_fig <- readxl::read_excel("../data/Table_S3_DEGs_annotations.xlsx")
rename(annotation_table_fig, "COG_category" = "COG category",
"COG_category_long" = "COG category long",
"COG_process" = "COG process") -> annotation_table_fig
metabolomics_df <- read.csv("../data/metabolomics.txt", sep="\t", row.names = 1)
# keep a version of the metabolomics dataset with all time points
metabolomics_df -> metabolomics_all_times_df
metabolomics_all_times_df %>% t() %>% data.frame() -> metabolomics_all_times_df
metabolomics_all_times_df %>% rename("WN_T0_1" = "S5C_T0_1",
"WN_T0_2" = "S5C_T0_2",
"WN_T0_3" = "S5C_T0_3",
"SD_T0_1" = "S17C_T0_1",
"SD_T0_2" = "S17C_T0_2",
"SD_T0_3" = "S17C_T0_3",
"WN_TF_1" = "S5C_TF_1",
"WN_TF_2" = "S5C_TF_2",
"WN_TF_3" = "S5C_TF_3",
"SD_TF_1" = "S17C_TF_1",
"SD_TF_2" = "S17C_TF_2",
"SD_TF_3" = "S17C_TF_3") -> metabolomics_all_times_df
metabolomics_all_times_df <- metabolomics_all_times_df[,c("WN_T0_1", "WN_T0_2", "WN_T0_3", "SD_T0_1", "SD_T0_2", "SD_T0_3",
"WN_TF_1", "WN_TF_2", "WN_TF_3", "SD_TF_1", "SD_TF_2", "SD_TF_3")]
metadata_table_metaB_all_times <- read.csv("../data/metadata_metaB.txt", sep = "\t", row.names = "name") # row.names = "sample"
# Keep final time points only, like for metatranscriptomics
metabolomics_df %>% t() %>%
data.frame() %>% select(starts_with(c("S17C_TF_", "S5C_TF_"))) -> metabolomics_df
metabolomics_df %>% rename("WN_TF_1" = "S5C_TF_1",
"WN_TF_2" = "S5C_TF_2",
"WN_TF_3" = "S5C_TF_3",
"SD_TF_1" = "S17C_TF_1",
"SD_TF_2" = "S17C_TF_2",
"SD_TF_3" = "S17C_TF_3") -> metabolomics_df
metabolomics_box_plot_df <- readxl::read_excel("../results/metabolomics_annotation_boxplot.xlsx")
metabolomics_annotations <- readxl::read_excel("../results/Table_S2_Metabolites_identified.xlsx", n_max = 25)
chem_data_metaT$experiment <- "metaT"
chem_data_metaB$experiment <- "metaB"
chem_data <- rbind(chem_data_metaT, chem_data_metaB)
## Metatranscriptomics data
chem_data[chem_data$color == "#89DDF8" & chem_data$experiment == "metaT",] -> tmp1
group_by(tmp1, sampling_time) %>% summarise(formaldehyde_mean=mean(formaldehyde_ratio_to_initial),
formaldehyde_sd=sd(formaldehyde_ratio_to_initial)) -> tmp1
tmp1$color <- "#89DDF8"
tmp1$condition <- "WN"
tmp1$category <- "biotic_WN"
tmp1$experiment <- "metaT"
chem_data[chem_data$color == "#F8AD18" & chem_data$experiment == "metaT",] -> tmp2
group_by(tmp2, sampling_time) %>% summarise(formaldehyde_mean=mean(formaldehyde_ratio_to_initial),
formaldehyde_sd=sd(formaldehyde_ratio_to_initial)) -> tmp2
tmp2$color <- "#F8AD18"
tmp2$condition <- "SD"
tmp2$category <- "biotic_SD"
tmp2$experiment <- "metaT"
chem_data[chem_data$color == "#A6C1CA" & chem_data$experiment == "metaT",] -> tmp3
group_by(tmp3, sampling_time) %>% summarise(formaldehyde_mean=mean(formaldehyde_ratio_to_initial),
formaldehyde_sd=sd(formaldehyde_ratio_to_initial)) -> tmp3
tmp3$color <- "#A6C1CA"
tmp3$condition <- "WN"
tmp3$category <- "abiotic_WN"
tmp3$experiment <- "metaT"
chem_data[chem_data$color == "#C4B69B" & chem_data$experiment == "metaT",] -> tmp4
group_by(tmp4, sampling_time) %>% summarise(formaldehyde_mean=mean(formaldehyde_ratio_to_initial),
formaldehyde_sd=sd(formaldehyde_ratio_to_initial)) -> tmp4
tmp4$color <- "#C4B69B"
tmp4$condition <- "SD"
tmp4$category <- "abiotic_SD"
tmp4$experiment <- "metaT"
## Metabolomics data
chem_data[chem_data$color == "#89DDF8" & chem_data$experiment == "metaB",] -> tmp5
group_by(tmp5, sampling_time) %>% summarise(formaldehyde_mean=mean(formaldehyde_ratio_to_initial),
formaldehyde_sd=sd(formaldehyde_ratio_to_initial)) -> tmp5
tmp5$color <- "#89DDF8"
tmp5$condition <- "WN"
tmp5$category <- "biotic_WN"
tmp5$experiment <- "metaB"
chem_data[chem_data$color == "#F8AD18" & chem_data$experiment == "metaB",] -> tmp6
group_by(tmp6, sampling_time) %>% summarise(formaldehyde_mean=mean(formaldehyde_ratio_to_initial),
formaldehyde_sd=sd(formaldehyde_ratio_to_initial)) -> tmp6
tmp6$color <- "#F8AD18"
tmp6$condition <- "SD"
tmp6$category <- "biotic_SD"
tmp6$experiment <- "metaB"
chem_data[chem_data$color == "#A6C1CA" & chem_data$experiment == "metaB",] -> tmp7
group_by(tmp7, sampling_time) %>% summarise(formaldehyde_mean=mean(formaldehyde_ratio_to_initial),
formaldehyde_sd=sd(formaldehyde_ratio_to_initial)) -> tmp7
tmp7$color <- "#A6C1CA"
tmp7$condition <- "WN"
tmp7$category <- "abiotic_WN"
tmp7$experiment <- "metaB"
chem_data[chem_data$color == "#C4B69B" & chem_data$experiment == "metaB",] -> tmp8
group_by(tmp8, sampling_time) %>% summarise(formaldehyde_mean=mean(formaldehyde_ratio_to_initial),
formaldehyde_sd=sd(formaldehyde_ratio_to_initial)) -> tmp8
tmp8$color <- "#C4B69B"
tmp8$condition <- "SD"
tmp8$category <- "abiotic_SD"
tmp8$experiment <- "metaB"
## Merge data
line_data <- rbind(tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8)
rm(tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8)
## Test difference between biotic and abiotic
kruskal_formaldehyde <- c("",
try(if(kruskal.test(x = c(chem_data_metaB[(chem_data_metaB$sampling_time == 1 &
chem_data_metaB$color == "#F8AD18"),
"formaldehyde_ratio_to_initial"],
chem_data_metaB[(chem_data_metaB$sampling_time == 1 &
chem_data_metaB$color == "#C4B69B"),
"formaldehyde_ratio_to_initial"]),
g = c(rep("biotic_SD", 3), rep("abiotic_SD", 3))
)[["p.value"]] < 0.05){"*"}else{""}, silent = TRUE) ,
try(if(kruskal.test(x = c(chem_data_metaB[(chem_data_metaB$sampling_time == 2 &
chem_data_metaB$color == "#F8AD18"),
"formaldehyde_ratio_to_initial"],
chem_data_metaB[(chem_data_metaB$sampling_time == 2 &
chem_data_metaB$color == "#C4B69B"),
"formaldehyde_ratio_to_initial"]),
g = c(rep("biotic_SD", 3), rep("abiotic_SD", 3))
)[["p.value"]] < 0.05){"*"}else{""}, silent = TRUE),
try(if(kruskal.test(x = c(chem_data_metaB[(chem_data_metaB$sampling_time == 3 &
chem_data_metaB$color == "#F8AD18"),
"formaldehyde_ratio_to_initial"],
chem_data_metaB[(chem_data_metaB$sampling_time == 3 &
chem_data_metaB$color == "#C4B69B"),
"formaldehyde_ratio_to_initial"]),
g = c(rep("biotic_SD", 3), rep("abiotic_SD", 3))
)[["p.value"]] < 0.05){"*"}else{""}, silent = TRUE),
"",
try(if(kruskal.test(x = c(chem_data_metaT[(chem_data_metaT$sampling_time == 1 &
chem_data_metaT$color == "#F8AD18"),
"formaldehyde_ratio_to_initial"],
chem_data_metaT[(chem_data_metaT$sampling_time == 1 &
chem_data_metaT$color == "#C4B69B"),
"formaldehyde_ratio_to_initial"]),
g = c(rep("biotic_SD", 3), rep("abiotic_SD", 3))
)[["p.value"]] < 0.05){"*"}else{""}, silent = TRUE) ,
try(if(kruskal.test(x = c(chem_data_metaT[(chem_data_metaT$sampling_time == 2 &
chem_data_metaT$color == "#F8AD18"),
"formaldehyde_ratio_to_initial"],
chem_data_metaT[(chem_data_metaT$sampling_time == 2 &
chem_data_metaT$color == "#C4B69B"),
"formaldehyde_ratio_to_initial"]),
g = c(rep("biotic_SD", 3), rep("abiotic_SD", 3))
)[["p.value"]] < 0.05){"*"}else{""}, silent = TRUE),
try(if(kruskal.test(x = c(chem_data_metaT[(chem_data_metaT$sampling_time == 3 &
chem_data_metaT$color == "#F8AD18"),
"formaldehyde_ratio_to_initial"],
chem_data_metaT[(chem_data_metaT$sampling_time == 3 &
chem_data_metaT$color == "#C4B69B"),
"formaldehyde_ratio_to_initial"]),
g = c(rep("biotic_SD", 3), rep("abiotic_SD", 3))
)[["p.value"]] < 0.05){"*"}else{""}, silent = TRUE),
try(if(kruskal.test(x = c(chem_data_metaT[(chem_data_metaT$sampling_time == 5 &
chem_data_metaT$color == "#F8AD18"),
"formaldehyde_ratio_to_initial"],
chem_data_metaT[(chem_data_metaT$sampling_time == 5 &
chem_data_metaT$color == "#C4B69B"),
"formaldehyde_ratio_to_initial"]),
g = c(rep("biotic_SD", 3), rep("abiotic_SD", 3))
)[["p.value"]] < 0.05){"*"}else{""}, silent = TRUE),
"",
try(if(kruskal.test(x = c(chem_data_metaB[(chem_data_metaB$sampling_time == 1 &
chem_data_metaB$color == "#89DDF8"),
"formaldehyde_ratio_to_initial"],
chem_data_metaB[(chem_data_metaB$sampling_time == 1 &
chem_data_metaB$color == "#A6C1CA"),
"formaldehyde_ratio_to_initial"]),
g = c(rep("biotic_WN", 3), rep("abiotic_WN", 3))
)[["p.value"]] < 0.05){"*"}else{""}, silent = TRUE) ,
try(if(kruskal.test(x = c(chem_data_metaB[(chem_data_metaB$sampling_time == 2 &
chem_data_metaB$color == "#89DDF8"),
"formaldehyde_ratio_to_initial"],
chem_data_metaB[(chem_data_metaB$sampling_time == 2 &
chem_data_metaB$color == "#A6C1CA"),
"formaldehyde_ratio_to_initial"]),
g = c(rep("biotic_WN", 3), rep("abiotic_WN", 3))
)[["p.value"]] < 0.05){"*"}else{""}, silent = TRUE),
try(if(kruskal.test(x = c(chem_data_metaB[(chem_data_metaB$sampling_time == 3 &
chem_data_metaB$color == "#89DDF8"),
"formaldehyde_ratio_to_initial"],
chem_data_metaB[(chem_data_metaB$sampling_time == 3 &
chem_data_metaB$color == "#A6C1CA"),
"formaldehyde_ratio_to_initial"]),
g = c(rep("biotic_WN", 3), rep("abiotic_WN", 3))
)[["p.value"]] < 0.05){"*"}else{""}, silent = TRUE),
"",
try(if(kruskal.test(x = c(chem_data_metaT[(chem_data_metaT$sampling_time == 1 &
chem_data_metaT$color == "#89DDF8"),
"formaldehyde_ratio_to_initial"],
chem_data_metaT[(chem_data_metaT$sampling_time == 1 &
chem_data_metaT$color == "#A6C1CA"),
"formaldehyde_ratio_to_initial"]),
g = c(rep("biotic_WN", 3), rep("abiotic_WN", 3))
)[["p.value"]] < 0.05){"*"}else{""}, silent = TRUE) ,
try(if(kruskal.test(x = c(chem_data_metaT[(chem_data_metaT$sampling_time == 2 &
chem_data_metaT$color == "#89DDF8"),
"formaldehyde_ratio_to_initial"],
chem_data_metaT[(chem_data_metaT$sampling_time == 2 &
chem_data_metaT$color == "#A6C1CA"),
"formaldehyde_ratio_to_initial"]),
g = c(rep("biotic_WN", 3), rep("abiotic_WN", 3))
)[["p.value"]] < 0.05){"*"}else{""}, silent = TRUE),
try(if(kruskal.test(x = c(chem_data_metaT[(chem_data_metaT$sampling_time == 3 &
chem_data_metaT$color == "#89DDF8"),
"formaldehyde_ratio_to_initial"],
chem_data_metaT[(chem_data_metaT$sampling_time == 3 &
chem_data_metaT$color == "#A6C1CA"),
"formaldehyde_ratio_to_initial"]),
g = c(rep("biotic_WN", 3), rep("abiotic_WN", 3))
)[["p.value"]] < 0.05){"*"}else{""}, silent = TRUE),
try(if(kruskal.test(x = c(chem_data_metaT[(chem_data_metaT$sampling_time == 5 &
chem_data_metaT$color == "#89DDF8"),
"formaldehyde_ratio_to_initial"],
chem_data_metaT[(chem_data_metaT$sampling_time == 5 &
chem_data_metaT$color == "#A6C1CA"),
"formaldehyde_ratio_to_initial"]),
g = c(rep("biotic_WN", 3), rep("abiotic_WN", 3))
)[["p.value"]] < 0.05){"*"}else{""}, silent = TRUE)
)
## Test difference between time 0 an the other time points
kruskal_df <- data.frame(time = rep(c(0, 1, 2, 3, 0, 1, 2, 3, 5), 2),
height = rep(0, 18),
experiment = rep(c(rep("metaB", 4), rep("metaT", 5)), 2),
condition = c(rep("SD", 9), rep("WN", 9)),
category = c(rep("community SD", 9), rep("community WN", 9)),
color = c(rep("#F8AD18", 9), rep("#89DDF8", 9)),
pvalue = kruskal_formaldehyde)
krusk <- kruskal_df
## Segment data for sampling time
segment_data <- data.frame(x = c(4, 0, 3.5), xend = c(4, 0, 3.5),
y = c(-Inf, -Inf, -Inf), yend = c(Inf, Inf, Inf),
experiment = c("metaT", "metaB", "metaB"))
chem_data %>% ggplot() +
geom_point(aes(x=sampling_time, y=formaldehyde_ratio_to_initial, color=color)) +
ylim(c(-0.1, 1.2)) +
xlim(c(0, 5)) +
scale_colour_identity("Category", labels = c("Assemblage Winter Night", "Abiotic Winter Night",
"Abiotic Summer Day", "Assemblage Summer Day"),
guide="legend") +
scale_fill_identity() +
ylab(label = "Relative formaldehyde evolution") +
xlab(label = "Incubation time (h)") +
facet_grid(rows = vars(condition), cols = vars(experiment),
labeller = as_labeller(c("metaT" = "Metatranscriptomics",
"metaB" = "Meta-metabolomics",
"SD" = "Summer Day",
"WN" = "Winter Night"))) +
theme(legend.position = "bottom",
panel.background = element_rect(fill="#f4f4f4"),
strip.text = element_text(size=12)) +
geom_path(data = line_data[line_data$category == "biotic_SD",],
aes(x = sampling_time, y = formaldehyde_mean, color = color),
linewidth = 1) +
geom_ribbon(data = line_data[line_data$category == "biotic_SD",],
aes(x = sampling_time,
ymin = formaldehyde_mean - formaldehyde_sd,
ymax = formaldehyde_mean + formaldehyde_sd,
fill = color), alpha=0.25, linewidth=0.05) +
geom_path(data = line_data[line_data$category == "abiotic_SD",],
aes(x = sampling_time, y = formaldehyde_mean, color = color),
linewidth = 1) +
geom_ribbon(data = line_data[line_data$category == "abiotic_SD",],
aes(x = sampling_time,
ymin = formaldehyde_mean - formaldehyde_sd,
ymax = formaldehyde_mean + formaldehyde_sd,
fill = color), alpha=0.25, linewidth=0.05) +
geom_path(data = line_data[line_data$category == "biotic_WN",],
aes(x = sampling_time, y = formaldehyde_mean, color = color),
linewidth = 1) +
geom_ribbon(data = line_data[line_data$category == "biotic_WN",],
aes(x = sampling_time,
ymin = formaldehyde_mean - formaldehyde_sd,
ymax = formaldehyde_mean + formaldehyde_sd,
fill = color), alpha=0.25, linewidth=0.05) +
geom_path(data = line_data[line_data$category == "abiotic_WN",],
aes(x = sampling_time, y = formaldehyde_mean, color = color),
linewidth = 1) +
geom_ribbon(data = line_data[line_data$category == "abiotic_WN",],
aes(x = sampling_time,
ymin = formaldehyde_mean - formaldehyde_sd,
ymax = formaldehyde_mean + formaldehyde_sd,
fill = color), alpha=0.25, linewidth=0.05) +
# plot significance stars
geom_text(data=krusk[krusk$category == "community SD",],
aes(x = time, y = height, label = pvalue, colour=color),
size = 7, show.legend = FALSE, nudge_y = -0.07) +
geom_text(data=krusk[krusk$category == "community WN",],
aes(x = time, y = height, label = pvalue, colour=color),
size = 7, show.legend = FALSE, nudge_y = -0.07) +
geom_text(data=krusk[krusk$category == "abiotic SD",],
aes(x = time, y = height, label = pvalue, colour=color),
size = 7, show.legend = FALSE) +
geom_text(data=krusk[krusk$category == "abiotic WN",],
aes(x = time, y = height, label = pvalue, colour=color),
size = 7, show.legend = FALSE) +
geom_segment(data=segment_data, aes(x = x, xend = xend, y = y, yend = yend)) +
geom_text(data=segment_data, aes(x = x + 0.2, y = 0.20, label = "Cell sampling"), angle=90, size=3) -> plot_formaldehyde
print(plot_formaldehyde)
chem_data_metaT$experiment <- "metaT"
chem_data_metaB$experiment <- "metaB"
chem_data <- rbind(chem_data_metaT, chem_data_metaB)
## Metatranscriptomics data
chem_data[chem_data$color == "#F8AD18" & chem_data$experiment == "metaT",] -> tmp2
group_by(tmp2, sampling_time) %>% summarise(H2O2_mean=mean(H2O2_ratio_to_initial),
H2O2_sd=sd(H2O2_ratio_to_initial)) -> tmp2
tmp2$color <- "#F8AD18"
tmp2$condition <- "SD"
tmp2$category <- "biotic_SD"
tmp2$experiment <- "metaT"
chem_data[chem_data$color == "#C4B69B" & chem_data$experiment == "metaT",] -> tmp4
group_by(tmp4, sampling_time) %>% summarise(H2O2_mean=mean(H2O2_ratio_to_initial),
H2O2_sd=sd(H2O2_ratio_to_initial)) -> tmp4
tmp4$color <- "#C4B69B"
tmp4$condition <- "SD"
tmp4$category <- "abiotic_SD"
tmp4$experiment <- "metaT"
## Metabolomics data
chem_data[chem_data$color == "#F8AD18" & chem_data$experiment == "metaB",] -> tmp6
group_by(tmp6, sampling_time) %>% summarise(H2O2_mean=mean(H2O2_ratio_to_initial),
H2O2_sd=sd(H2O2_ratio_to_initial)) -> tmp6
tmp6$color <- "#F8AD18"
tmp6$condition <- "SD"
tmp6$category <- "biotic_SD"
tmp6$experiment <- "metaB"
chem_data[chem_data$color == "#C4B69B" & chem_data$experiment == "metaB",] -> tmp8
group_by(tmp8, sampling_time) %>% summarise(H2O2_mean=mean(H2O2_ratio_to_initial),
H2O2_sd=sd(H2O2_ratio_to_initial)) -> tmp8
tmp8$color <- "#C4B69B"
tmp8$condition <- "SD"
tmp8$category <- "abiotic_SD"
tmp8$experiment <- "metaB"
chem_data$color <- as.factor(chem_data$color)
## Merge data
line_data <- rbind(tmp2, tmp4, tmp6, tmp8)
rm(tmp2, tmp4, tmp6, tmp8)
## Test difference between time 0 an the other time points
kruskal_h2o2 <- c("",
try(if(kruskal.test(x = c(chem_data_metaB[(chem_data_metaB$sampling_time == 1 &
chem_data_metaB$color == "#F8AD18"),
"H2O2_ratio_to_initial"],
chem_data_metaB[(chem_data_metaB$sampling_time == 1 &
chem_data_metaB$color == "#C4B69B"),
"H2O2_ratio_to_initial"]),
g = c(rep("biotic_SD", 3), rep("abiotic_SD", 3))
)[["p.value"]] < 0.05){"*"}else{""}, silent = TRUE) ,
try(if(kruskal.test(x = c(chem_data_metaB[(chem_data_metaB$sampling_time == 2 &
chem_data_metaB$color == "#F8AD18"),
"H2O2_ratio_to_initial"],
chem_data_metaB[(chem_data_metaB$sampling_time == 2 &
chem_data_metaB$color == "#C4B69B"),
"H2O2_ratio_to_initial"]),
g = c(rep("biotic_SD", 3), rep("abiotic_SD", 3))
)[["p.value"]] < 0.05){"*"}else{""}, silent = TRUE),
try(if(kruskal.test(x = c(chem_data_metaB[(chem_data_metaB$sampling_time == 3 &
chem_data_metaB$color == "#F8AD18"),
"H2O2_ratio_to_initial"],
chem_data_metaB[(chem_data_metaB$sampling_time == 3 &
chem_data_metaB$color == "#C4B69B"),
"H2O2_ratio_to_initial"]),
g = c(rep("biotic_SD", 3), rep("abiotic_SD", 3))
)[["p.value"]] < 0.05){"*"}else{""}, silent = TRUE),
"",
try(if(kruskal.test(x = c(chem_data_metaT[(chem_data_metaT$sampling_time == 1 &
chem_data_metaT$color == "#F8AD18"),
"H2O2_ratio_to_initial"],
chem_data_metaT[(chem_data_metaT$sampling_time == 1 &
chem_data_metaT$color == "#C4B69B"),
"H2O2_ratio_to_initial"]),
g = c(rep("biotic_SD", 3), rep("abiotic_SD", 3))
)[["p.value"]] < 0.05){"*"}else{""}, silent = TRUE) ,
try(if(kruskal.test(x = c(chem_data_metaT[(chem_data_metaT$sampling_time == 2 &
chem_data_metaT$color == "#F8AD18"),
"H2O2_ratio_to_initial"],
chem_data_metaT[(chem_data_metaT$sampling_time == 2 &
chem_data_metaT$color == "#C4B69B"),
"H2O2_ratio_to_initial"]),
g = c(rep("biotic_SD", 3), rep("abiotic_SD", 3))
)[["p.value"]] < 0.05){"*"}else{""}, silent = TRUE),
try(if(kruskal.test(x = c(chem_data_metaT[(chem_data_metaT$sampling_time == 3 &
chem_data_metaT$color == "#F8AD18"),
"H2O2_ratio_to_initial"],
chem_data_metaT[(chem_data_metaT$sampling_time == 3 &
chem_data_metaT$color == "#C4B69B"),
"H2O2_ratio_to_initial"]),
g = c(rep("biotic_SD", 3), rep("abiotic_SD", 3))
)[["p.value"]] < 0.05){"*"}else{""}, silent = TRUE),
try(if(kruskal.test(x = c(chem_data_metaT[(chem_data_metaT$sampling_time == 5 &
chem_data_metaT$color == "#F8AD18"),
"H2O2_ratio_to_initial"],
chem_data_metaT[(chem_data_metaT$sampling_time == 5 &
chem_data_metaT$color == "#C4B69B"),
"H2O2_ratio_to_initial"]),
g = c(rep("biotic_SD", 3), rep("abiotic_SD", 3))
)[["p.value"]] < 0.05){"*"}else{""}, silent = TRUE)
)
kruskal_df <- data.frame(time = c(0, 1, 2, 3, 0, 1, 2, 3, 5),
height = rep(0, 9),
experiment = c(rep("metaB", 4), rep("metaT", 5)),
condition = rep("SD", 9),
category = rep("community SD", 9),
color = rep("#F8AD18", 9),
pvalue = kruskal_h2o2)
krusk <- kruskal_df
## Segment data for sampling time
segment_data <- data.frame(x = c(4, 0, 3.5), xend = c(4, 0, 3.5),
y = c(-Inf, -Inf, -Inf), yend = c(Inf, Inf, Inf),
experiment = c("metaT", "metaB", "metaB"))
chem_data[chem_data$condition == "SD",] %>% ggplot() +
geom_point(aes(x=sampling_time, y=H2O2_ratio_to_initial, color=color)) +
ylim(c(-0.1, 1.2)) +
xlim(c(0, 5)) +
scale_colour_identity("Category", labels = c("Assemblage Winter Night", "Abiotic Winter Night",
"Abiotic Summer Day", "Assemblage Summer Day"),
guide="legend", drop = FALSE) +
scale_fill_identity() +
ylab("Relative H<sub>2</sub>O<sub>2</sub> evolution") +
xlab(label = "Incubation time (h)") +
facet_grid(rows = vars(condition), cols = vars(experiment),
labeller = as_labeller(c("metaT" = "Metatranscriptomics",
"metaB" = "Meta-metabolomics",
"SD" = "Summer Day",
"WN" = "Winter Night"))) +
theme(legend.position = "bottom",
panel.background = element_rect(fill="#f4f4f4"),
axis.title.y = element_markdown(),
strip.text = element_text(size=12)) +
geom_path(data = line_data[line_data$category == "biotic_SD",],
aes(x = sampling_time, y = H2O2_mean, color = color),
linewidth = 1) +
geom_ribbon(data = line_data[line_data$category == "biotic_SD",],
aes(x = sampling_time,
ymin = H2O2_mean - H2O2_sd,
ymax = H2O2_mean + H2O2_sd,
fill = color), alpha=0.25, linewidth=0.05) +
geom_path(data = line_data[line_data$category == "abiotic_SD",],
aes(x = sampling_time,y =H2O2_mean, color = color),
linewidth = 1) +
geom_ribbon(data = line_data[line_data$category == "abiotic_SD",],
aes(x = sampling_time,
ymin = H2O2_mean - H2O2_sd,
ymax = H2O2_mean + H2O2_sd,
fill = color), alpha=0.25, linewidth=0.05) +
# plot significance stars
geom_text(data=krusk[krusk$category == "community SD",],
aes(x = time, y = height, label = pvalue, colour=color),
size = 7, show.legend = FALSE, nudge_y = -0.07) +
geom_text(data=krusk[krusk$category == "abiotic SD",],
aes(x = time, y = height, label = pvalue, colour=color),
size = 7, show.legend = FALSE) +
# geom_segment(aes(x=4, xend=4, y=-Inf, yend=Inf)) +
geom_segment(data=segment_data, aes(x = x, xend = xend, y = y, yend = yend)) +
geom_segment(aes(x = -Inf, xend = Inf, y = 0.2, yend = 0.2), colour = "darkred", linetype = 2) +
geom_text(data=segment_data, aes(x = x + 0.2, y = 0.42, label = "Cell sampling"), angle=90, size=3) -> plot_h2o2
print(plot_h2o2)
message("Computing sample-wise statistics on raw counts")
sample_stat_prenorm <- data.frame(
mean = apply(counts_table, 2, mean, na.rm = TRUE),
sd = apply(counts_table, 2, sd, na.rm = TRUE),
iqr = apply(counts_table, 2, IQR, na.rm = TRUE),
Q1 = apply(counts_table, 2, quantile, p = 0.25, na.rm = TRUE),
median = apply(counts_table, 2, median, na.rm = TRUE),
Q3 = apply(counts_table, 2, quantile, p = 0.75, na.rm = TRUE),
max = apply(counts_table, 2, max, na.rm = TRUE),
null = apply(counts_table == 0, 2, sum, na.rm = TRUE)
)
message("Computing gene-wise statistics on raw counts")
gene_stat_prenorm <- data.frame(
mean = apply(counts_table, 1, mean, na.rm = TRUE),
sd = apply(counts_table, 1, sd, na.rm = TRUE),
iqr = apply(counts_table, 1, IQR, na.rm = TRUE),
Q1 = apply(counts_table, 1, quantile, p = 0.25, na.rm = TRUE),
median = apply(counts_table, 1, median, na.rm = TRUE),
Q3 = apply(counts_table, 1, quantile, p = 0.75, na.rm = TRUE),
max = apply(counts_table, 1, max, na.rm = TRUE),
null = apply(counts_table == 0, 1, sum, na.rm = TRUE)
)
Gene with 0 counts in more than 70% of samples are deemed unexpressed and are removed from the dataset.
message("Filtering undetected genes")
undetected_genes <- gene_stat_prenorm$null >= ncol(counts_table) * 0.70
print(paste0("Undetected genes (null in >= 70% samples): ", sum(undetected_genes)))
[1] “Undetected genes (null in >= 70% samples): 19262”
[1] “Kept genes: 7191”
## Genes after filtering
counts_filtered <- counts_table[kept_genes, ]
annotation_table <- annotation_all[rownames(counts_filtered),] #row.names()
## Species abundance (sum of all counts)
totals_df <- data.frame()
for (spe in species) {
spe_geneid <- annotation_table$Organism == spe
for (samp in samples) {
totals_df[spe, samp] <- sum(counts_filtered[spe_geneid, samp], na.rm=TRUE)
}
}
# no standardisation was used afterall
counts_standard <- counts_filtered
table_gene_expr <- counts_table
table_gene_expr$detected <- "No"
table_gene_expr[kept_genes, "detected"] <- "yes"
table_gene_expr <- rownames_to_column(table_gene_expr)
table_gene_expr <- left_join(table_gene_expr, rownames_to_column(annotation_table_long)[,c("rowname", "locus_tag", "transcriptId")])
## remaining zeroes smoothing (as in Zhang et al. 2021)
min_val <- apply(counts_standard, 1, function(x) (min(x[x>0])/2))
for (gene in row.names(counts_standard)) {
counts_standard[gene,][counts_standard[gene,] == 0] <- min_val[gene]
}
message("Computing sample-wise statistics on filtered counts")
sample_stat_filt <- data.frame(
mean = apply(counts_standard, 2, mean, na.rm = TRUE),
sd = apply(counts_standard, 2, sd, na.rm = TRUE),
iqr = apply(counts_standard, 2, IQR, na.rm = TRUE),
Q1 = apply(counts_standard, 2, quantile, p = 0.25, na.rm = TRUE),
median = apply(counts_standard, 2, median, na.rm = TRUE),
Q3 = apply(counts_standard, 2, quantile, p = 0.75, na.rm = TRUE),
max = apply(counts_standard, 2, max, na.rm = TRUE),
null = apply(counts_standard == 0, 2, sum, na.rm = TRUE)
)
kable(sample_stat_filt[0:6, ], caption = "Sample-wise statistics after filtering")
mean | sd | iqr | Q1 | median | Q3 | max | null | |
---|---|---|---|---|---|---|---|---|
WN_TF_1 | 7.274402 | 153.42034 | 1.745 | 0.250 | 0.750 | 1.995 | 8701.91 | 0 |
WN_TF_2 | 5.798768 | 141.55673 | 0.835 | 0.165 | 0.330 | 1.000 | 6777.67 | 0 |
WN_TF_3 | 7.436439 | 154.65538 | 1.670 | 0.330 | 0.830 | 2.000 | 8361.50 | 0 |
SD_TF_1 | 7.435202 | 192.23869 | 0.830 | 0.170 | 0.375 | 1.000 | 10856.58 | 0 |
SD_TF_2 | 3.346716 | 85.01516 | 0.375 | 0.125 | 0.250 | 0.500 | 4582.83 | 0 |
SD_TF_3 | 10.483380 | 285.44101 | 0.700 | 0.170 | 0.330 | 0.870 | 14318.75 | 0 |
counts_clr <- counts_standard
for (samp in samples) {
counts_clr[,samp] <- clr(counts_standard[,samp])
}
message("Computing sample-wise statistics on filtered counts")
sample_stat_clr <- data.frame(
mean = apply(counts_clr, 2, mean, na.rm = TRUE),
sd = apply(counts_clr, 2, sd, na.rm = TRUE),
iqr = apply(counts_clr, 2, IQR, na.rm = TRUE),
Q1 = apply(counts_clr, 2, quantile, p = 0.25, na.rm = TRUE),
median = apply(counts_clr, 2, median, na.rm = TRUE),
Q3 = apply(counts_clr, 2, quantile, p = 0.75, na.rm = TRUE),
max = apply(counts_clr, 2, max, na.rm = TRUE),
null = apply(counts_clr == 0, 2, sum, na.rm = TRUE)
)
kable(sample_stat_clr[0:6, ], caption = "Sample-wise statistics after CLR transformation")
mean | sd | iqr | Q1 | median | Q3 | max | null | |
---|---|---|---|---|---|---|---|---|
WN_TF_1 | 0 | 1.366662 | 2.076935 | -1.1413059 | -0.0426936 | 0.9356294 | 9.316286 | 0 |
WN_TF_2 | 0 | 1.246467 | 1.801810 | -0.9619634 | -0.2688162 | 0.8398464 | 9.661235 | 0 |
WN_TF_3 | 0 | 1.357223 | 1.801810 | -0.9519112 | -0.0295782 | 0.8498986 | 9.188144 | 0 |
SD_TF_1 | 0 | 1.220095 | 1.771957 | -0.9654242 | -0.1742966 | 0.8065326 | 10.099059 | 0 |
SD_TF_2 | 0 | 1.061585 | 1.386294 | -0.8828711 | -0.1897239 | 0.5034233 | 9.626643 | 0 |
SD_TF_3 | 0 | 1.204212 | 1.632695 | -0.9117566 | -0.2484624 | 0.7209382 | 10.429525 | 0 |
## Gene-wise statistics after normalisation
message("Computing gene-wise statistics on log-transformed and normalised counts")
gene_stat_norm <- data.frame(mean = apply(counts_clr, 1, mean, na.rm=TRUE),
var = apply(counts_clr, 1, var, na.rm=TRUE),
sd = apply(counts_clr, 1, sd, na.rm=TRUE),
iqr = apply(counts_clr, 1, IQR, na.rm=TRUE),
min = apply(counts_clr, 1, min, na.rm=TRUE),
med = apply(counts_clr, 1, median, na.rm=TRUE),
max = apply(counts_clr, 1, max, na.rm=TRUE))
# Ajout du coefficient de variation
gene_stat_norm$coef_var <- (gene_stat_norm$sd / gene_stat_norm$mean)
par(mfrow=c(1,1))
hist(unlist(counts_table),
breaks = 200,
cex.axis = 0.7,
las = 1,
col = "skyblue",
xlab = "raw counts",
main = "Distribution of raw counts")
par(mfrow=c(1,1))
hist(unlist(counts_clr),
breaks = 200,
cex.axis = 0.7,
las = 1,
col = "skyblue",
xlab = "clr-transformed counts",
main = "Distribution of clr-transformed counts")
The distribution of the whole assemblage metatranscriptomics counts seems close to normal.
# Raw data
boxplot(counts_table,
main = "Raw expression",
horizontal = TRUE,
col = metadata_table$color,
cex = 0.5,
cex.axis = 0.8,
las = 1)
boxplot(counts_clr,
main = "clr-transformed expression",
horizontal = TRUE,
col = metadata_table$color,
cex = 0.5,
cex.axis = 0.8,
las = 1)
The CLR transformation centred our data.
totals_df$species <- row.names((totals_df))
totals_df %>%
pivot_longer(!species,
names_to=c("sample"),
values_to="total_counts") %>%
mutate(sample = case_when(sample == "SD_TF_1" ~ "SD_TF_1",
sample == "SD_TF_2" ~ "SD_TF_2",
sample == "SD_TF_3" ~ "SD_TF_3",
sample == "WN_TF_1" ~ "WN_TF_1",
sample == "WN_TF_2" ~ "WN_TF_2",
sample == "WN_TF_3" ~ "WN_TF_3")) -> totals_df
ggplot(data=totals_df, mapping=aes(x=sample, y=total_counts, fill=species)) +
geom_bar(position="fill", stat="identity") +
scale_fill_brewer(palette = "PuOr") +
labs(y = "Percentage of total counts") +
scale_y_continuous(labels = as_function(~ 100 * .)) +
theme(axis.text.x = element_text(colour = c("SD_TF_1" = "darkorange",
"SD_TF_2" = "darkorange",
"SD_TF_3" = "darkorange",
"WN_TF_1" = "darkcyan",
"WN_TF_2" = "darkcyan",
"WN_TF_3" = "darkcyan"))) +
ggtitle("Total metatranscriptomics counts per species") -> p
print(p)
ggsave("../figures/Figure_S3_counts_repartition.png", p, dpi = 300)
ggsave("../figures/Figure_S3_counts_repartition.tiff", p, height = 7, width = 7, dpi = 300)
As previously noted, Dioszegia hungarica counts are largely dominant in our data. Pseudomonas graminis associated counts are very rare.
organelle_df <- data.frame()
mito_geneid <- annotation_table$Chr == "Dioszegia_hungarica_PDD-24b-2_contig_35"
nucl_geneid <- (annotation_table$Organism == "D.hungarica" & annotation_table$Chr != "Dioszegia_hungarica_PDD-24b-2_contig_35")
for (samp in samples) {
organelle_df["mitochondria", samp] <- sum(counts_filtered[mito_geneid, samp], na.rm=TRUE)
organelle_df["nucleus", samp] <- sum(counts_filtered[nucl_geneid, samp], na.rm=TRUE)
}
organelle_df$organelle <- row.names((organelle_df))
organelle_df %>%
pivot_longer(!organelle,
names_to=c("sample"),
values_to="total_counts") %>%
mutate(sample = case_when(sample == "SD_TF_1" ~ "SD_TF_1",
sample == "SD_TF_2" ~ "SD_TF_2",
sample == "SD_TF_3" ~ "SD_TF_3",
sample == "WN_TF_1" ~ "WN_TF_1",
sample == "WN_TF_2" ~ "WN_TF_2",
sample == "WN_TF_3" ~ "WN_TF_3")) -> organelle_df
ggplot(data=organelle_df, mapping=aes(x=sample, y=total_counts, fill=organelle)) +
geom_bar(position="fill", stat="identity") +
scale_fill_brewer(palette = "PuOr") +
labs(y = "Percentage of total counts") +
scale_y_continuous(labels = as_function(~ 100 * .)) +
theme(axis.text.x = element_text(colour = c("SD_TF_1" = "darkorange",
"SD_TF_2" = "darkorange",
"SD_TF_3" = "darkorange",
"WN_TF_1" = "darkcyan",
"WN_TF_2" = "darkcyan",
"WN_TF_3" = "darkcyan"))) +
ggtitle("Total transcript counts per D. hungarica compartment") -> p
print(p)
ggsave("../figures/Diohu_counts_repartition.png", p, dpi = 300)
ggsave("../figures/Diohu_counts_repartition.tiff", p, height = 7, width = 7, dpi = 300)
Among Dioszegia hungarica counts, a large portion come from transcripts produced in the mitochondria, especially in SD conditions.
# message("Computing metabolite-wise statistics on raw counts")
metabolite_stat_prenorm <- data.frame(
mean = apply(metabolomics_all_times_df, 1, mean, na.rm = TRUE),
sd = apply(metabolomics_all_times_df, 1, sd, na.rm = TRUE),
iqr = apply(metabolomics_all_times_df, 1, IQR, na.rm = TRUE),
Q1 = apply(metabolomics_all_times_df, 1, quantile, p = 0.25, na.rm = TRUE),
median = apply(metabolomics_all_times_df, 1, median, na.rm = TRUE),
Q3 = apply(metabolomics_all_times_df, 1, quantile, p = 0.75, na.rm = TRUE),
max = apply(metabolomics_all_times_df, 1, max, na.rm = TRUE),
null = apply(metabolomics_all_times_df == 0, 1, sum, na.rm = TRUE)
)
kable(metabolite_stat_prenorm[100:109, ], caption = "Gene-wise statistics before normalisation")
mean | sd | iqr | Q1 | median | Q3 | max | null | |
---|---|---|---|---|---|---|---|---|
M227.1754T7.46 | 0.0297637 | 0.0125831 | 0.0153123 | 0.0216464 | 0.0248620 | 0.0369588 | 0.0545123 | 0 |
M227.1756T7.71 | 0.0009423 | 0.0005382 | 0.0005236 | 0.0006028 | 0.0008935 | 0.0011263 | 0.0023538 | 0 |
M229.1547T7.3 | 0.0002872 | 0.0000654 | 0.0001292 | 0.0002224 | 0.0002810 | 0.0003516 | 0.0003785 | 0 |
M229.1548T7.51 | 0.0002844 | 0.0000405 | 0.0000395 | 0.0002643 | 0.0002889 | 0.0003038 | 0.0003452 | 0 |
M231.1704T7.41 | 0.0007549 | 0.0004071 | 0.0004789 | 0.0004242 | 0.0006295 | 0.0009030 | 0.0015536 | 0 |
M231.1704T7.7 | 0.0007458 | 0.0003289 | 0.0004379 | 0.0004945 | 0.0007700 | 0.0009324 | 0.0013820 | 0 |
M231.1705T8.71 | 0.0001531 | 0.0000906 | 0.0001410 | 0.0000854 | 0.0001604 | 0.0002264 | 0.0002715 | 0 |
M232.1544T7.21 | 0.0019758 | 0.0016928 | 0.0009050 | 0.0010000 | 0.0011673 | 0.0019050 | 0.0056565 | 0 |
M232.1544T7.07 | 0.0010623 | 0.0005972 | 0.0005553 | 0.0006736 | 0.0007847 | 0.0012289 | 0.0021863 | 0 |
M233.0592T7.35 | 0.0000623 | 0.0000277 | 0.0000356 | 0.0000442 | 0.0000653 | 0.0000798 | 0.0001143 | 0 |
Metabolites with 0 counts in more than 70% of samples are deemed unexpressed and are removed from the dataset.
message("Filtering undetected metabolites")
undetected_metabolites <- metabolite_stat_prenorm$null >= ncol(metabolomics_all_times_df) * 0.70
print(paste0("undetected_metabolites (null in >= 70% samples): ", sum(undetected_metabolites)))
[1] “undetected_metabolites (null in >= 70% samples): 5”
kept_metabolites <- !undetected_metabolites
print(paste0("Kept metabolites: ", sum(kept_metabolites)))
[1] “Kept metabolites: 465”
## metabolites after filtering
metabolomics_all_times_filtered <- metabolomics_all_times_df[kept_metabolites, ]
## remaining zeroes smoothing (as in Zhang et al. 2021)
min_val <- apply(metabolomics_all_times_filtered, 1, function(x) (min(x[x>0])/2))
for (metabolite in row.names(metabolomics_all_times_filtered)) {
metabolomics_all_times_filtered[metabolite,][metabolomics_all_times_filtered[metabolite,] == 0] <- min_val[metabolite]
}
par(mfrow=c(1,1))
hist(unlist(metabolomics_all_times_df),
breaks = 200,
cex.axis = 0.7,
las = 1,
col = "skyblue",
xlab = "raw metabolomics data",
main = "Distribution of raw metabolomics data")
par(mfrow=c(1,1))
hist(unlist(metabolomics_all_times_clr),
breaks = 200,
cex.axis = 0.7,
las = 1,
col = "skyblue",
xlab = "clr-transformed metabolomics data, all times",
main = "Distribution of clr-transformed metabolomics data, all times")
The distribution of metabolomics data is close to normal.
boxplot(metabolomics_all_times_df,
main = "metabolomics raw data",
horizontal = TRUE,
col = metadata_table_metaB_all_times$color,
cex = 0.5,
cex.axis = 0.8,
las = 1)
boxplot(metabolomics_all_times_clr,
main = "metabolomics clr-transformed expression",
horizontal = TRUE,
col = metadata_table_metaB_all_times$color,
cex = 0.5,
cex.axis = 0.8,
las = 1)
The CLR transformation centred our data.
counts_data <- t(counts_clr)
metadata_tmp <- data.frame(metadata_table, stringsAsFactors = TRUE)
permanova <- adonis2(formula = as.formula(paste0("counts_data~", "condition")),
data = metadata_tmp, method="euclidean", # method="bray", #otu_data~TIME
permutations = 999, sqrt.dist = FALSE, add = FALSE, by = "terms")
pv = permanova$`Pr(>F)`[1]
# print(pv)
rm(counts_data, metadata_tmp)
pca_tmp <- rbind(res_pca$ind$coord,
res_pca$ind.sup$coord) %>% data.frame() %>%
rownames_to_column("sample")
pca_data <- left_join(pca_tmp, rownames_to_column(metadata_table), by=join_by("sample" == "rowname"))
rm(pca_tmp)
D1_text = paste0("Dim 1 (",
round(data.frame(res_pca$eig)$percentage.of.variance[1], digits=2),
"%)")
D2_text = paste0("Dim 2 (",
round(data.frame(res_pca$eig)$percentage.of.variance[2], digits=2),
"%)")
D3_text = paste0("Dim 3 (",
round(data.frame(res_pca$eig)$percentage.of.variance[3], digits=2),
"%)")
ggplot(pca_data) +
geom_point(aes(x=Dim.1, y=Dim.2, colour=color, ), size=3) +
scale_colour_identity("Condition", labels=c("Winter Night", "Summer Day"), guide = "legend") +
ggrepel::geom_text_repel(aes(x=Dim.1, y=Dim.2, colour=color, label=sample),
nudge_y = -2, seed = 42, segment.size=0.1, show.legend = FALSE) +
xlab(D1_text) +
ylab(D2_text) +
ggtitle("PCA metatranscriptomics") +
labs(caption = paste0("PERMANOVA on condition: p-value: ", pv)) +
theme(panel.background= element_rect(fill="#f4f4f4"),
plot.caption = element_text(size=7)) -> p1_metaT
ggplot(pca_data) +
geom_point(aes(x=Dim.1, y=Dim.3, colour=color), size=3) +
scale_colour_identity("Condition", labels=c("Winter Night", "Summer Day"), guide = "legend") +
ggrepel::geom_text_repel(aes(x=Dim.1, y=Dim.3, colour=color, label=sample),
nudge_y = -2, seed = 42, segment.size=0.1, show.legend = FALSE) +
xlab(D1_text) +
ylab(D3_text) +
ggtitle("PCA metatranscriptomics") +
labs(caption = paste0("PERMANOVA on condition: p-value: ", pv)) +
theme(panel.background= element_rect(fill="#f4f4f4"),
plot.caption = element_text(size=7)) -> p2
ggplot(pca_data) +
geom_point(aes(x=Dim.2, y=Dim.3, colour=color), size=3) +
scale_colour_identity("Condition", labels=c("Winter Night", "Summer Day"), guide = "legend") +
ggrepel::geom_text_repel(aes(x=Dim.2, y=Dim.3, colour=color, label=sample),
nudge_y = -2, seed = 42, segment.size=0.1, show.legend = FALSE) +
xlab(D2_text) +
ylab(D3_text) +
ggtitle("PCA metatranscriptomics") +
labs(caption = paste0("PERMANOVA on condition: p-value: ", pv)) +
theme(panel.background= element_rect(fill="#f4f4f4"),
plot.caption = element_text(size=7)) -> p3
plotgrid_PCA <- ggarrange(p1_metaT, p2, p3, labels = c("A", "B", "C"), common.legend=TRUE, legend="bottom", nrow=1)
print(plotgrid_PCA)
ggsave(plot = plotgrid_PCA, filename = "../figures/PCA_metatranscriptomics_3_composantes.tiff", dpi = 300, width = 15, height = 5, bg="white")
ggsave(plot=p1_metaT, filename = "../figures/PCA_metatranscriptomics.tiff", dpi = 300, width = 6, height = 5, bg="white")
row_coord <- res_pca$ind$coord
invisible(rgl::open3d())
#rgl::bg3d("lightgray")
rgl::plot3d(x=row_coord[c("WN_TF_1", "WN_TF_2", "WN_TF_3"), 1],
y=row_coord[c("WN_TF_1", "WN_TF_2", "WN_TF_3"), 2],
z=row_coord[c("WN_TF_1", "WN_TF_2", "WN_TF_3"), 3],
xlab = paste0(colnames(row_coord)[1] , " (" , substr(as.character(res_pca$eig[,2][1]), 1, 5) , "%)"),
ylab = paste0(colnames(row_coord)[2] , " (" , substr(as.character(res_pca$eig[,2][2]), 1, 5) , "%)"),
zlab = paste0(colnames(row_coord)[3] , " (" , substr(as.character(res_pca$eig[,2][3]), 1, 5) , "%)"),
xlim = c(min(row_coord[, 1]-10), max(row_coord[, 1])+10),
ylim = c(min(row_coord[, 2]-10), max(row_coord[, 2])+10),
zlim = c(min(row_coord[, 3]-10), max(row_coord[, 3])+10),
col = "darkcyan",
size=10,
main = "PCA Assemblage")
rgl::text3d(x=row_coord[c("WN_TF_1", "WN_TF_2", "WN_TF_3"), 1]+5,
y=row_coord[c("WN_TF_1", "WN_TF_2", "WN_TF_3"), 2]+5,
z=row_coord[c("WN_TF_1", "WN_TF_2", "WN_TF_3"), 3]+5,
text=c("WN_TF_1", "WN_TF_2", "WN_TF_3"),
col = "darkcyan",
size=10)
rgl::points3d(x=row_coord[c("SD_TF_1", "SD_TF_2", "SD_TF_3"), 1],
y=row_coord[c("SD_TF_1", "SD_TF_2", "SD_TF_3"), 2],
z=row_coord[c("SD_TF_1", "SD_TF_2", "SD_TF_3"), 3],
col = "darkorange",
size=10)
rgl::text3d(x=row_coord[c("SD_TF_1", "SD_TF_2", "SD_TF_3"), 1]+5,
y=row_coord[c("SD_TF_1", "SD_TF_2", "SD_TF_3"), 2]+5,
z=row_coord[c("SD_TF_1", "SD_TF_2", "SD_TF_3"), 3]+5,
text=c("SD_TF_1", "SD_TF_2", "SD_TF_3"),
col = "darkorange",
size=10)
# Activate for or gif generation
par3d(windowRect = c(20, 30, 800, 800))
# rgl::movie3d(
# movie="3dAnimated_PCA_metaT_assemblage",
# spin3d( axis = c(0, 0, 1), rpm = 3),
# duration = 20,
# dir = "../figures",
# type = "gif",
# clean = TRUE, webshot=FALSE, fps=20)
par(mfrow = c(2,2))
res_pca_metaB_all_times <- PCA(t(metabolomics_all_times_clr), scale.unit = TRUE, graph = FALSE)
metabolomics_data <- t(metabolomics_all_times_clr)
metabolomics_tmp <- data.frame(metadata_table_metaB_all_times, stringsAsFactors = TRUE)
permanova <- adonis2(formula = as.formula(paste0("metabolomics_data~", "condition")),
data = metabolomics_tmp, method="euclidean",
permutations = 999, sqrt.dist = FALSE, add = FALSE, by = "terms")
pv = permanova$`Pr(>F)`[1]
rm(metabolomics_data, metabolomics_tmp)
pca_tmp_metaB <- rbind(res_pca_metaB_all_times$ind$coord,
res_pca_metaB_all_times$ind.sup$coord) %>% data.frame() %>%
rownames_to_column("sample")
pca_data_metaB <- left_join(pca_tmp_metaB, rownames_to_column(metadata_table_metaB_all_times), by=join_by("sample" == "rowname"))
rm(pca_tmp_metaB)
D1_text = paste0("Dim 1 (",
round(data.frame(res_pca_metaB_all_times$eig)$percentage.of.variance[1], digits=2),
"%)")
D2_text = paste0("Dim 2 (",
round(data.frame(res_pca_metaB_all_times$eig)$percentage.of.variance[2], digits=2),
"%)")
D3_text = paste0("Dim 3 (",
round(data.frame(res_pca_metaB_all_times$eig)$percentage.of.variance[3], digits=2),
"%)")
ggplot(pca_data_metaB) +
geom_point(aes(x=Dim.1, y=Dim.2, colour=color, ), size=3) +
scale_colour_identity("Condition", labels=c("Winter Night", "Summer Day"), guide = "legend") +
ggrepel::geom_text_repel(aes(x=Dim.1, y=Dim.2, colour=color, label=sample),
nudge_y = -2, seed = 42, segment.size=0.1, show.legend = FALSE) +
xlab(D1_text) +
ylab(D2_text) +
ggtitle("PCA meta-metabolomics") +
labs(caption = paste0("PERMANOVA on condition: p-value: ", pv)) +
theme(panel.background= element_rect(fill="#f4f4f4"),
plot.caption = element_text(size=7)) -> p1_metaB
ggplot(pca_data_metaB) +
geom_point(aes(x=Dim.1, y=Dim.3, colour=color), size=3) +
scale_colour_identity("Condition", labels=c("Winter Night", "Summer Day"), guide = "legend") +
ggrepel::geom_text_repel(aes(x=Dim.1, y=Dim.3, colour=color, label=sample),
nudge_y = -2, seed = 42, segment.size=0.1, show.legend = FALSE) +
xlab(D1_text) +
ylab(D3_text) +
ggtitle("PCA meta-metabolomics") +
labs(caption = paste0("PERMANOVA on condition: p-value: ", pv)) +
theme(panel.background= element_rect(fill="#f4f4f4"),
plot.caption = element_text(size=7)) -> p2_metaB
ggplot(pca_data_metaB) +
geom_point(aes(x=Dim.2, y=Dim.3, colour=color), size=3) +
scale_colour_identity("Condition", labels=c("Winter Night", "Summer Day"), guide = "legend") +
ggrepel::geom_text_repel(aes(x=Dim.2, y=Dim.3, colour=color, label=sample),
nudge_y = -2, seed = 42, segment.size=0.1, show.legend = FALSE) +
xlab(D2_text) +
ylab(D3_text) +
ggtitle("PCA meta-metabolomics") +
labs(caption = paste0("PERMANOVA on condition: p-value: ", pv)) +
theme(panel.background= element_rect(fill="#f4f4f4"),
plot.caption = element_text(size=7)) -> p3_metaB
plotgrid_PCA_metaB <- ggarrange(p1_metaB, p2_metaB, p3_metaB, labels = c("A", "B", "C"), common.legend=TRUE, legend="bottom", nrow=1)
print(plotgrid_PCA_metaB)
ggsave(plot = p1_metaB, filename = "../figures/PCA_metabolomics_all_times.tiff", dpi = 300, width = 5, height = 5, bg="white")
ggsave(plot = plotgrid_PCA_metaB, filename = "../figures/PCA_metabolomics_3_composantes_all_times.tiff", dpi = 300, width = 15, height = 5, bg="white")
ggsave(plot=p1_metaB, filename = "../figures/PCA_metabolomics_all_times.tiff", dpi = 300, width = 6, height = 5, bg="white")
row_coord <- res_pca_metaB_all_times$ind$coord
invisible(rgl::open3d())
#rgl::bg3d("lightgray")
rgl::plot3d(x=row_coord[c("WN_TF_1", "WN_TF_2", "WN_TF_3"), 1],
y=row_coord[c("WN_TF_1", "WN_TF_2", "WN_TF_3"), 2],
z=row_coord[c("WN_TF_1", "WN_TF_2", "WN_TF_3"), 3],
xlab = paste0(colnames(row_coord)[1] , " (" , substr(as.character(res_pca$eig[,2][1]), 1, 5) , "%)"),
ylab = paste0(colnames(row_coord)[2] , " (" , substr(as.character(res_pca$eig[,2][2]), 1, 5) , "%)"),
zlab = paste0(colnames(row_coord)[3] , " (" , substr(as.character(res_pca$eig[,2][3]), 1, 5) , "%)"),
xlim = c(min(row_coord[, 1]-10), max(row_coord[, 1])+10),
ylim = c(min(row_coord[, 2]-10), max(row_coord[, 2])+10),
zlim = c(min(row_coord[, 3]-10), max(row_coord[, 3])+10),
col = "darkcyan",
size=10,
main = "PCA meta-metabolomics assemblage")
rgl::text3d(x=row_coord[c("WN_TF_1", "WN_TF_2", "WN_TF_3"), 1]+5,
y=row_coord[c("WN_TF_1", "WN_TF_2", "WN_TF_3"), 2]+5,
z=row_coord[c("WN_TF_1", "WN_TF_2", "WN_TF_3"), 3]+5,
text=c("WN_TF_1", "WN_TF_2", "WN_TF_3"),
col = "darkcyan",
size=10)
rgl::points3d(x=row_coord[c("SD_TF_1", "SD_TF_2", "SD_TF_3"), 1],
y=row_coord[c("SD_TF_1", "SD_TF_2", "SD_TF_3"), 2],
z=row_coord[c("SD_TF_1", "SD_TF_2", "SD_TF_3"), 3],
col = "darkorange",
size=10)
rgl::text3d(x=row_coord[c("SD_TF_1", "SD_TF_2", "SD_TF_3"), 1]+5,
y=row_coord[c("SD_TF_1", "SD_TF_2", "SD_TF_3"), 2]+5,
z=row_coord[c("SD_TF_1", "SD_TF_2", "SD_TF_3"), 3]+5,
text=c("SD_TF_1", "SD_TF_2", "SD_TF_3"),
col = "darkorange",
size=10)
rgl::points3d(x=row_coord[c("WN_T0_1", "WN_T0_2", "WN_T0_3"), 1],
y=row_coord[c("WN_T0_1", "WN_T0_2", "WN_T0_3"), 2],
z=row_coord[c("WN_T0_1", "WN_T0_2", "WN_T0_3"), 3],
col = "darkcyan",
size=10)
rgl::text3d(x=row_coord[c("WN_T0_1", "WN_T0_2", "WN_T0_3"), 1]+5,
y=row_coord[c("WN_T0_1", "WN_T0_2", "WN_T0_3"), 2]+5,
z=row_coord[c("WN_T0_1", "WN_T0_2", "WN_T0_3"), 3]+5,
text=c("WN_T0_1", "WN_T0_2", "WN_T0_3"),
col = "darkcyan",
size=10)
rgl::points3d(x=row_coord[c("SD_T0_1", "SD_T0_2", "SD_T0_3"), 1],
y=row_coord[c("SD_T0_1", "SD_T0_2", "SD_T0_3"), 2],
z=row_coord[c("SD_T0_1", "SD_T0_2", "SD_T0_3"), 3],
col = "darkorange",
size=10)
rgl::text3d(x=row_coord[c("SD_T0_1", "SD_T0_2", "SD_T0_3"), 1]+5,
y=row_coord[c("SD_T0_1", "SD_T0_2", "SD_T0_3"), 2]+5,
z=row_coord[c("SD_T0_1", "SD_T0_2", "SD_T0_3"), 3]+5,
text=c("SD_T0_1", "SD_T0_2", "SD_T0_3"),
col = "darkorange",
size=10)
# Activate for or gif generation
# par3d(windowRect = c(20, 30, 800, 800))
#
# rgl::movie3d(
# movie="3dAnimated_PCA_metaB_assemblage",
# spin3d( axis = c(0, 0, 1), rpm = 3),
# duration = 20,
# dir = "../figures",
# type = "gif",
# clean = TRUE, webshot=FALSE, fps=20)
diohu_geneid <- annotation_table$Organism == "D.hungarica"
diohu_df <- counts_standard[startsWith(rownames_to_column(counts_standard)$rowname, "D.hungarica"),]
psesy_geneid <- annotation_table$Organism == "P.syringae"
psesy_df <- counts_standard[startsWith(rownames_to_column(counts_standard)$rowname, "P.syringae"),]
psegr_geneid <- annotation_table$Organism == "P.graminis"
psegr_df <- counts_standard[startsWith(rownames_to_column(counts_standard)$rowname, "P.graminis"),]
rhoen_geneid <- annotation_table$Organism == "R.enclensis"
rhoen_df <- counts_standard[startsWith(rownames_to_column(counts_standard)$rowname, "R.enclensis"),]
## remaining zeroes smoothing (as in Zhang et al. 2021)
min_val <- apply(diohu_df, 1, function(x) (min(x[x>0])/2))
for (gene in row.names(diohu_df)) {
diohu_df[gene,][diohu_df[gene,] == 0] <- min_val[gene]
}
min_val <- apply(psesy_df, 1, function(x) (min(x[x>0])/2))
for (gene in row.names(psesy_df)) {
psesy_df[gene,][psesy_df[gene,] == 0] <- min_val[gene]
}
min_val <- apply(psegr_df, 1, function(x) (min(x[x>0])/2))
for (gene in row.names(psegr_df)) {
psegr_df[gene,][psegr_df[gene,] == 0] <- min_val[gene]
}
min_val <- apply(rhoen_df, 1, function(x) (min(x[x>0])/2))
for (gene in row.names(rhoen_df)) {
rhoen_df[gene,][rhoen_df[gene,] == 0] <- min_val[gene]
}
diohu_clr <- diohu_df
psesy_clr <- psesy_df
psegr_clr <- psegr_df
rhoen_clr <- rhoen_df
for (samp in samples) {
diohu_clr[,samp] <- as.vector(clr(diohu_df[,samp]))
psesy_clr[,samp] <- as.vector(clr(psesy_df[,samp]))
psegr_clr[,samp] <- as.vector(clr(psegr_df[,samp]))
rhoen_clr[,samp] <- as.vector(clr(rhoen_df[,samp]))
}
par(mfrow=c(1,1))
hist(unlist(diohu_clr),
breaks = 200,
cex.axis = 0.7,
las = 1,
col = "skyblue",
ylim = c(0,1800),
xlim = c(-5,15),
xlab = "clr-transformed counts",
main = "Distribution of D. hungarica clr-transformed counts")
par(mfrow=c(1,1))
hist(unlist(psegr_clr),
breaks = 200,
cex.axis = 0.7,
las = 1,
col = "skyblue",
ylim = c(0,1800),
xlim = c(-5,15),
xlab = "clr-transformed counts",
main = "Distribution of P. graminis clr-transformed counts")
Very few P. graminis counts, probably not exploitable.
boxplot(diohu_clr,
main = "D. hungarica clr-transformed expression",
horizontal = TRUE,
col = metadata_table$color,
cex = 0.5,
cex.axis = 0.8,
las = 1)
boxplot(psegr_clr,
main = "P. graminis clr-transformed expression",
horizontal = TRUE,
col = metadata_table$color,
cex = 0.5,
cex.axis = 0.8,
las = 1)
counts_data <- t(diohu_clr)
metadata_tmp <- data.frame(metadata_table, stringsAsFactors = TRUE)
permanova <- adonis2(formula = as.formula(paste0("counts_data~", "condition")),
data = metadata_tmp, method="euclidean", # method="bray", #otu_data~TIME
permutations = 999, sqrt.dist = FALSE, add = FALSE, by = "terms")
pv = permanova$`Pr(>F)`[1]
rm(counts_data, metadata_tmp)
pca_tmp <- rbind(res_pca$ind$coord,
res_pca$ind.sup$coord) %>% data.frame() %>%
rownames_to_column("sample")
pca_data <- left_join(pca_tmp, rownames_to_column(metadata_table), by=join_by("sample" == "rowname"))
rm(pca_tmp)
D1_text = paste0("Dim 1 (",
round(data.frame(res_pca$eig)$percentage.of.variance[1], digits=2),
"%)")
D2_text = paste0("Dim 2 (",
round(data.frame(res_pca$eig)$percentage.of.variance[2], digits=2),
"%)")
D3_text = paste0("Dim 3 (",
round(data.frame(res_pca$eig)$percentage.of.variance[3], digits=2),
"%)")
ggplot(pca_data) +
geom_point(aes(x=Dim.1, y=Dim.2, colour=color, ), size=3) +
scale_colour_identity("Condition", labels=c("Winter Night", "Summer Day"), guide = "legend") +
ggrepel::geom_text_repel(aes(x=Dim.1, y=Dim.2, colour=color, label=sample),
nudge_y = -2, seed = 42, segment.size=0.1, show.legend = FALSE) +
xlab(D1_text) +
ylab(D2_text) +
ggtitle("PCA metatranscriptomics \nD. hungarica") +
labs(caption = paste0("PERMANOVA on condition: p-value: ", pv)) +
theme(panel.background= element_rect(fill="#f4f4f4"),
plot.caption = element_text(size=7)) -> p1
ggplot(pca_data) +
geom_point(aes(x=Dim.1, y=Dim.3, colour=color), size=3) +
scale_colour_identity("Condition", labels=c("Winter Night", "Summer Day"), guide = "legend") +
ggrepel::geom_text_repel(aes(x=Dim.1, y=Dim.3, colour=color, label=sample),
nudge_y = -2, seed = 42, segment.size=0.1, show.legend = FALSE) +
xlab(D1_text) +
ylab(D3_text) +
ggtitle("PCA metatranscriptomics \nD. hungarica") +
labs(caption = paste0("PERMANOVA on condition: p-value: ", pv)) +
theme(panel.background= element_rect(fill="#f4f4f4"),
plot.caption = element_text(size=7)) -> p2
ggplot(pca_data) +
geom_point(aes(x=Dim.2, y=Dim.3, colour=color), size=3) +
scale_colour_identity("Condition", labels=c("Winter Night", "Summer Day"), guide = "legend") +
ggrepel::geom_text_repel(aes(x=Dim.2, y=Dim.3, colour=color, label=sample),
nudge_y = -2, seed = 42, segment.size=0.1, show.legend = FALSE) +
xlab(D2_text) +
ylab(D3_text) +
ggtitle("PCA metatranscriptomics \nD. hungarica") +
labs(caption = paste0("PERMANOVA on condition: p-value: ", pv)) +
theme(panel.background= element_rect(fill="#f4f4f4"),
plot.caption = element_text(size=7)) -> p3
plotgrid_PCA <- ggarrange(p1, p2, p3, labels = c("A", "B", "C"), common.legend=TRUE, legend="bottom", nrow=1)
print(plotgrid_PCA)
ggsave(plot = plotgrid_PCA, filename = "../figures/PCA_metatranscriptomics_3_composantes_diohu.tiff", dpi = 300, width = 15, height = 5, bg="white")
ggsave(plot=p1, filename = "../figures/PCA_metatranscriptomics_diohu.tiff", dpi = 300, width = 6, height = 5, bg="white")
row_coord <- res_pca$ind$coord
invisible(rgl::open3d())
#rgl::bg3d("lightgray")
rgl::plot3d(x=row_coord[c("WN_TF_1", "WN_TF_2", "WN_TF_3"), 1],
y=row_coord[c("WN_TF_1", "WN_TF_2", "WN_TF_3"), 2],
z=row_coord[c("WN_TF_1", "WN_TF_2", "WN_TF_3"), 3],
xlab = paste0(colnames(row_coord)[1] , " (" , substr(as.character(res_pca$eig[,2][1]), 1, 5) , "%)"),
ylab = paste0(colnames(row_coord)[2] , " (" , substr(as.character(res_pca$eig[,2][2]), 1, 5) , "%)"),
zlab = paste0(colnames(row_coord)[3] , " (" , substr(as.character(res_pca$eig[,2][3]), 1, 5) , "%)"),
xlim = c(min(row_coord[, 1]), max(row_coord[, 1])+10),
ylim = c(min(row_coord[, 2]), max(row_coord[, 2])+10),
zlim = c(min(row_coord[, 3]), max(row_coord[, 3])+10),
col = "darkcyan",
size=10,
main = "PCA D. hungarica")
rgl::text3d(x=row_coord[c("WN_TF_1", "WN_TF_2", "WN_TF_3"), 1]+5,
y=row_coord[c("WN_TF_1", "WN_TF_2", "WN_TF_3"), 2]+5,
z=row_coord[c("WN_TF_1", "WN_TF_2", "WN_TF_3"), 3]+5,
text=c("WN_TF_1", "WN_TF_2", "WN_TF_3"),
col = "darkcyan",
size=10)
rgl::points3d(x=row_coord[c("SD_TF_1", "SD_TF_2", "SD_TF_3"), 1],
y=row_coord[c("SD_TF_1", "SD_TF_2", "SD_TF_3"), 2],
z=row_coord[c("SD_TF_1", "SD_TF_2", "SD_TF_3"), 3],
col = "darkorange",
size=10)
rgl::text3d(x=row_coord[c("SD_TF_1", "SD_TF_2", "SD_TF_3"), 1]+5,
y=row_coord[c("SD_TF_1", "SD_TF_2", "SD_TF_3"), 2]+5,
z=row_coord[c("SD_TF_1", "SD_TF_2", "SD_TF_3"), 3]+5,
text=c("SD_TF_1", "SD_TF_2", "SD_TF_3"),
col = "darkorange",
size=10)
## Activate for or gif generation
#par3d(windowRect = c(20, 30, 800, 800))
#rgl::movie3d(
# movie="3DAnimated_PCA_diohu",
# spin3d( axis = c(0, 0, 1), rpm = 3),
# duration = 20,
# dir = "../figures",
# type = "gif",
# clean = TRUE, webshot=FALSE, fps=10)
counts_data <- t(psegr_clr)
metadata_tmp <- data.frame(metadata_table, stringsAsFactors = TRUE)
permanova <- adonis2(formula = as.formula(paste0("counts_data~", "condition")),
data = metadata_tmp, method="euclidean", # method="bray", #otu_data~TIME
permutations = 999, sqrt.dist = FALSE, add = FALSE, by = "terms")
pv = permanova$`Pr(>F)`[1]
# print(pv)
rm(counts_data, metadata_tmp)
pca_tmp <- rbind(res_pca$ind$coord,
res_pca$ind.sup$coord) %>% data.frame() %>%
rownames_to_column("sample")
pca_data <- left_join(pca_tmp, rownames_to_column(metadata_table), by=join_by("sample" == "rowname"))
rm(pca_tmp)
D1_text = paste0("Dim 1 (",
round(data.frame(res_pca$eig)$percentage.of.variance[1], digits=2),
"%)")
D2_text = paste0("Dim 2 (",
round(data.frame(res_pca$eig)$percentage.of.variance[2], digits=2),
"%)")
D3_text = paste0("Dim 3 (",
round(data.frame(res_pca$eig)$percentage.of.variance[3], digits=2),
"%)")
ggplot(pca_data) +
geom_point(aes(x=Dim.1, y=Dim.2, colour=color, ), size=3) +
scale_colour_identity("Condition", labels=c("Winter Night", "Summer Day"), guide = "legend") +
ggrepel::geom_text_repel(aes(x=Dim.1, y=Dim.2, colour=color, label=sample),
nudge_y = -2, seed = 42, segment.size=0.1, show.legend = FALSE) +
xlab(D1_text) +
ylab(D2_text) +
ggtitle("PCA metatranscriptomics \nP. graminis") +
labs(caption = paste0("PERMANOVA on condition: p-value: ", pv)) +
theme(panel.background= element_rect(fill="#f4f4f4"),
plot.caption = element_text(size=7)) -> p1
ggplot(pca_data) +
geom_point(aes(x=Dim.1, y=Dim.3, colour=color), size=3) +
scale_colour_identity("Condition", labels=c("Winter Night", "Summer Day"), guide = "legend") +
ggrepel::geom_text_repel(aes(x=Dim.1, y=Dim.3, colour=color, label=sample),
nudge_y = -2, seed = 42, segment.size=0.1, show.legend = FALSE) +
xlab(D1_text) +
ylab(D3_text) +
ggtitle("PCA metatranscriptomics \nP. graminis") +
labs(caption = paste0("PERMANOVA on condition: p-value: ", pv)) +
theme(panel.background= element_rect(fill="#f4f4f4"),
plot.caption = element_text(size=7)) -> p2
ggplot(pca_data) +
geom_point(aes(x=Dim.2, y=Dim.3, colour=color), size=3) +
scale_colour_identity("Condition", labels=c("Winter Night", "Summer Day"), guide = "legend") +
ggrepel::geom_text_repel(aes(x=Dim.2, y=Dim.3, colour=color, label=sample),
nudge_y = -2, seed = 42, segment.size=0.1, show.legend = FALSE) +
xlab(D2_text) +
ylab(D3_text) +
ggtitle("PCA metatranscriptomics \nP. graminis") +
labs(caption = paste0("PERMANOVA on condition: p-value: ", pv)) +
theme(panel.background= element_rect(fill="#f4f4f4"),
plot.caption = element_text(size=7)) -> p3
plotgrid_PCA <- ggarrange(p1, p2, p3, labels = c("A", "B", "C"), common.legend=TRUE, legend="bottom", nrow=1)
print(plotgrid_PCA)
ggsave(plot = plotgrid_PCA, filename = "../figures/PCA_metatranscriptomics_3_composantes_psegr.tiff", dpi = 300, width = 15, height = 5, bg="white")
ggsave(plot=p1, filename = "../figures/PCA_metatranscriptomics_psegr.tiff", dpi = 300, width = 6, height = 5, bg="white")
row_coord <- res_pca$ind$coord
invisible(rgl::open3d())
#rgl::bg3d("lightgray")
rgl::plot3d(x=row_coord[c("WN_TF_1", "WN_TF_2", "WN_TF_3"), 1],
y=row_coord[c("WN_TF_1", "WN_TF_2", "WN_TF_3"), 2],
z=row_coord[c("WN_TF_1", "WN_TF_2", "WN_TF_3"), 3],
xlab = paste0(colnames(row_coord)[1] , " (" , substr(as.character(res_pca$eig[,2][1]), 1, 5) , "%)"),
ylab = paste0(colnames(row_coord)[2] , " (" , substr(as.character(res_pca$eig[,2][2]), 1, 5) , "%)"),
zlab = paste0(colnames(row_coord)[3] , " (" , substr(as.character(res_pca$eig[,2][3]), 1, 5) , "%)"),
xlim = c(min(row_coord[, 1]), max(row_coord[, 1])+10),
ylim = c(min(row_coord[, 2]), max(row_coord[, 2])+10),
zlim = c(min(row_coord[, 3]), max(row_coord[, 3])+10),
col = "darkcyan",
size=10,
main = "PCA P. graminis")
rgl::text3d(x=row_coord[c("WN_TF_1", "WN_TF_2", "WN_TF_3"), 1]+5,
y=row_coord[c("WN_TF_1", "WN_TF_2", "WN_TF_3"), 2]+5,
z=row_coord[c("WN_TF_1", "WN_TF_2", "WN_TF_3"), 3]+5,
text=c("WN_TF_1", "WN_TF_2", "WN_TF_3"),
col = "darkcyan",
size=10)
rgl::points3d(x=row_coord[c("SD_TF_1", "SD_TF_2", "SD_TF_3"), 1],
y=row_coord[c("SD_TF_1", "SD_TF_2", "SD_TF_3"), 2],
z=row_coord[c("SD_TF_1", "SD_TF_2", "SD_TF_3"), 3],
col = "darkorange",
size=10)
rgl::text3d(x=row_coord[c("SD_TF_1", "SD_TF_2", "SD_TF_3"), 1]+3,
y=row_coord[c("SD_TF_1", "SD_TF_2", "SD_TF_3"), 2]+3,
z=row_coord[c("SD_TF_1", "SD_TF_2", "SD_TF_3"), 3]+3,
text=c("SD_TF_1", "SD_TF_2", "SD_TF_3"),
col = "darkorange",
size=10)
## Activate for or gif generation
#par3d(windowRect = c(20, 30, 800, 800))
#rgl::movie3d(
# movie="3DAnimated_PCA_psegr",
# spin3d( axis = c(0, 0, 1), rpm = 3),
# duration = 20,
# dir = "../figures",
# type = "gif",
# clean = TRUE, webshot=FALSE, fps=10)
counts_data <- t(psesy_clr)
metadata_tmp <- data.frame(metadata_table, stringsAsFactors = TRUE)
permanova <- adonis2(formula = as.formula(paste0("counts_data~", "condition")),
data = metadata_tmp, method="euclidean", # method="bray", #otu_data~TIME
permutations = 999, sqrt.dist = FALSE, add = FALSE, by = "terms")
pv = permanova$`Pr(>F)`[1]
# print(pv)
rm(counts_data, metadata_tmp)
pca_tmp <- rbind(res_pca$ind$coord,
res_pca$ind.sup$coord) %>% data.frame() %>%
rownames_to_column("sample")
pca_data <- left_join(pca_tmp, rownames_to_column(metadata_table), by=join_by("sample" == "rowname"))
rm(pca_tmp)
D1_text = paste0("Dim 1 (",
round(data.frame(res_pca$eig)$percentage.of.variance[1], digits=2),
"%)")
D2_text = paste0("Dim 2 (",
round(data.frame(res_pca$eig)$percentage.of.variance[2], digits=2),
"%)")
D3_text = paste0("Dim 3 (",
round(data.frame(res_pca$eig)$percentage.of.variance[3], digits=2),
"%)")
ggplot(pca_data) +
geom_point(aes(x=Dim.1, y=Dim.2, colour=color, ), size=3) +
scale_colour_identity("Condition", labels=c("Winter Night", "Summer Day"), guide = "legend") +
ggrepel::geom_text_repel(aes(x=Dim.1, y=Dim.2, colour=color, label=sample),
nudge_y = -2, seed = 42, segment.size=0.1, show.legend = FALSE) +
xlab(D1_text) +
ylab(D2_text) +
ggtitle("PCA metatranscriptomics \nP. syringae") +
labs(caption = paste0("PERMANOVA on condition: p-value: ", pv)) +
theme(panel.background= element_rect(fill="#f4f4f4"),
plot.caption = element_text(size=7)) -> p1
ggplot(pca_data) +
geom_point(aes(x=Dim.1, y=Dim.3, colour=color), size=3) +
scale_colour_identity("Condition", labels=c("Winter Night", "Summer Day"), guide = "legend") +
ggrepel::geom_text_repel(aes(x=Dim.1, y=Dim.3, colour=color, label=sample),
nudge_y = -2, seed = 42, segment.size=0.1, show.legend = FALSE) +
xlab(D1_text) +
ylab(D3_text) +
ggtitle("PCA metatranscriptomics \nP. syringae") +
labs(caption = paste0("PERMANOVA on condition: p-value: ", pv)) +
theme(panel.background= element_rect(fill="#f4f4f4"),
plot.caption = element_text(size=7)) -> p2
ggplot(pca_data) +
geom_point(aes(x=Dim.2, y=Dim.3, colour=color), size=3) +
scale_colour_identity("Condition", labels=c("Winter Night", "Summer Day"), guide = "legend") +
ggrepel::geom_text_repel(aes(x=Dim.2, y=Dim.3, colour=color, label=sample),
nudge_y = -2, seed = 42, segment.size=0.1, show.legend = FALSE) +
xlab(D2_text) +
ylab(D3_text) +
ggtitle("PCA metatranscriptomics \nP. syringae") +
labs(caption = paste0("PERMANOVA on condition: p-value: ", pv)) +
theme(panel.background= element_rect(fill="#f4f4f4"),
plot.caption = element_text(size=7)) -> p3
plotgrid_PCA <- ggarrange(p1, p2, p3, labels = c("A", "B", "C"), common.legend=TRUE, legend="bottom", nrow=1)
print(plotgrid_PCA)
ggsave(plot = plotgrid_PCA, filename = "../figures/PCA_metatranscriptomics_3_composantes_psesy.tiff", dpi = 300, width = 15, height = 5, bg="white")
ggsave(plot=p1, filename = "../figures/PCA_metatranscriptomics_psesy.tiff", dpi = 300, width = 6, height = 5, bg="white")
row_coord <- res_pca$ind$coord
invisible(rgl::open3d())
#rgl::bg3d("lightgray")
rgl::plot3d(x=row_coord[c("WN_TF_1", "WN_TF_2", "WN_TF_3"), 1],
y=row_coord[c("WN_TF_1", "WN_TF_2", "WN_TF_3"), 2],
z=row_coord[c("WN_TF_1", "WN_TF_2", "WN_TF_3"), 3],
xlab = paste0(colnames(row_coord)[1] , " (" , substr(as.character(res_pca$eig[,2][1]), 1, 5) , "%)"),
ylab = paste0(colnames(row_coord)[2] , " (" , substr(as.character(res_pca$eig[,2][2]), 1, 5) , "%)"),
zlab = paste0(colnames(row_coord)[3] , " (" , substr(as.character(res_pca$eig[,2][3]), 1, 5) , "%)"),
xlim = c(min(row_coord[, 1]), max(row_coord[, 1])+10),
ylim = c(min(row_coord[, 2]), max(row_coord[, 2])+10),
zlim = c(min(row_coord[, 3]), max(row_coord[, 3])+10),
col = "darkcyan",
size=10,
main = "PCA P. syringae")
rgl::text3d(x=row_coord[c("WN_TF_1", "WN_TF_2", "WN_TF_3"), 1]+3,
y=row_coord[c("WN_TF_1", "WN_TF_2", "WN_TF_3"), 2]+3,
z=row_coord[c("WN_TF_1", "WN_TF_2", "WN_TF_3"), 3]+3,
text=c("WN_TF_1", "WN_TF_2", "WN_TF_3"),
col = "darkcyan",
size=10)
rgl::points3d(x=row_coord[c("SD_TF_1", "SD_TF_2", "SD_TF_3"), 1],
y=row_coord[c("SD_TF_1", "SD_TF_2", "SD_TF_3"), 2],
z=row_coord[c("SD_TF_1", "SD_TF_2", "SD_TF_3"), 3],
col = "darkorange",
size=10)
rgl::text3d(x=row_coord[c("SD_TF_1", "SD_TF_2", "SD_TF_3"), 1]+3,
y=row_coord[c("SD_TF_1", "SD_TF_2", "SD_TF_3"), 2]+3,
z=row_coord[c("SD_TF_1", "SD_TF_2", "SD_TF_3"), 3]+3,
text=c("SD_TF_1", "SD_TF_2", "SD_TF_3"),
col = "darkorange",
size=10)
## Activate for or gif generation
#par3d(windowRect = c(20, 30, 800, 800))
#rgl::movie3d(
# movie="3DAnimated_PCA_psesy",
# spin3d( axis = c(0, 0, 1), rpm = 3),
# duration = 20,
# dir = "../figures",
# type = "gif",
# clean = TRUE, webshot=FALSE, fps=10)
counts_data <- t(rhoen_clr)
metadata_tmp <- data.frame(metadata_table, stringsAsFactors = TRUE)
permanova <- adonis2(formula = as.formula(paste0("counts_data~", "condition")),
data = metadata_tmp, method="euclidean", # method="bray", #otu_data~TIME
permutations = 999, sqrt.dist = FALSE, add = FALSE, by = "terms")
pv = permanova$`Pr(>F)`[1]
# print(pv)
rm(counts_data, metadata_tmp)
pca_tmp <- rbind(res_pca$ind$coord,
res_pca$ind.sup$coord) %>% data.frame() %>%
rownames_to_column("sample")
pca_data <- left_join(pca_tmp, rownames_to_column(metadata_table), by=join_by("sample" == "rowname"))
rm(pca_tmp)
D1_text = paste0("Dim 1 (",
round(data.frame(res_pca$eig)$percentage.of.variance[1], digits=2),
"%)")
D2_text = paste0("Dim 2 (",
round(data.frame(res_pca$eig)$percentage.of.variance[2], digits=2),
"%)")
D3_text = paste0("Dim 3 (",
round(data.frame(res_pca$eig)$percentage.of.variance[3], digits=2),
"%)")
ggplot(pca_data) +
geom_point(aes(x=Dim.1, y=Dim.2, colour=color, ), size=3) +
scale_colour_identity("Condition", labels=c("Winter Night", "Summer Day"), guide = "legend") +
ggrepel::geom_text_repel(aes(x=Dim.1, y=Dim.2, colour=color, label=sample),
nudge_y = -2, seed = 42, segment.size=0.1, show.legend = FALSE) +
xlab(D1_text) +
ylab(D2_text) +
ggtitle("PCA metatranscriptomics \nR. enclensis") +
labs(caption = paste0("PERMANOVA on condition: p-value: ", pv)) +
theme(panel.background= element_rect(fill="#f4f4f4"),
plot.caption = element_text(size=7)) -> p1
ggplot(pca_data) +
geom_point(aes(x=Dim.1, y=Dim.3, colour=color), size=3) +
scale_colour_identity("Condition", labels=c("Winter Night", "Summer Day"), guide = "legend") +
ggrepel::geom_text_repel(aes(x=Dim.1, y=Dim.3, colour=color, label=sample),
nudge_y = -2, seed = 42, segment.size=0.1, show.legend = FALSE) +
xlab(D1_text) +
ylab(D3_text) +
ggtitle("PCA metatranscriptomics \nR. enclensis") +
labs(caption = paste0("PERMANOVA on condition: p-value: ", pv)) +
theme(panel.background= element_rect(fill="#f4f4f4"),
plot.caption = element_text(size=7)) -> p2
ggplot(pca_data) +
geom_point(aes(x=Dim.2, y=Dim.3, colour=color), size=3) +
scale_colour_identity("Condition", labels=c("Winter Night", "Summer Day"), guide = "legend") +
ggrepel::geom_text_repel(aes(x=Dim.2, y=Dim.3, colour=color, label=sample),
nudge_y = -2, seed = 42, segment.size=0.1, show.legend = FALSE) +
xlab(D2_text) +
ylab(D3_text) +
ggtitle("PCA metatranscriptomics \nR. enclensis") +
labs(caption = paste0("PERMANOVA on condition: p-value: ", pv)) +
theme(panel.background= element_rect(fill="#f4f4f4"),
plot.caption = element_text(size=7)) -> p3
plotgrid_PCA <- ggarrange(p1, p2, p3, labels = c("A", "B", "C"), common.legend=TRUE, legend="bottom", nrow=1)
print(plotgrid_PCA)
ggsave(plot = plotgrid_PCA, filename = "../figures/PCA_metatranscriptomics_3_composantes_rhoen.tiff", dpi = 300, width = 15, height = 5, bg="white")
ggsave(plot=p1, filename = "../figures/PCA_metatranscriptomics_rhoen.tiff", dpi = 300, width = 6, height = 5, bg="white")
row_coord <- res_pca$ind$coord
invisible(rgl::open3d())
#rgl::bg3d("lightgray")
rgl::plot3d(x=row_coord[c("WN_TF_1", "WN_TF_2", "WN_TF_3"), 1],
y=row_coord[c("WN_TF_1", "WN_TF_2", "WN_TF_3"), 2],
z=row_coord[c("WN_TF_1", "WN_TF_2", "WN_TF_3"), 3],
xlab = paste0(colnames(row_coord)[1] , " (" , substr(as.character(res_pca$eig[,2][1]), 1, 5) , "%)"),
ylab = paste0(colnames(row_coord)[2] , " (" , substr(as.character(res_pca$eig[,2][2]), 1, 5) , "%)"),
zlab = paste0(colnames(row_coord)[3] , " (" , substr(as.character(res_pca$eig[,2][3]), 1, 5) , "%)"),
xlim = c(min(row_coord[, 1]), max(row_coord[, 1])+10),
ylim = c(min(row_coord[, 2]), max(row_coord[, 2])+10),
zlim = c(min(row_coord[, 3]), max(row_coord[, 3])+10),
col = "darkcyan",
size=10,
main = "PCA R. enclensis")
rgl::text3d(x=row_coord[c("WN_TF_1", "WN_TF_2", "WN_TF_3"), 1]+3,
y=row_coord[c("WN_TF_1", "WN_TF_2", "WN_TF_3"), 2]+3,
z=row_coord[c("WN_TF_1", "WN_TF_2", "WN_TF_3"), 3]+3,
text=c("WN_TF_1", "WN_TF_2", "WN_TF_3"),
col = "darkcyan",
size=10)
rgl::points3d(x=row_coord[c("SD_TF_1", "SD_TF_2", "SD_TF_3"), 1],
y=row_coord[c("SD_TF_1", "SD_TF_2", "SD_TF_3"), 2],
z=row_coord[c("SD_TF_1", "SD_TF_2", "SD_TF_3"), 3],
col = "darkorange",
size=10)
rgl::text3d(x=row_coord[c("SD_TF_1", "SD_TF_2", "SD_TF_3"), 1]+3,
y=row_coord[c("SD_TF_1", "SD_TF_2", "SD_TF_3"), 2]+3,
z=row_coord[c("SD_TF_1", "SD_TF_2", "SD_TF_3"), 3]+3,
text=c("SD_TF_1", "SD_TF_2", "SD_TF_3"),
col = "darkorange",
size=10)
## Activate for or gif generation
#par3d(windowRect = c(20, 30, 800, 800))
#rgl::movie3d(
# movie="3DAnimated_PCA_rhoen",
# spin3d( axis = c(0, 0, 1), rpm = 3),
# duration = 20,
# dir = "../figures",
# type = "gif",
# clean = TRUE, webshot=FALSE, fps=10)
#### Sample distances ####
message("Computing inter-sample distances")
## Pearson
dist_pearson_diohu <- as.dist(1 - cor(diohu_clr, use = "everything",
method = "pearson"))
par(bg = "white", mfrow=c(1, 1))
plotColoredClusters(tree_pearson_diohu, labs = row.names(metadata_table),
ylab = NA, xlab = NA, cex = 1, las = 1,
cols = metadata_table$color, col = "black",
main = "Samples Pearson distance hierarchical clustering,
complete linkage, D. hungarica genes.")
Natural separation of samples according to their incubation condition for D. hungarica.
#### Sample distances ####
message("Computing inter-sample distances")
## Pearson
dist_pearson_psegr <- as.dist(1 - cor(psegr_clr, use = "everything",
method = "pearson"))
par(bg = "white", mfrow=c(1, 1))
plotColoredClusters(tree_pearson_psegr, labs = row.names(metadata_table),
ylab = NA, xlab = NA, cex = 1, las = 1,
cols = metadata_table$color, col = "black",
main = "Samples Pearson distance hierarchical clustering,
complete linkage, P. graminis genes.")
For P. graminis, the clustering does not distinguish between the two incubation conditions, probably because of the very low amounts of counts attributed to this species.
#### Sample distances ####
message("Computing inter-sample distances")
## Pearson
dist_pearson_psesy <- as.dist(1 - cor(psesy_clr, use = "everything",
method = "pearson"))
par(bg = "white", mfrow=c(1, 1))
plotColoredClusters(tree_pearson_psesy, labs = row.names(metadata_table),
ylab = NA, xlab = NA, cex = 1, las = 1,
cols = metadata_table$color, col = "black",
main = "Samples Pearson distance hierarchical clustering,
complete linkage, P. syringae genes.")
Samples are separated according to their experimental condition, which is encouraging for further analysis of P. syringae.
#### Sample distances ####
message("Computing inter-sample distances")
## Pearson
dist_pearson_rhoen <- as.dist(1 - cor(rhoen_clr, use = "everything",
method = "pearson"))
par(bg = "white", mfrow=c(1, 1))
plotColoredClusters(tree_pearson_rhoen, labs = row.names(metadata_table),
ylab = NA, xlab = NA, cex = 1, las = 1,
cols = metadata_table$color, col = "black",
main = "Samples Pearson distance hierarchical clustering,
complete linkage, R. enclensis genes.")
Again, samples are naturally separated according to their incubation condition for R. enclensis.
MTXmodel was used on the complete assemblage at once. As input, filtered data (without the undetected genes) but untransformed was given to MTXmodel, as it performs its own CLR normalisation.
analysis_method = 'LM'
correction = 'BH'
,
fit_data <- MTXmodel(
counts_standard, metadata_table, 'MTXmodel_output',
cores = 2,
fixed_effects = c('temperature'),
reference = c("temperature,5"),
min_abundance = 0,
min_prevalence = 0,
normalization = 'CLR',
analysis_method = 'LM',
correction = 'BH',
standardize = FALSE,
transform = 'NONE',
plot_scatter = FALSE,
plot_heatmap = TRUE)
# Loading MTXmodel results
res_mtx <- read.csv("../scripts/MTXmodel_output/all_results.tsv", sep="\t", row.names="feature")
tryCatch({
inner_join(rownames_to_column(res_mtx), annotation_table_fig, by=c("rowname" = "gene")) -> res_mtx_fig
res_mtx_fig %>% filter(abs(qval) <= 0.2) -> res_mtx_fig
},
error=function(e){str(e)
}
)
## Data for final table
annotation_table_long <- read.csv("../data/annotations_final_community_long2.tsv", sep="\t", row.names = "Geneid")
tryCatch({
annotation_table_long$gene <- row.names(annotation_table_long)
inner_join(rownames_to_column(res_mtx), annotation_table_long, by=c("rowname" = "gene")) -> res_mtx_filt
res_mtx_filt %>% filter(abs(qval) <= 0.2) -> res_mtx_filt
},
error=function(e){str(e)
}
)
As a complementary approach, separate differential analyses were conducted for each species separately with DESeq2.
We use filtered counts data (only genes detected in at least 70% of biological samples). Zeroes are conserved as is. We round the counts matrix before performing DESeq2 analysis.
tryCatch({
res_df %>% inner_join(annotation_table_fig, by="gene") -> tmp_res_df
tmp_res_df %>% filter(padj <= 0.2) -> res_deseq_fig
},
error=function(e){str(e)
}
)
rm(tmp_res_df)
## data for final table
annotation_table_long <- read.csv("../data/annotations_final_community_long2.tsv", sep="\t", row.names = "Geneid")
tryCatch({
annotation_table_long$gene <- row.names(annotation_table_long)
res_df %>% inner_join(annotation_table_long, by="gene") -> res_df
res_df %>% filter(padj <= 0.2) -> res_deseq_filt # & abs(log2FoldChange) >= 1
},
error=function(e){str(e)
}
)
temp_df <- right_join(rownames_to_column(res_mtx), annotation_table_long, join_by("rowname"=="gene"))
rename(temp_df, "gene" = "rowname") -> temp_df
all_genes <- full_join(res_df, temp_df, by=c("gene", "locus_tag", "transcriptId", "Organism",
"Chr", "Start", "End", "Strand", "Length",
"product", "COG_process", "COG_category",
"COGid", "GOs", "COG_cat", "COG_category_long",
"ecNum" ),
suffix = c("", "w"))
rm(temp_df)
columns_to_remove <- grep(".w", names(all_genes))
all_genes %>% dplyr::select(-columns_to_remove) %>% filter(qval<=0.2 | padj<=0.2 ) -> all_genes_filtered
write.table(all_genes_filtered, "../results/DEG_all_methods_community.tsv", sep='\t', row.names = FALSE)
write.table(res_df, "../results/DEG_DESeq2_all_community.tsv", sep='\t', row.names = FALSE)
sign_deseq2_df <- res_df
#### Both methods DEGs
full_join(all_genes_filtered, annotation_table_fig, by="gene") -> data
data$title <- "Differentially expressed genes SD vs WN by strain (DESeq2 & MTXmodel)"
data$Organism <- factor(data$organism, levels=c("D. hungarica", "P. graminis", "P. syringae", "R. enclensis"))
custom_strips <- strip_nested(background_x = elem_list_rect(fill = c("lightgrey",
species_colours[["D.hungarica"]],
species_colours[["P.syringae"]],
species_colours[["R.enclensis"]])),
text_x = list(element_text(face = "plain", colour = "black", size = 17),
element_text(face = "italic", colour = "white", size = 15),
element_text(face = "italic", colour = "white", size = 15),
element_text(face = "italic", colour = "white", size = 15)),
by_layer_x = FALSE)
ggplot(data, aes(x = coef, y = COG_category.y, color = after_scale(alpha(fill, 0.3)), fill=COG_category_long.y, alpha=0.7, label=name_figure)) +
geom_point(aes(alpha=0.7), position="dodge") +
geom_violin(aes(alpha=0.3), show.legend = TRUE) +
annotate("rect", xmin=-Inf, xmax=0, ymin=-Inf, ymax=Inf, fill="#184ca5", alpha=0.1) +
annotate("rect", xmin=0, xmax=Inf, ymin=-Inf, ymax=Inf, fill="gold", alpha=0.1) +
geom_vline(xintercept = 0, linetype="dashed") +
ylab("COG category") +
xlab("MTXmodel coefficient") +
ggrepel::geom_text_repel(nudge_y = 0.5, segment.size=0.1, seed = 42) +
guides(fill=guide_legend(ncol=4), alpha="none", color="none") + #color="none",
scale_y_discrete(limits=rev(names(vect_COG_category_long))) +
scale_colour_manual(limits=vect_COG_category_long, values=COG_colours, drop=FALSE) +
scale_fill_manual(limits=vect_COG_category_long, values=COG_colours, drop=FALSE) +
theme(axis.title = element_text(size=13, "Differentially expressed genes SD vs WN"),
axis.text = element_text(size=13),
strip.text.y = element_text(size = 17),
#strip.text.x = element_text(size = 17, face = "italic"),
legend.text = element_text(size = 12),
legend.title = element_blank(),
legend.key.size = unit(0.5, "line"),
legend.position = "bottom",
panel.background = element_rect(fill = "#f4f4f4")) +
facet_nested(~title + Organism, strip = custom_strips, drop=TRUE) -> plot_degs
# print(plot_degs)
#
# ggsave(plot = plot_degs, filename = "../figures/DEG_day_vs_night_commu_both_methods_vertical.tiff", dpi = 300, width = 13, height = 13, bg="white")
columns_to_remove <- grep(".w", names(all_genes))
all_genes %>% dplyr::select(-columns_to_remove) %>% filter(!is.na(coef)) -> all_genes_unfiltered
full_join(all_genes_unfiltered, annotation_table_long, by="gene") %>% filter(!is.na(coef)) -> data
data[data$COG_category_long.y == "S: Function Unknown", "COG_category_long.y"] <- "S: Function unknown"
data$title <- "All detected genes"
data$subtitle <- "Assemblage"
data$Organism <- factor(data$organism, levels=c("D. hungarica", "P. graminis", "P. syringae", "R. enclensis"))
data$COG_category_fig <- substr(data$COG_category_long.y, 0, 1)
custom_strips <- strip_nested(background_x = elem_list_rect(fill = c("lightgrey", "black")),
text_x = list(element_text(face = "plain", colour = "black", size = 17),
element_text(face = "italic", colour = "white", size = 15)),
by_layer_x = FALSE)
custom_strips_strains <- strip_nested(background_x = elem_list_rect(fill = c("lightgrey",
species_colours[["D.hungarica"]],
species_colours[["P.syringae"]],
species_colours[["R.enclensis"]])),
text_x = list(element_text(face = "plain", colour = "black", size = 17),
element_text(face = "italic", colour = "white", size = 15),
element_text(face = "italic", colour = "white", size = 15),
element_text(face = "italic", colour = "white", size = 15)),
by_layer_x = FALSE)
ggplot(data, aes(x = coef, y = COG_category_fig,color = after_scale(alpha(fill, 0.3)), fill=COG_category_long.y, alpha=0.9)) + #label=name_figure
# geom_point(aes(alpha=0.7)) + #, position="dodge"
geom_violin(aes(alpha=0.3), show.legend = TRUE) +
geom_boxplot(outlier.colour = "black", width=0.2, color="white", outlier.alpha = 0.4, alpha=0.2, show.legend = FALSE) +
annotate("rect", xmin=-Inf, xmax=0, ymin=-Inf, ymax=Inf, fill="#184ca5", alpha=0.1) +
annotate("rect", xmin=0, xmax=Inf, ymin=-Inf, ymax=Inf, fill="gold", alpha=0.1) +
geom_vline(xintercept = 0, linetype="dashed") +
ylab("COG category") +
xlab("MTXmodel coefficient") +
# ggrepel::geom_text_repel(nudge_y = 0.5, segment.size=0.1, seed = 42) +
guides(fill=guide_legend(ncol=4), alpha="none", color="none") + #color="none",
scale_y_discrete(limits=rev(names(vect_COG_category_long))) +
scale_colour_manual(limits=vect_COG_category_long, values=COG_colours, drop=FALSE) +
scale_fill_manual(limits=vect_COG_category_long, values=COG_colours, drop=FALSE) +
theme(axis.title = element_text(size=13, "Expression coefficient (in SD vs WN) of all assemblage detected genes"),
axis.text = element_text(size=13),
strip.text.y = element_text(size = 17),
#strip.text.x = element_text(size = 17, face = "italic"),
legend.text = element_text(size = 12),
legend.title = element_blank(),
legend.key.size = unit(0.5, "line"),
legend.position = "bottom",
panel.background = element_rect(fill = "#f4f4f4")) -> p
p + facet_nested(~title + subtitle, strip = custom_strips, drop=TRUE) -> plot_assemblage
p + facet_nested(~title + Organism, strip = custom_strips_strains, drop=TRUE) +
guides(fill=guide_legend(ncol=3), alpha="none", color="none") +
theme(legend.text = element_text(size = 10))-> plot_strains
# print(plot_assemblage)
#
ggsave(plot = plot_assemblage, filename = "../figures/expressed_genes_MTX_coeff_assemblage.tiff", dpi = 300, width = 13/3, height = 13, bg="white")
ggsave(plot = plot_strains, filename = "../figures/expressed_genes_MTX_coeff_strains.tiff", dpi = 300, width = 13, height = 13, bg="white")
plot_all <- ggarrange(plot_assemblage, plot_degs,
labels = c("A", "B"),
common.legend=TRUE,
legend="bottom",
nrow=1,
widths = c(1, 3))
print(plot_all)
ggsave(plot = plot_all, filename = "../figures/Figure_4.tiff", dpi = 300, width = 13+4.35, height = 13, bg="white")
print(plot_strains)
list_venn <- list(res_deseq_filt[(res_deseq_filt$SAMPLE_COMPARISON == "17_VS_5"), "gene"],
res_mtx_filt[, "rowname"])
invisible(grid.newpage())
draw.pairwise.venn(area1 = length(list_venn[[1]]),
area2 = length(list_venn[[2]]),
cross.area = length(intersect.Vector(list_venn[[1]], list_venn[[2]])),
fill = c("#D53F7F", "#039EBD"),
lty = "blank",
fontfamily = "Helvetica",
cex = rep(2, 3),
cat.cex = rep(1.5, 2),
cat.pos = c(-50, 50),
cat.dist = c(-0.05, -0.05),
cat.prompts = TRUE,
cat.col = c("#D53F7F", "#039EBD"),
cat.fontfamily = "Helvetica",
category = c("DESeq \nalone", "MTXModel \nassemblage"),
title = "Differentially abundant genes found by DESeq2 and MTXmodel",
margin = 0.1) -> venn_plot
identified_metabolites <- left_join(metabolomics_annotations[,c("metabolite identification", "ID")],
rownames_to_column(metabolomics_all_times_filtered), by = join_by("ID" == "rowname"))
identified_metabolites <- identified_metabolites[identified_metabolites$`metabolite identification` != "unknown",]
row.names(identified_metabolites) <- identified_metabolites$`metabolite identification`
identified_metabolites <- identified_metabolites %>% t() %>% data.frame()
identified_metabolites <- identified_metabolites[3:nrow(identified_metabolites),]
identified_metabolites$sample <- row.names(identified_metabolites)
identified_metabolites <- left_join(identified_metabolites, rownames_to_column(metadata_table_metaB_all_times), by = join_by("sample" == "rowname"))
identified_metabolites %>% rename("2-Aminobenzoic acid" = "X2.aminobenzoic.acid",
"DL-Methionine sulfoxide" = "DL.methionine.sulfoxide",
"Pyridoxal" = "pyridoxal",
"D-Pantothenic acid" = "D.pantothenic.acid" ,
"N6-Acetyl-L-lysine" = "N6.acetyl.L.lysine",
"L-Glutamic acid" = "L.glutamic.acid",
"L-Isoleucine" = "L.isoleucine",
"Butyryl-L-carnitine" = "butyryl.L.carnitine",
"Acetyl-L-carnitine" = "acetyl.L.carnitine",
"Isovaleryl-L-carnitine" = "isovaleryl.L.carnitine") -> identified_metabolites
identified_metabolites %>%
pivot_longer(cols = c("2-Aminobenzoic acid", "DL-Methionine sulfoxide", "Pyridoxal",
"D-Pantothenic acid", "N6-Acetyl-L-lysine", "L-Glutamic acid",
"L-Isoleucine", "Butyryl-L-carnitine", "Acetyl-L-carnitine",
"Isovaleryl-L-carnitine"),
values_to = "intensity") -> identified_metabolites
identified_metabolites[identified_metabolites$temperature == 17 & identified_metabolites$time == 0, "condition"] <- "SD_T0"
identified_metabolites[identified_metabolites$temperature == 5 & identified_metabolites$time == 0, "condition"] <- "WN_T0"
identified_metabolites[identified_metabolites$temperature == 17 & identified_metabolites$time == 3, "condition"] <- "SD_TF"
identified_metabolites[identified_metabolites$temperature == 5 & identified_metabolites$time == 3, "condition"] <- "WN_TF"
identified_metabolites[identified_metabolites$temperature == 17, "fill"] <- "#F8AD18"
identified_metabolites[identified_metabolites$temperature == 5, "fill"] <- "#89DDF8"
identified_metabolites[identified_metabolites$temperature == 17, "colour"] <- "#b0790b"
identified_metabolites[identified_metabolites$temperature == 5, "colour"] <- "#1aa7d4"
identified_metabolites$intensity <- as.numeric(identified_metabolites$intensity)
identified_metabolites %>%
filter(name %in% c("2-Aminobenzoic acid", "L-Glutamic acid",
"DL-Methionine sulfoxide", "N6-Acetyl-L-lysine",
"D-Pantothenic acid", "Pyridoxal" )) %>%
filter(condition %in% c("SD_TF", "WN_TF")) %>%
# ggplot(aes(y=intensity, x=condition, group=condition)) +
ggplot(aes(y=intensity, x=name, fill=condition)) +
geom_boxplot(aes(fill=fill, colour=colour)) +
scale_fill_identity() +
scale_color_identity() +
ylab(expression("intensity * 10"^" -2")) +
theme(strip.text = element_text(face = "bold", size=12),
axis.title.x = element_blank(),
axis.text.x = element_text(size=10)) +
scale_x_discrete(labels = function(x) str_wrap(x, width = 14, whitespace_only = FALSE)) +
# facet_wrap(~name, scale="free", ncol=2) +
scale_y_continuous(labels = function(x) format(x * 100, scientific = FALSE),
limits = c(0,0.055), breaks = extended_breaks(n=8)) -> plot_wn
identified_metabolites %>%
filter(name %in% c("L-Isoleucine", "Acetyl-L-carnitine",
"Butyryl-L-carnitine",
"Isovaleryl-L-carnitine")) %>%
filter(condition %in% c("SD_TF", "WN_TF")) %>%
# ggplot(aes(y=intensity, x=condition, group=condition)) +
ggplot(aes(y=intensity, x=name, fill=condition)) +
geom_boxplot(aes(fill=fill, colour=colour)) +
scale_fill_identity("Condition", labels=c("Winter Night", "Summer Day"), guide = "legend") +
scale_color_identity("Condition", labels=c("Winter Night", "Summer Day"), guide = "legend") +
ylab(expression("intensity * 10"^" -2")) +
theme(strip.text = element_text(face = "bold", size=12),
axis.title.x = element_blank(),
axis.text.x = element_text(size=10)) +
scale_x_discrete(labels = function(x) str_wrap(x, width = 14, whitespace_only = FALSE)) +
# facet_wrap(~name, scale="free", ncol=2) +
scale_y_continuous(labels = function(x) format(x * 100, scientific = FALSE),
limits = c(0,0.055), breaks = extended_breaks(n=8)) -> plot_sd
plot_all <- ggarrange(plot_wn, plot_sd,
labels = c("A", "B"),
common.legend=TRUE,
legend="bottom",
nrow=1,
widths = c(6, 4))
print(plot_all)
R version 4.3.1 (2023-06-16 ucrt)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 11 x64 (build 26100)
Matrix products: default
locale:
[1] LC_COLLATE=French_France.utf8 LC_CTYPE=French_France.utf8 LC_MONETARY=French_France.utf8 LC_NUMERIC=C LC_TIME=French_France.utf8
time zone: Europe/Paris
tzcode source: internal
attached base packages:
[1] splines stats4 grid stats graphics grDevices utils datasets methods base
other attached packages:
[1] ggridges_0.5.6 rlang_1.1.3 lubridate_1.9.3 forcats_1.0.0 stringr_1.5.1 dplyr_1.1.4 purrr_1.0.2 readr_2.1.5 tidyr_1.3.1 tidyverse_2.0.0
[11] ggtext_0.1.2 cplm_0.7-12 Matrix_1.6-5 coda_0.19-4.1 compositions_2.0-8 ClassDiscovery_3.4.0 oompaBase_3.2.9 cluster_2.1.6 BiocManager_1.30.22 data.table_1.15.2
[21] FactoMineR_2.10 ggh4x_0.2.8 knitr_1.45 pheatmap_1.0.12 rgl_1.3.1 scales_1.3.0 tibble_3.2.1 vegan_2.6-4 lattice_0.22-5 permute_0.9-7
[31] VennDiagram_1.7.3 futile.logger_1.4.3 MTXmodel_1.2.4 DESeq2_1.40.2 SummarizedExperiment_1.30.2 Biobase_2.60.0 MatrixGenerics_1.12.3 matrixStats_1.2.0 GenomicRanges_1.52.0 GenomeInfoDb_1.36.4
[41] IRanges_2.34.1 S4Vectors_0.38.2 BiocGenerics_0.46.0 ggpubr_0.6.0 ggplot2_3.4.3 devtools_2.4.5 usethis_2.2.3
loaded via a namespace (and not attached):
[1] later_1.3.2 bitops_1.0-7 cellranger_1.1.0 lpsymphony_1.30.0 lifecycle_1.0.4 rstatix_0.7.2 MASS_7.3-60.0.1 oompaData_3.1.3 flashClust_1.01-2 backports_1.4.1 magrittr_2.0.3 sass_0.4.8
[13] rmarkdown_2.26 jquerylib_0.1.4 yaml_2.3.8 remotes_2.4.2.1 httpuv_1.6.14 sessioninfo_1.2.2 pkgbuild_1.4.3 cowplot_1.1.3.9000 bayesm_3.1-6 DBI_1.2.2 minqa_1.2.6 RColorBrewer_1.1-3
[25] multcomp_1.4-25 abind_1.4-5 pkgload_1.3.4 zlibbioc_1.46.0 RCurl_1.98-1.14 TH.data_1.1-2 tensorA_0.36.2.1 sandwich_3.1-0 GenomeInfoDbData_1.2.11 ggrepel_0.9.5 tweedie_2.3.5 commonmark_1.9.1
[37] codetools_0.2-19 getopt_1.20.4 DelayedArray_0.26.7 DT_0.32 xml2_1.3.6 tidyselect_1.2.1 farver_2.1.2 base64enc_0.1-3 jsonlite_1.8.8 ellipsis_0.3.2 survival_3.5-8 emmeans_1.10.0
[49] systemfonts_1.0.6 tools_4.3.1 ragg_1.3.0 Rcpp_1.0.12 glue_1.7.0 gridExtra_2.3 xfun_0.42 mgcv_1.9-1 withr_3.0.1 formatR_1.14 fastmap_1.1.1 fansi_1.0.6
[61] digest_0.6.35 timechange_0.3.0 R6_2.5.1 mime_0.12 estimability_1.5 textshaping_0.3.7 colorspace_2.1-0 markdown_1.12 utf8_1.2.4 generics_0.1.3 robustbase_0.99-2 htmlwidgets_1.6.4
[73] S4Arrays_1.0.6 scatterplot3d_0.3-44 pkgconfig_2.0.3 gtable_0.3.5 XVector_0.40.0 pcaPP_2.0-4 htmltools_0.5.7 carData_3.0-5 profvis_0.3.8 multcompView_0.1-10 leaps_3.1 optparse_1.7.4
[85] lambda.r_1.2.4 rstudioapi_0.15.0 tzdb_0.4.0 reshape2_1.4.4 nlme_3.1-164 cachem_1.0.8 zoo_1.8-12 parallel_4.3.1 miniUI_0.1.1.1 pillar_1.9.0 vctrs_0.6.5 urlchecker_1.0.1
[97] promises_1.2.1 car_3.1-2 xtable_1.8-4 evaluate_0.23 mvtnorm_1.2-4 cli_3.6.1 locfit_1.5-9.9 compiler_4.3.1 futile.options_1.0.1 crayon_1.5.2 ggsignif_0.6.4 labeling_0.4.3
[109] mclust_6.1 plyr_1.8.9 fs_1.6.3 stringi_1.8.3 BiocParallel_1.34.2 munsell_0.5.1 hms_1.1.3 statmod_1.5.0 shiny_1.8.0 highr_0.10 gridtext_0.1.5 broom_1.0.5
[121] memoise_2.0.1 bslib_0.6.1 biglm_0.9-2.1 DEoptimR_1.1-3 readxl_1.4.3