```{r setup, include=FALSE}
# Default chunk options for the whole document: do not echo the source code
# and do not include messages or warnings in the rendered output.
knitr::opts_chunk$set(echo = FALSE, message = FALSE, warning = FALSE)
```
```{r create-cdb}
library(CompoundDb)
library(DT)
# Build (on first run) or update the in-house compound database from
# "molecules.csv", then render its compounds as an interactive table.
db_name <- "CompDb_inhouse_0.sqlite"
molecules <- read.csv("molecules.csv")

# Map the columns of "molecules.csv" onto the compound table stored in the
# database. InChI, InChIKey and synonyms are not provided and are left as NA;
# the exact mass is derived from the chemical formula.
as_compound_table <- function(x) {
  data.frame(
    compound_id = x$ID_M,
    name = x$Molecule,
    inchi = NA_character_,
    inchikey = NA_character_,
    formula = x$Formula,
    exactmass = MetaboCoreUtils::calculateMass(x$Formula),
    rtime = x$RT_urine,
    synonyms = NA_character_,
    kingdom = x$kingdom,
    superclass = x$superclass,
    class = x$class,
    subclass = x$subclass,
    parent = x$parent,
    RefMet = x$RefMet,
    adducts = x$adduct
  )
}

# Test for the database file itself. A pattern match on the directory listing
# would also succeed for any other file whose name merely contains the database
# name (e.g. a journal or backup file).
if (!file.exists(db_name)) {
  # First run: create the database with all molecules.
  metad <- make_metadata(
    source = "inhouse", url = "",
    source_version = "0",
    source_date = Sys.Date(),
    organism = NA_character_
  )
  createCompDb(
    as_compound_table(molecules),
    metadata = metad,
    path = getwd(),
    dbFile = db_name
  )
  rm(metad)
  cdb <- CompDb(db_name, flags = RSQLite::SQLITE_RW)
} else {
  # Later runs: open the database read-write and add only the molecules whose
  # identifier is not stored yet. (Previously the new molecules were selected
  # but never written to the database.)
  cdb <- CompDb(db_name, flags = RSQLite::SQLITE_RW)
  known_ids <- compounds(cdb, "compound_id")[, 1]
  new_molecules <- molecules[!molecules$ID_M %in% known_ids, ]
  if (nrow(new_molecules) > 0) {
    cdb <- insertCompound(cdb, compounds = as_compound_table(new_molecules))
  }
  rm(known_ids, new_molecules)
}
rm(db_name, molecules, as_compound_table)

# Table shown in the report: one row per compound in the database.
cmps <- compounds(
  cdb,
  columns = c(
    "compound_id", "name", "formula", "exactmass",
    "kingdom", "superclass", "class", "subclass", "parent",
    "RefMet"
  )
)
colnames(cmps) <- c(
  "ID_M", "Metabolite", "Formula", "Exact Mass",
  "Kingdom", "Superclass", "Class", "Subclass", "Parent",
  "RefMet"
)

# Turn the identifier into a link for every compound that has its own page
# "molecule/<ID>.html". The table is rendered with `escape = FALSE`, so the
# markup is interpreted as HTML.
idx <- which(cmps$ID_M %in% sub("\\.html$", "", list.files("molecule/")))
if (length(idx) > 0) {
  cmps$ID_M[idx] <- paste0(
    "<a href='molecule/", cmps$ID_M[idx], ".html'>", cmps$ID_M[idx], "</a>"
  )
}

# NOTE(review): non-missing RefMet names are wrapped in empty strings, which
# leaves them unchanged. Presumably this was meant to add link markup as well;
# the link target cannot be determined from this file - TODO confirm.
idx <- which(!is.na(cmps$RefMet))
cmps$RefMet[idx] <- paste0(
  "", cmps$RefMet[idx], "")

datatable(cmps, escape = FALSE, rownames = FALSE, filter = "top") %>%
  formatRound(columns = c("Exact Mass"), digits = 5)
```
```{r ms2}
# Flag the peaks of a spectrum that are intense enough to be kept.
#
# x:    numeric vector with the peak intensities of one spectrum.
# ...:  ignored; accepted so the function can be used as a filter callback.
# prop: fraction of the highest intensity that a peak has to exceed
#       (default 0.05, i.e. 5 % of the most intense peak).
#
# Returns a logical vector of the same length as `x`: TRUE for peaks above
# the cut-off, FALSE otherwise. Missing intensities are flagged FALSE, and an
# empty or all-missing input yields an all-FALSE vector instead of a warning
# from `max()`.
low_int <- function(x, ..., prop = 0.05) {
  if (length(x) == 0L || all(is.na(x))) {
    return(rep(FALSE, length(x)))
  }
  !is.na(x) & x > max(x, na.rm = TRUE) * prop
}
# Scale the intensities of a peak matrix so that the most intense peak is 100.
#
# x:   peak matrix with (at least) a column named "intensity".
# ...: ignored; accepted so the function can be used as a processing step.
#
# Returns `x` with its "intensity" column rescaled. The matrix is returned
# unchanged when it has no peaks, when all intensities are missing, or when
# the highest intensity is not positive, because the scaling is undefined
# (warning from `max()` or division by zero) in those cases.
scale_int <- function(x, ...) {
  ints <- x[, "intensity"]
  if (length(ints) == 0L || all(is.na(ints))) {
    return(x)
  }
  maxint <- max(ints, na.rm = TRUE)
  if (maxint > 0) {
    x[, "intensity"] <- 100 * ints / maxint
  }
  x
}
library(Spectra)

# MS/MS scans to register. The columns used below are: file, scan,
# compound_id, adduct and LC_method.
ms2_tb <- read.csv("MSMS.csv")

# Exclude items already in the DB: a scan is identified by its data origin
# (file) and scan index. File names are unicode-escaped before the comparison.
sps <- Spectra(cdb)
ms2_tb <- ms2_tb[!paste(
  stringi::stri_escape_unicode(ms2_tb$file), ms2_tb$scan) %in%
    stringi::stri_escape_unicode(unique(paste(sps$dataOrigin,
                                              sps$scanIndex))), ]

if (nrow(ms2_tb) > 0) {
  # Import every raw file once and index its spectra by "<file> <scan>".
  ms2 <- Spectra(unique(ms2_tb$file), backend = MsBackendDataFrame())
  ms2_key <- paste(dataOrigin(ms2), scanIndex(ms2))

  # The single distinct value of `v`, or NA when the values disagree.
  single_value <- function(v) {
    u <- unique(v)
    if (length(u) == 1L) u else NA
  }

  # One group of scans per compound/adduct combination.
  grp <- paste(ms2_tb$compound_id, ms2_tb$adduct)
  for (g in unique(grp)) {
    tmp_ms2_tb <- ms2_tb[grp == g, ]

    # Locate the listed scans in the imported data and fail with a clear
    # message if any of them is missing.
    wanted <- paste(tmp_ms2_tb$file, tmp_ms2_tb$scan)
    pos <- match(wanted, ms2_key)
    if (anyNA(pos)) {
      stop("Scan(s) listed in MSMS.csv not found in the raw files: ",
           paste(wanted[is.na(pos)], collapse = "; "), call. = FALSE)
    }
    raw <- ms2[pos]

    # Raw spectra: keep only the variables of interest and store them.
    df <- DataFrame(msLevel = msLevel(raw))
    df$precursorMz <- precursorMz(raw)
    df$rtime <- rtime(raw)
    df$scanIndex <- tmp_ms2_tb$scan
    df$polarity <- polarity(raw)
    df$collisionEnergy <- collisionEnergy(raw)
    df$dataOrigin <- dataOrigin(raw)
    df$mz <- as.list(mz(raw))
    df$intensity <- as.list(intensity(raw))
    df$LC_method <- tmp_ms2_tb$LC_method
    df$adduct <- tmp_ms2_tb$adduct
    sps <- Spectra(df)
    sps$compound_id <- tmp_ms2_tb$compound_id[1]
    sps$instrument <- "Orbitrap LTQ-XL"
    sps$raw_spectra <- TRUE
    cdb <- insertSpectra(cdb, spectra = sps)

    # Summary values for the consensus spectrum, computed from the raw scans.
    mean_precursor <- mean(df$precursorMz)
    mean_rt <- mean(df$rtime)
    pol <- as.integer(single_value(df$polarity))
    # Collision energies are numeric; do not truncate them to integers.
    ce <- as.numeric(single_value(df$collisionEnergy))
    lc <- paste(unique(df$LC_method), collapse = "-")
    ad <- paste(unique(df$adduct), collapse = "-")
    ins <- paste(unique(sps$instrument), collapse = "-")

    # Scale fragment intensities. The function has to be passed as `FUN`:
    # supplied under any other argument name it ends up in `...` and no
    # processing step is added.
    sps <- addProcessing(sps, FUN = scale_int)
    # Remove peaks with an intensity below 5% of BPI
    sps <- filterIntensity(sps, intensity = low_int)
    # Combine all spectra into one consensus spectrum:
    sps <- Spectra::combineSpectra(
      sps, mzFun = base::mean, intensityFun = base::mean, # OR "sum"?
      tolerance = 0.02, minProp = 0.5, peaks = "intersect",
      weighted = TRUE)
    # Rescale the consensus spectrum itself.
    sps <- addProcessing(sps, FUN = scale_int)

    sps$precursorMz <- mean_precursor
    sps$rtime <- mean_rt
    sps$scanIndex <- 0L
    sps$polarity <- pol
    sps$collisionEnergy <- ce
    sps$dataOrigin <- ""
    sps$LC_method <- lc
    sps$adduct <- ad
    sps$instrument <- ins
    sps$raw_spectra <- FALSE
    cdb <- insertSpectra(cdb, spectra = sps)
  }
}
```
**Last update:** `r format(Sys.time(), "%d %b %Y")`