```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = FALSE, message = FALSE, warning = FALSE)
```

```{r create-cdb}
# Build the in-house compound database (only if it does not exist yet), open
# it read/write and render its compounds as an interactive table.
library(CompoundDb)
library(DT)

db_name <- "CompDb_inhouse_0.sqlite"
mol_tb <- read.csv("molecules.csv")

# Test for the exact database file: a pattern match on the directory listing
# would also be satisfied by backups or journal files sharing the prefix.
if (!file.exists(db_name)) {
  # Create the data frame with the necessary data from each compound
  cmp_df <- data.frame(
    compound_id = mol_tb$ID_M,
    name = mol_tb$Molecule,
    inchi = NA_character_,
    inchikey = NA_character_,
    formula = mol_tb$Formula,
    exactmass = MetaboCoreUtils::calculateMass(mol_tb$Formula),
    rtime = mol_tb$RT_urine,
    synonyms = NA_character_,
    kingdom = mol_tb$kingdom,
    superclass = mol_tb$superclass,
    class = mol_tb$class,
    subclass = mol_tb$subclass,
    parent = mol_tb$parent,
    RefMet = mol_tb$RefMet,
    adducts = mol_tb$adduct
  )

  # Create "metadata":
  metad <- make_metadata(
    source = "inhouse",
    url = "",
    source_version = "0",
    source_date = Sys.Date(),
    organism = NA_character_
  )

  # Create "CompDb":
  db_file <- createCompDb(
    cmp_df,
    metadata = metad,
    path = getwd(),
    dbFile = db_name
  )
  rm(cmp_df, metad, db_file)
}
# NOTE(review): when the database already exists, rows of "molecules.csv"
# that are missing from it are NOT inserted (the previous code computed that
# subset but discarded it immediately). Confirm whether they should be added.

# Open read/write because the MS2 chunk below inserts spectra.
cdb <- CompDb(db_name, flags = RSQLite::SQLITE_RW)

# Table shown to the reader, with display-friendly column names.
cmps <- compounds(
  cdb,
  columns = c(
    "compound_id", "name", "formula", "exactmass", "kingdom",
    "superclass", "class", "subclass", "parent", "RefMet"
  )
)
colnames(cmps) <- c(
  "ID_M", "Metabolite", "Formula", "Exact Mass", "Kingdom",
  "Superclass", "Class", "Subclass", "Parent", "RefMet"
)

# NOTE(review): both paste0() calls below wrap the value in empty strings and
# therefore change nothing. Together with `escape = FALSE` this looks like
# HTML link markup was intended here -- confirm and restore if so.
has_page <- which(
  cmps$ID_M %in% gsub("\\.html", "", list.files("molecule/"))
)
cmps$ID_M[has_page] <- paste0("", cmps$ID_M[has_page], "")
has_refmet <- which(!is.na(cmps$RefMet))
cmps$RefMet[has_refmet] <- paste0("", cmps$RefMet[has_refmet], "")

datatable(cmps, escape = FALSE, rownames = FALSE, filter = "top") %>%
  formatRound(columns = c("Exact Mass"), digits = 5)
```

```{r ms2}
# Add the MS2 scans listed in "MSMS.csv" that are not in the database yet:
# per compound/adduct pair the raw scans are inserted first, followed by one
# consensus spectrum built from them.
library(Spectra)

# Flag the peaks to keep: intensity above 5% of the most intense peak.
# `x` is a numeric vector of intensities; returns a logical vector.
low_int <- function(x, ...) {
  x > max(x, na.rm = TRUE) * 0.05
}

# Scale the intensities so that the most intense peak equals 100.
# `x` is a peak matrix with an "intensity" column; returns the same matrix.
scale_int <- function(x, ...) {
  maxint <- max(x[, "intensity"], na.rm = TRUE)
  x[, "intensity"] <- 100 * x[, "intensity"] / maxint
  x
}

ms2_tb <- read.csv("MSMS.csv")

# Exclude items already in the DB (matched on file + scan index):
sps <- Spectra(cdb)
in_db <- stringi::stri_escape_unicode(
  unique(paste(sps$dataOrigin, sps$scanIndex))
)
listed <- paste(stringi::stri_escape_unicode(ms2_tb$file), ms2_tb$scan)
ms2_tb <- ms2_tb[!listed %in% in_db, ]

if (nrow(ms2_tb) > 0) {
  ms2 <- Spectra(unique(ms2_tb$file), backend = MsBackendDataFrame())

  # Loop invariants: use the public accessors instead of reaching into the
  # backend's internal slots.
  ms2_origin <- dataOrigin(ms2)
  ms2_scan <- scanIndex(ms2)
  grp <- paste(ms2_tb$compound_id, ms2_tb$adduct)

  for (ca in unique(grp)) {
    tmp_ms2_tb <- ms2_tb[grp == ca, ]
    n_scan <- nrow(tmp_ms2_tb)

    ms_level <- rep(NA_integer_, n_scan)
    precursorMz <- rep(NA_real_, n_scan)
    rt <- rep(NA_real_, n_scan)
    p <- rep(NA_integer_, n_scan)
    ce <- rep(NA_real_, n_scan)
    do <- rep(NA_character_, n_scan)
    mz <- vector(mode = "list", length = n_scan)
    intensity <- vector(mode = "list", length = n_scan)

    for (j in seq_len(n_scan)) {
      hit <- which(
        ms2_origin == tmp_ms2_tb$file[j] & ms2_scan == tmp_ms2_tb$scan[j]
      )
      # Exactly one spectrum must match; anything else would either fail
      # with an obscure error or silently merge peaks of several scans.
      if (length(hit) != 1L) {
        stop(
          "Expected exactly one spectrum for scan ", tmp_ms2_tb$scan[j],
          " in file '", tmp_ms2_tb$file[j], "' but found ", length(hit),
          call. = FALSE
        )
      }
      tmp_ms2 <- ms2[hit]
      ms_level[j] <- msLevel(tmp_ms2)
      precursorMz[j] <- precursorMz(tmp_ms2)
      rt[j] <- rtime(tmp_ms2)
      p[j] <- polarity(tmp_ms2)
      ce[j] <- collisionEnergy(tmp_ms2)
      do[j] <- dataOrigin(tmp_ms2)
      mz[[j]] <- unlist(mz(tmp_ms2))
      intensity[[j]] <- unlist(intensity(tmp_ms2))
    }

    # Raw spectra of this compound/adduct pair
    df <- DataFrame(msLevel = ms_level)
    df$precursorMz <- precursorMz
    df$rtime <- rt
    df$scanIndex <- tmp_ms2_tb$scan
    df$polarity <- p
    df$collisionEnergy <- ce
    df$dataOrigin <- do
    df$mz <- mz
    df$intensity <- intensity
    df$LC_method <- tmp_ms2_tb$LC_method
    df$adduct <- tmp_ms2_tb$adduct

    sps <- Spectra(df)
    sps$compound_id <- tmp_ms2_tb$compound_id[1]
    sps$instrument <- "Orbitrap LTQ-XL"
    sps$raw_spectra <- TRUE
    cdb <- insertSpectra(cdb, spectra = sps)

    # Summary values for the consensus spectrum
    precursorMz <- mean(precursorMz)
    rt <- mean(rt)
    p <- paste(unique(p), collapse = "-")
    ce <- paste(unique(ce), collapse = "-")
    lc <- paste(unique(df$LC_method), collapse = "-")
    ad <- paste(unique(df$adduct), collapse = "-")
    ins <- paste(unique(sps$instrument), collapse = "-")

    # Scale fragment intensities. The function has to be supplied as `FUN`:
    # handing it over under another argument name does not register it as
    # the processing step.
    sps <- addProcessing(sps, FUN = scale_int)
    # Remove peaks with an intensity below 5% of BPI
    sps <- filterIntensity(sps, intensity = low_int)
    # Combine all spectra:
    sps <- Spectra::combineSpectra(
      sps,
      mzFun = base::mean,
      intensityFun = base::mean, # OR "sum"?
      tolerance = 0.02,
      minProp = 0.5,
      peaks = "intersect",
      weighted = TRUE
    )
    # Re-scale so the consensus base peak is 100 again.
    sps <- addProcessing(sps, FUN = scale_int)

    sps$precursorMz <- precursorMz
    sps$rtime <- rt
    sps$scanIndex <- 0L
    # as.integer() gives NA (with a coercion warning) when the scans disagree
    # and the collapsed string is e.g. "1-0".
    sps$polarity <- as.integer(p)
    sps$collisionEnergy <- as.integer(ce)
    sps$dataOrigin <- ""
    sps$LC_method <- lc
    sps$adduct <- ad
    sps$instrument <- ins
    sps$raw_spectra <- FALSE
    cdb <- insertSpectra(cdb, spectra = sps)
  }
}
```

**Last update:** `r format(Sys.time(), "%d %b %Y")`