Skip to content

Commit e21ab1b

Browse files
authored
Merge pull request #599 from hechth/isolib_add_csv
isolib: added option to export to tabular
2 parents 7d98b78 + d74b0da commit e21ab1b

File tree

4 files changed

+646
-20
lines changed

4 files changed

+646
-20
lines changed

tools/isolib/isolib.R

Lines changed: 111 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -3,23 +3,43 @@ library(Spectra)
33
library(MsBackendMsp)
44
library(MetaboCoreUtils)
55
library(readr)
6+
library(tidyselect)
67

7-
#' @param args A list of command line arguments.
8-
main <- function() {
9-
data(isotopes)
10-
data(adducts)
118

9+
parse_args <- function() {
1210
args <- commandArgs(trailingOnly = TRUE)
11+
1312
compound_table <- read_tsv(
1413
file = args[1],
1514
col_types = "ccd",
16-
col_select = tidyselect::all_of(c("name", "formula")) | tidyselect::any_of("rt")
15+
col_select = all_of(c("name", "formula")) | any_of("rt")
1716
)
18-
adducts_to_use <- c(unlist(strsplit(args[2], ",", fixed = TRUE)))
1917

20-
chemforms <- compound_table$formula
21-
chemforms <- check_chemform(isotopes, chemforms)[, 2]
18+
parsed <- list(
19+
compound_table = compound_table,
20+
adducts_to_use = c(unlist(strsplit(args[2], ",", fixed = TRUE))),
21+
threshold = as.numeric(args[3]),
22+
append_adducts = args[4],
23+
append_isotopes = args[5],
24+
out_format = args[6],
25+
outfile = args[7]
26+
)
27+
return(parsed)
28+
}
2229

30+
generate_isotope_spectra <- function(compound_table,
31+
adducts_to_use,
32+
append_adducts,
33+
threshold) {
34+
data(isotopes)
35+
data(adducts)
36+
37+
monoisotopic <- isotopes |>
38+
dplyr::group_by(element) |>
39+
dplyr::slice_max(abundance, n = 1) |>
40+
dplyr::filter(!stringr::str_detect(element, "\\[|\\]"))
41+
42+
chemforms <- check_chemform(isotopes, compound_table$formula)[, 2]
2343
spectra <- data.frame()
2444

2545
for (current in adducts_to_use) {
@@ -32,12 +52,19 @@ main <- function() {
3252
merged_chemforms <- mergeform(multiplied_chemforms, adduct$Formula_add)
3353
}
3454

35-
charge_string <- paste0(if (adduct$Charge > 0) "+" else "-", if (abs(adduct$Charge) > 1) abs(adduct$Charge) else "")
55+
charge_string <- paste0(
56+
if (adduct$Charge > 0) "+" else "-",
57+
if (abs(adduct$Charge) > 1) abs(adduct$Charge) else ""
58+
)
3659
adduct_string <- paste0("[", adduct$Name, "]", charge_string)
3760
precursor_mz <- calculateMass(multiplied_chemforms) + adduct$Mass
3861

39-
if (args[4] == TRUE) {
40-
names <- paste(compound_table$name, paste0("(", adduct$Name, ")"), sep = " ")
62+
if (append_adducts == TRUE) {
63+
names <- paste(
64+
compound_table$name,
65+
paste0("(", adduct$Name, ")"),
66+
sep = " "
67+
)
4168
} else {
4269
names <- compound_table$name
4370
}
@@ -60,26 +87,94 @@ main <- function() {
6087
isotopes = isotopes,
6188
chemforms = merged_chemforms,
6289
charge = adduct$Charge,
63-
threshold = as.numeric(args[3]),
90+
threshold = threshold,
6491
)
6592

6693
mzs <- list()
6794
intensities <- list()
95+
isos <- list()
96+
6897
for (i in seq_along(patterns)) {
6998
mzs <- append(mzs, list(patterns[[i]][, 1]))
7099
intensities <- append(intensities, list(patterns[[i]][, 2]))
100+
101+
# select all columns which describe the elemental composition
102+
# remove all 12C, 35Cl etc.
103+
# remove isotopes which don't occur
104+
compositions <- as.data.frame(patterns[[i]][, -c(1, 2)]) |>
105+
dplyr::select(-tidyselect::any_of(monoisotopic$isotope)) |>
106+
dplyr::select_if(~ !all(. == 0))
107+
108+
# combine elemental composition into single string
109+
compositions <- compositions |>
110+
dplyr::rowwise() |>
111+
dplyr::mutate(isotopes = paste(
112+
purrr::map2_chr(
113+
names(compositions),
114+
dplyr::c_across(everything()),
115+
~ paste(.x, .y, sep = ":")
116+
),
117+
collapse = ", "
118+
)) |>
119+
dplyr::ungroup() |>
120+
dplyr::select(isotopes)
121+
isos <- append(isos, list(compositions$isotopes))
71122
}
72123

73124
spectra_df$mz <- mzs
74125
spectra_df$intensity <- intensities
126+
spectra_df$isotopes <- isos
75127
spectra <- rbind(spectra, spectra_df)
76128
}
129+
return(spectra)
130+
}
131+
132+
write_to_msp <- function(spectra, file) {
133+
sps <- Spectra(dplyr::select(spectra, -isotopes))
134+
export(sps, MsBackendMsp(), file = file)
135+
}
77136

78-
sps <- Spectra(spectra)
79-
export(sps, MsBackendMsp(), file = args[5])
137+
write_to_table <- function(spectra, file, append_isotopes) {
138+
entries <- spectra |>
139+
dplyr::rowwise() |>
140+
dplyr::mutate(peaks = paste(unlist(mz), collapse = ";")) |>
141+
dplyr::mutate(isos = paste(unlist(isotopes), collapse = ";"))
142+
result <- tidyr::separate_longer_delim(
143+
entries,
144+
all_of(c("peaks", "isos")),
145+
";"
146+
)
147+
result <- result |>
148+
dplyr::select(-c("mz", "intensity", "isotopes")) |>
149+
dplyr::rename(mz = peaks, isotopes = isos, rt = retention_time)
150+
151+
if (append_isotopes) {
152+
result <- result |>
153+
dplyr::mutate(result,
154+
full_formula = paste0(formula, " (", isotopes, ")")
155+
) |>
156+
dplyr::select(-all_of(c("formula", "isotopes"))) |>
157+
dplyr::rename(formula = full_formula) |>
158+
dplyr::relocate(formula, .after = name)
159+
}
160+
readr::write_tsv(result, file = file)
161+
}
162+
163+
main <- function() {
164+
args <- parse_args()
165+
spectra <- generate_isotope_spectra(
166+
args$compound_table,
167+
args$adducts_to_use,
168+
args$append_adducts,
169+
args$threshold
170+
)
171+
172+
if (args$out_format == "msp") {
173+
write_to_msp(spectra, args$outfile)
174+
} else if (args$out_format == "tabular") {
175+
write_to_table(spectra, args$outfile, args$append_isotopes)
176+
}
80177
}
81178

82-
# Get the command line arguments
83-
args <- commandArgs(trailingOnly = TRUE)
84179
# Call the main function
85180
main()

tools/isolib/isolib.xml

Lines changed: 46 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
<tool id="isolib" name="isolib" version="1.0.1+galaxy0" profile="21.09">
2-
<description>create an isotopic pattern library for given compounds and adducts</description>
1+
<tool id="isolib" name="isolib" version="2.6+galaxy0" profile="21.09">
2+
<description>create an isotopic pattern library for given compounds and adducts based on enviPat</description>
33
<creator>
44
<person
55
givenName="Helge"
@@ -20,9 +20,23 @@
2020
<requirement type="package" version="1.6.0">bioconductor-msbackendmsp</requirement>
2121
<requirement type="package" version="2.6">r-envipat</requirement>
2222
<requirement type="package" version="2.1.5">r-readr</requirement>
23+
<requirement type="package" version="1.3.1">r-tidyr</requirement>
24+
<requirement type="package" version="1.5.1">r-stringr</requirement>
25+
<requirement type="package" version="1.0.2">r-purrr</requirement>
2326
</requirements>
2427
<command detect_errors="exit_code"><![CDATA[
25-
Rscript '${__tool_directory__}/isolib.R' '${input_file}' '${ionization.adducts}' '${threshold}' '${append_adduct}' '${isotope_library}'
28+
Rscript '${__tool_directory__}/isolib.R'
29+
'${input_file}'
30+
'${ionization.adducts}'
31+
'${threshold}'
32+
'${append_adduct}'
33+
#if $formatting.out_format == "tabular"
34+
'${formatting.append_isotopes}'
35+
#else
36+
'FALSE'
37+
#end if
38+
'${formatting.out_format}'
39+
'${isotope_library}'
2640
]]></command>
2741
<inputs>
2842
<param name="input_file" type="data" format="tabular" label="Table with input compounds"/>
@@ -46,9 +60,25 @@
4660
</conditional>
4761
<param name="threshold" type="float" min="0" max="100" value="1" label="Threshold" help="Probability threshold to use as cutoff for isotopic pattern distribution - this can be used to remove low abundant peaks and improve computation performance." />
4862
<param name="append_adduct" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Append adduct to compound name" help="Append the adduct string to the compound name for easy identification." />
63+
<conditional name="formatting">
64+
<param name="out_format" type="select" label="Output Format" help="Choose the output format, either MSP or Tabular">
65+
<option value="tabular">tabular</option>
66+
<option value="msp" selected="true">msp</option>
67+
</param>
68+
<when value="tabular">
69+
<param name="append_isotopes" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Append isotopes to formula" help="Append the isotopic composition to the formula for easy identification." />
70+
</when>
71+
<when value="msp">
72+
</when>
73+
</conditional>
4974
</inputs>
5075
<outputs>
51-
<data format="msp" name="isotope_library"/>
76+
<data format="msp" name="isotope_library">
77+
<change_format>
78+
<when input="formatting.out_format" value="msp" format="msp" />
79+
<when input="formatting.out_format" value="tabular" format="tabular" />
80+
</change_format>
81+
</data>
5282
</outputs>
5383

5484
<tests>
@@ -60,6 +90,18 @@
6090
<param name="input_file" value="markers_no_rt.tsv"/>
6191
<output name="isotope_library" file="test1.msp"/>
6292
</test>
93+
<test>
94+
<param name="input_file" value="lc_markers_neg.tsv"/>
95+
<param name="out_format" value="tabular"/>
96+
<param name="append_isotopes" value="TRUE" />
97+
<output name="isotope_library" file="test2.tabular"/>
98+
</test>
99+
<test>
100+
<param name="input_file" value="lc_markers_neg.tsv"/>
101+
<param name="out_format" value="tabular"/>
102+
<param name="adducts" value="M-H,2M-H"/>
103+
<output name="isotope_library" file="test3.tabular"/>
104+
</test>
63105
</tests>
64106
<help><![CDATA[
65107
This tool computes isotopic patterns for given compounds and adduct forms.

0 commit comments

Comments
 (0)