Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -48,4 +48,4 @@ Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.2
Depends:
R (>= 2.10)
LazyData: true
LazyData: true
4 changes: 3 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# volcalc (development version)

* adds a `validate = TRUE` option to `calc_vol()` and `get_fx_groups()` that returns `NA`s when there are suspected errors in parsing SMILES or .mol files. This is unfortunately not available on Windows due to differences in the windows version of `ChemmineOB`
* Added a `validate = TRUE` option to `calc_vol()` and `get_fx_groups()` that returns `NA`s when there are suspected errors in parsing SMILES or .mol files. This is unfortunately not available on Windows due to differences in the windows version of `ChemmineOB`
* Users can now supply a temperature at which to calculate volatility estimates. As a result, RVI values may now be *slightly* different compared to those from `volcalc` v2.1.2 due to rounding differences.
* Inputs to `calc_vol()` supplied as a named vector of SMILES strings now properly pass the names to `ChemmineR::smile2sdf()` where they are used as the molecule name.
* adds a dataset, `smarts_simpol1`, describing how functional groups are defined for the SIMPOL.1 and Meredith et al. methods
* `KEGGREST` is no longer a dependency of `volcalc` (previously used in `get_mol_kegg()`)

Expand Down
8 changes: 6 additions & 2 deletions R/calc_vol.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
#' @param from The form of `input`. Either `"mol_path"` (default) or `"smiles"`.
#' @param method The method for calculating estimated volatility. See
#' [simpol1()] for more details.
#' @param temp_c numeric; For methods that allow it, specify temperature (in
#' degrees C) at which log10(P) estimates are calculated.
#' @param environment The environment for calculating relative volatility
#' categories. RVI thresholds for low, moderate, and high volatility are as
#' follows: `"clean"` (clean atmosphere, default) -2, 0, 2; `"polluted"`
Expand Down Expand Up @@ -59,6 +61,7 @@ calc_vol <-
function(input,
from = c("mol_path", "smiles"),
method = c("meredith", "simpol1"),
temp_c = 20,
environment = c("clean", "polluted", "soil"),
validate = TRUE,
return_fx_groups = FALSE,
Expand Down Expand Up @@ -88,6 +91,8 @@ calc_vol <-
}

if(from == "smiles") {
# a way of converting into a list of *named* vectors, so smiles2sdf() runs on named vectors as input
input <- split(input, seq_along(input))
compound_sdf_list <- lapply(input, ChemmineR::smiles2sdf)
}
fx_groups_df_list <-
Expand All @@ -98,7 +103,7 @@ calc_vol <-
dplyr::bind_rows(fx_groups_df_list, .id = {{ from }})

# calculate relative volatility & categories from logP
vol_df <- simpol1(fx_groups_df, meredith = meredith) %>%
vol_df <- simpol1(fx_groups_df, meredith = meredith, temp_c = temp_c) %>%
dplyr::mutate(
# mass is converted from grams to micrograms
# 0.0000821 is universal gas constant
Expand All @@ -121,7 +126,6 @@ calc_vol <-
colnames(fx_groups_df)[!colnames(fx_groups_df) %in% c("formula", "name", "molecular_weight")]
}
if (isTRUE(return_calc_steps)) {
#TODO document log_alpha (need to figure out why it's called that first)
cols_calc <- c("molecular_weight", "log_alpha", "log10_P")
}

Expand Down
171 changes: 117 additions & 54 deletions R/simpol1.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,24 +12,29 @@
#' atmospheres.
#'
#' The modified method in Meredith et al. (2023) adds the following additional
#' functional groups and coefficients:
#' functional groups:
#'
#' - Phosphoric acid (-2.23)
#' - Phosphoric ester (-2.23)
#' - Sulfate (-2.23)
#' - Sulfonate (-2.23)
#' - Thiol (-2.23)
#' - Carbothioester (-1.20)
#' Using the same coefficient as nitrate
#'
#' @note The method described in Pankow & Asher (2008) allows for
#' calculations of logP at different temperatures. This implementation
#' currently only calculates values at 20ºC.
#' - Phosphoric acid
#' - Phosphoric ester
#' - Sulfate
#' - Sulfonate
#' - Thiol
#'
#' Using the same coefficient as ester
#' - Carbothioester
#'
#' @note The method described in Pankow & Asher (2008) was developed using data
#' between 0 °C and 120 °C, so extrapolating beyond that range is not
#' recommended.
#'
#' @param fx_groups A data.frame or tibble with counts of functional groups
#' produced by [get_fx_groups()] (or manually, with the same column names).
#' @param meredith Logical; `FALSE`: use the original SIMPOL.1 method. `TRUE`:
#' use the modified version in Meredith et al. (2023).
#'
#' @param temp_c Numeric; the temperature at which to calculate volatility
#' estimates in °C.
#' @returns The `fx_groups` tibble with the additional `log10_P` column.
#'
#' @references
Expand All @@ -50,65 +55,123 @@
#' sdf <- ChemmineR::read.SDFset(mol_path)
#' fx_groups <- get_fx_groups(sdf)
#' simpol1(fx_groups)
simpol1 <- function(fx_groups, meredith = TRUE) {
betas <- fx_groups %>%
simpol1 <- function(fx_groups, meredith = TRUE, temp_c = 20) {
#convert C to K
if (temp_c < 0 | temp_c > 120) {
warning("Temperatures below 0\u00b0C or above 120\u00b0C extrapolate beyond the range SIMPOL.1 was intended for. \n Interpret results with caution!")
}
temp_k <- temp_c + 273.15

b_k <- calc_bk_simpol1(temp_k)

contributions <- fx_groups %>%
# assume NAs are 0s for the sake of this calculation
dplyr::mutate(dplyr::across(dplyr::where(is.integer), function(x)
ifelse(is.na(x), 0L, x))) %>%
# TODO: There is also a table of coefs and equation to calculate b_k(T) at
# different values of T. Why wasn't this implemented?

dplyr::mutate(
# multiplier for each functional group is volatility contribution to log10P
# b_k(T) * v_k,i
b_00 = (1.79 * 1), #b_0(T) is an intercept/constant.
b_01 = (-0.438 * .data$carbons),
b_02 = (-0.0338 * .data$carbons_asa),
b_03 = (-0.675 * .data$rings_aromatic),
b_04 = (-0.0104 * .data$rings_aliphatic),
b_05 = (-0.105 * .data$carbon_dbl_bonds_aliphatic),
b_06 = (-0.506 * .data$CCCO_aliphatic_ring),
b_07 = (-2.23 * .data$hydroxyl_aliphatic),
b_08 = (-1.35 * .data$aldehydes),
b_09 = (-0.935 * .data$ketones),
b_10 = (-3.58 * .data$carbox_acids),
b_11 = (-1.20 * .data$ester),
b_12 = (-0.718 * .data$ether_alkyl),
b_13 = (-0.683 * .data$ether_alicyclic),
b_14 = (-1.03 * .data$ether_aromatic),
b_15 = (-2.23 * .data$nitrate),
b_16 = (-2.15 * .data$nitro),
b_17 = (-2.14 * .data$hydroxyl_aromatic),
b_18 = (-1.03 * .data$amine_primary),
b_19 = (-0.849 * .data$amine_secondary),
b_20 = (-0.608 * .data$amine_tertiary),
b_21 = (-1.61 * .data$amine_aromatic),
b_22 = (-4.49 * .data$amide_primary),
b_23 = (-5.26 * .data$amide_secondary),
b_24 = (-2.63 * .data$amide_tertiary),
b_25 = (-2.34 * .data$carbonylperoxynitrate),
b_26 = (-0.368 * .data$peroxide),
b_27 = (-2.48 * .data$hydroperoxide),
b_28 = (-2.48 * .data$carbonylperoxyacid),
b_29 = (0.0432 * .data$nitrophenol),
b_30 = (-2.67 * .data$nitroester)
vb_00 = (b_k["b0"] * 1), #b_0(T) is an intercept/constant.
vb_01 = (b_k["b1"] * .data$carbons),
vb_02 = (b_k["b2"] * .data$carbons_asa),
vb_03 = (b_k["b3"] * .data$rings_aromatic),
vb_04 = (b_k["b4"] * .data$rings_aliphatic),
vb_05 = (b_k["b5"] * .data$carbon_dbl_bonds_aliphatic),
vb_06 = (b_k["b6"] * .data$CCCO_aliphatic_ring),
vb_07 = (b_k["b7"] * .data$hydroxyl_aliphatic),
vb_08 = (b_k["b8"] * .data$aldehydes),
vb_09 = (b_k["b9"] * .data$ketones),
vb_10 = (b_k["b10"] * .data$carbox_acids),
vb_11 = (b_k["b11"] * .data$ester),
vb_12 = (b_k["b12"] * .data$ether_alkyl),
vb_13 = (b_k["b13"] * .data$ether_alicyclic),
vb_14 = (b_k["b14"] * .data$ether_aromatic),
vb_15 = (b_k["b15"] * .data$nitrate),
vb_16 = (b_k["b16"] * .data$nitro),
vb_17 = (b_k["b17"] * .data$hydroxyl_aromatic),
vb_18 = (b_k["b18"] * .data$amine_primary),
vb_19 = (b_k["b19"] * .data$amine_secondary),
vb_20 = (b_k["b20"] * .data$amine_tertiary),
vb_21 = (b_k["b21"] * .data$amine_aromatic),
vb_22 = (b_k["b22"] * .data$amide_primary),
vb_23 = (b_k["b23"] * .data$amide_secondary),
vb_24 = (b_k["b24"] * .data$amide_tertiary),
vb_25 = (b_k["b25"] * .data$carbonylperoxynitrate),
vb_26 = (b_k["b26"] * .data$peroxide),
vb_27 = (b_k["b27"] * .data$hydroperoxide),
vb_28 = (b_k["b28"] * .data$carbonylperoxyacid),
vb_29 = (b_k["b29"] * .data$nitrophenol),
vb_30 = (b_k["b30"] * .data$nitroester)
)

if (isTRUE(meredith)) {
betas <- betas %>%
contributions <- contributions %>%
dplyr::mutate(
# # Below are additions from Meredith et al.
b_32 = (-2.23 * .data$phosphoric_acids),
b_33 = (-2.23 * .data$phosphoric_esters),
b_34 = (-2.23 * .data$sulfates),
b_35 = (-2.23 * .data$sulfonates),
b_36 = (-2.23 * .data$thiols),
b_37 = (-1.20 * .data$carbothioesters)
# use the coef for nitrate for the following groups
vb_32 = (b_k["b15"] * .data$phosphoric_acids),
vb_33 = (b_k["b15"] * .data$phosphoric_esters),
vb_34 = (b_k["b15"] * .data$sulfates),
vb_35 = (b_k["b15"] * .data$sulfonates),
vb_36 = (b_k["b15"] * .data$thiols),
# use the coef for ester for carbothioesters
vb_37 = (b_k["b11"] * .data$carbothioesters)
)
}

betas %>%
contributions %>%
dplyr::rowwise() %>%
dplyr::mutate(log10_P = sum(dplyr::c_across(dplyr::starts_with("b_")))) %>%
dplyr::mutate(log10_P = sum(dplyr::c_across(dplyr::starts_with("vb_")))) %>%
dplyr::ungroup() %>%
dplyr::select(-dplyr::starts_with("b_"))
dplyr::select(-dplyr::starts_with("vb_"))
}


#from table 5 of Pankow & Asher
calc_bk_simpol1 <- function(temp_k = 293.15) {
table_5 <- tibble::tribble(
~k, ~B1, ~B2, ~B3, ~B4,
0, -4.26938E+02, 2.89223E-01, 4.42057E-03, 2.92846E-01,
1, -4.11248E+02, 8.96919E-01, -2.48607E-03, 1.40312E-01,
2, -1.46442E+02, 1.54528E+00, 1.71021E-03, -2.78291E-01,
3, 3.50262E+01, -9.20839E-01, 2.24399E-03, -9.36300E-02,
4, -8.72770E+01, 1.78059E+00, -3.07187E-03, -1.04341E-01,
5, 5.73335E+00, 1.69764E-02, -6.28957E-04, 7.55434E-03,
6, -2.61268E+02, -7.63282E-01, -1.68213E-03, 2.89038E-01,
7, -7.25373E+02, 8.26326E-01, 2.50957E-03, -2.32304E-01,
8, -7.29501E+02, 9.86017E-01, -2.92664E-03, 1.78077E-01,
9, -1.37456E+01, 5.23486E-01, 5.50298E-04, -2.76950E-01,
10, -7.98796E+02, -1.09436E+00, 5.24132E-03, -2.28040E-01,
11, -3.93345E+02, -9.51778E-01, -2.19071E-03, 3.05843E-01,
12, -1.44334E+02, -1.85617E+00, -2.37491E-05, 2.88290E-01,
13, 4.05265E+01, -2.43780E+00, 3.60133E-03, 9.86422E-02,
14, -7.07406E+01, -1.06674E+00, 3.73104E-03, -1.44003E-01,
15, -7.83648E+02, -1.03439E+00, -1.07148E-03, 3.15535E-01,
16, -5.63872E+02, -7.18416E-01, 2.63016E-03, -4.99470E-02,
17, -4.53961E+02, -3.26105E-01, -1.39780E-04, -3.93916E-02,
18, 3.71375E+01, -2.66753E+00, 1.01483E-03, 2.14233E-01,
19, -5.03710E+02, 1.04092E+00, -4.12746E-03, 1.82790E-01,
20, -3.59763E+01, -4.08458E-01, 1.67264E-03, -9.98919E-02,
21, -6.09432E+02, 1.50436E+00, -9.09024E-04, -1.35495E-01,
22, -1.02367E+02, -7.16253E-01, -2.90670E-04, -5.88556E-01,
23, -1.93802E+03, 6.48262E-01, 1.73245E-03, 3.47940E-02,
24, -5.26919E+00, 3.06435E-01, 3.25397E-03, -6.81506E-01,
25, -2.84042E+02, -6.25424E-01, -8.22474E-04, -8.80549E-02,
26, 1.50093E+02, 2.39875E-02, -3.37969E-03, 1.52789E-02,
27, -2.03387E+01, -5.48718E+00, 8.39075E-03, 1.07884E-01,
28, -8.38064E+02, -1.09600E+00, -4.24385E-04, 2.81812E-01,
29, -5.27934E+01, -4.63689E-01, -5.11647E-03, 3.84965E-01,
30, -1.61520E+03, 9.01669E-01, 1.44536E-03, 2.66889E-01
)
Comment thread
Aariq marked this conversation as resolved.

b_k_df <-
dplyr::mutate(table_5, b_k = .data$B1/temp_k + .data$B2 + .data$B3*temp_k + .data$B4*log(temp_k))

out <- b_k_df$b_k
names(out) <- paste0("b", b_k_df$k)
#return:
out
}
4 changes: 4 additions & 0 deletions man/calc_vol.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

31 changes: 20 additions & 11 deletions man/simpol1.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 10 additions & 6 deletions tests/testthat/test-calc_vol.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
test_that("volatility estimate is correct", {
ex_vol_df <- calc_vol("data/C16181.mol")
expect_equal(round(ex_vol_df$rvi, 6), 6.975349)
ex_vol_df <- calc_vol(testthat::test_path("data/C16181.mol"), temp_c = 20)
expect_equal(round(ex_vol_df$rvi, 4), 6.9776)
})

test_that("returns correct columns depending on return arguments", {
Expand Down Expand Up @@ -28,11 +28,15 @@ test_that("calc_vol() works with multiple inputs", {

test_that("smiles and .mol give same results", {
paths <-
c("data/C16181.mol",
"data/map00361/C00011.mol",
"data/map00361/C00042.mol")
c(testthat::test_path("data/C16181.mol"),
testthat::test_path("data/map00361/C00011.mol"),
testthat::test_path("data/map00361/C00042.mol"))
smiles <-
c("C1(C(C(C(C(C1Cl)Cl)Cl)Cl)Cl)O", "O=C=O", "C(CC(=O)O)C(=O)O")
c(
"beta-2,3,4,5,6-Pentachlorocyclohexanol" = "C1(C(C(C(C(C1Cl)Cl)Cl)Cl)Cl)O",
"CO2" = "O=C=O",
"Succinate" = "C(CC(=O)O)C(=O)O"
)
expect_equal(
calc_vol(smiles, from = "smiles") %>% dplyr::select(-name,-smiles),
calc_vol(paths) %>% dplyr::select(-name,-mol_path)
Expand Down
8 changes: 8 additions & 0 deletions tests/testthat/test-simpol1.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,11 @@ test_that("isoprene is more volatile than glucose", {
expect_gt(log10P_together[1], log10P_together[2])
expect_equal(log10P_together, log10P_separate)
})

test_that("changing temperature does something", {
isoprene <- ChemmineR::read.SDFset(mol_example()[5])
groups <- get_fx_groups(isoprene)
c_30 <- simpol1(groups, temp_c = 30)$log10_P
c_20 <- simpol1(groups, temp_c = 20)$log10_P
expect_gt(c_30, c_20)
})
Loading