Meredith-Lab · Aariq · Apr 24, 2025 · Aug 6, 2024 · Aug 9, 2024 · Aug 9, 2024
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -48,4 +48,4 @@ Roxygen: list(markdown = TRUE)
 RoxygenNote: 7.3.2
 Depends: 
     R (>= 2.10)
-LazyData: true
+LazyData: true
diff --git a/NEWS.md b/NEWS.md
@@ -1,6 +1,8 @@
 # volcalc (development version)
 
-* adds a `validate = TRUE` option to `calc_vol()` and `get_fx_groups()` that returns `NA`s when there are suspected errors in parsing SMILES or .mol files. This is unfortunately not available on Windows due to differences in the windows version of `ChemmineOB`
+* Added a `validate = TRUE` option to `calc_vol()` and `get_fx_groups()` that returns `NA`s when there are suspected errors in parsing SMILES or .mol files. This is unfortunately not available on Windows due to differences in the windows version of `ChemmineOB`
+* Users can now supply a temperature at which to calculate volatility estimates. As a result, RVI values may now be *slightly* different compared to those from `volcalc` v2.1.2 due to rounding differences.
+* Inputs to `calc_vol()` supplied as a named vector of SMILES strings now properly pass the names to `ChemmineR::smile2sdf()` where they are used as the molecule name.
 * adds a dataset, `smarts_simpol1`, describing how functional groups are defined for the SIMPOL.1 and Meredith et al. methods
 * `KEGGREST` is no longer a dependency of `volcalc` (previously used in `get_mol_kegg()`)
 

diff --git a/R/calc_vol.R b/R/calc_vol.R
@@ -7,6 +7,8 @@
 #' @param from The form of `input`. Either `"mol_path"` (default) or `"smiles"`.
 #' @param method The method for calculating estimated volatility. See
 #'   [simpol1()] for more details.
+#' @param temp_c numeric; For methods that allow it, specify temperature (in
+#'   degrees C) at which log10(P) estimates are calculated.
 #' @param environment The environment for calculating relative volatility
 #'   categories. RVI thresholds for low, moderate, and high volatility are as
 #'   follows: `"clean"` (clean atmosphere, default) -2, 0, 2; `"polluted"`
@@ -59,6 +61,7 @@ calc_vol <-
   function(input, 
            from = c("mol_path", "smiles"),
            method = c("meredith", "simpol1"),
+           temp_c = 20,
            environment = c("clean", "polluted", "soil"),
            validate = TRUE,
            return_fx_groups = FALSE,
@@ -88,6 +91,8 @@ calc_vol <-
     }
 
     if(from == "smiles") {
+      # a way of converting into a list of *named* vectors, so smiles2sdf() runs on named vectors as input
+      input <- split(input, seq_along(input))
       compound_sdf_list <- lapply(input, ChemmineR::smiles2sdf)
     }
     fx_groups_df_list <-
@@ -98,7 +103,7 @@ calc_vol <-
       dplyr::bind_rows(fx_groups_df_list, .id = {{ from }}) 
 
     # calculate relative volatility & categories from logP
-    vol_df <- simpol1(fx_groups_df, meredith = meredith) %>% 
+    vol_df <- simpol1(fx_groups_df, meredith = meredith, temp_c = temp_c) %>% 
       dplyr::mutate(
         # mass is converted from grams to micrograms
         # 0.0000821 is universal gas constant
@@ -121,7 +126,6 @@ calc_vol <-
         colnames(fx_groups_df)[!colnames(fx_groups_df) %in% c("formula", "name", "molecular_weight")]
     }
     if (isTRUE(return_calc_steps)) {
-      #TODO document log_alpha (need to figure out why it's called that first)
       cols_calc <- c("molecular_weight", "log_alpha", "log10_P")
     }
 

diff --git a/R/simpol1.R b/R/simpol1.R
@@ -12,24 +12,29 @@
 #' atmospheres.
 #' 
 #' The modified method in Meredith et al. (2023) adds the following additional
-#' functional groups and coefficients:
+#' functional groups:
 #' 
-#' - Phosphoric acid (-2.23)
-#' - Phosphoric ester (-2.23)
-#' - Sulfate (-2.23)
-#' - Sulfonate (-2.23)
-#' - Thiol (-2.23)
-#' - Carbothioester (-1.20)
+#' Using the same coefficient as nitrate
 #' 
-#' @note The method described in Pankow & Asher (2008) allows for
-#' calculations of logP at different temperatures.  This implementation
-#' currently only calculates values at 20ºC.
+#' - Phosphoric acid
+#' - Phosphoric ester
+#' - Sulfate
+#' - Sulfonate
+#' - Thiol
 #' 
+#' Using the same coefficient as ester
+#' - Carbothioester
+#' 
+#' @note The method described in Pankow & Asher (2008) was developed using data
+#'   between 0 °C and 120 °C, so extrapolating beyond that range is not
+#'   recommended.
+#'
 #' @param fx_groups A data.frame or tibble with counts of functional groups
 #'   produced by [get_fx_groups()] (or manually, with the same column names).
 #' @param meredith Logical; `FALSE`: use the original SIMPOL.1 method. `TRUE`:
 #'   use the modified version in Meredith et al. (2023).
-#'
+#' @param temp_c Numeric; the temperature at which to calculate volatility
+#'   estimates in °C.
 #' @returns The `fx_groups` tibble with the additional `log10_P` column.
 #'
 #' @references 
@@ -50,65 +55,123 @@
 #' sdf <- ChemmineR::read.SDFset(mol_path)
 #' fx_groups <- get_fx_groups(sdf)
 #' simpol1(fx_groups)
-simpol1 <- function(fx_groups, meredith = TRUE) {
-  betas <- fx_groups %>%
+simpol1 <- function(fx_groups, meredith = TRUE, temp_c = 20) {
+  #convert C to K
+  if (temp_c < 0 | temp_c > 120) {
+    warning("Temperatures below 0\u00b0C or above 120\u00b0C extrapolate beyond the range SIMPOL.1 was intended for. \n  Interpret results with caution!")
+  }
+  temp_k <- temp_c + 273.15
+
+  b_k <- calc_bk_simpol1(temp_k)
+
+  contributions <- fx_groups %>%
     # assume NAs are 0s for the sake of this calculation
     dplyr::mutate(dplyr::across(dplyr::where(is.integer), function(x)
       ifelse(is.na(x), 0L, x))) %>%
     # TODO: There is also a table of coefs and equation to calculate b_k(T) at
     # different values of T. Why wasn't this implemented?
+
     dplyr::mutate(
       # multiplier for each functional group is volatility contribution to log10P
       # b_k(T)  * v_k,i
-      b_00 = (1.79     * 1), #b_0(T) is an intercept/constant.  
-      b_01 = (-0.438   * .data$carbons),
-      b_02 = (-0.0338  * .data$carbons_asa),
-      b_03 = (-0.675	 * .data$rings_aromatic),
-      b_04 = (-0.0104  * .data$rings_aliphatic),
-      b_05 = (-0.105   * .data$carbon_dbl_bonds_aliphatic),
-      b_06 = (-0.506   * .data$CCCO_aliphatic_ring),
-      b_07 = (-2.23	   * .data$hydroxyl_aliphatic),
-      b_08 = (-1.35	   * .data$aldehydes),
-      b_09 = (-0.935   * .data$ketones),
-      b_10 = (-3.58	   * .data$carbox_acids),
-      b_11 = (-1.20	   * .data$ester),
-      b_12 = (-0.718   * .data$ether_alkyl),
-      b_13 = (-0.683   * .data$ether_alicyclic),
-      b_14 = (-1.03	   * .data$ether_aromatic),
-      b_15 = (-2.23	   * .data$nitrate),
-      b_16 = (-2.15	   * .data$nitro),
-      b_17 = (-2.14	   * .data$hydroxyl_aromatic),
-      b_18 = (-1.03	   * .data$amine_primary),
-      b_19 = (-0.849	 * .data$amine_secondary),
-      b_20 = (-0.608 	 * .data$amine_tertiary),
-      b_21 = (-1.61    * .data$amine_aromatic),
-      b_22 = (-4.49    * .data$amide_primary),
-      b_23 = (-5.26    * .data$amide_secondary),
-      b_24 = (-2.63    * .data$amide_tertiary),
-      b_25 = (-2.34    * .data$carbonylperoxynitrate),
-      b_26 = (-0.368   * .data$peroxide),
-      b_27 = (-2.48	   * .data$hydroperoxide),
-      b_28 = (-2.48    * .data$carbonylperoxyacid),
-      b_29 = (0.0432 	 * .data$nitrophenol),
-      b_30 = (-2.67	   * .data$nitroester)
+      vb_00 = (b_k["b0"]  * 1), #b_0(T) is an intercept/constant.  
+      vb_01 = (b_k["b1"]  * .data$carbons),
+      vb_02 = (b_k["b2"]  * .data$carbons_asa),
+      vb_03 = (b_k["b3"]	* .data$rings_aromatic),
+      vb_04 = (b_k["b4"]  * .data$rings_aliphatic),
+      vb_05 = (b_k["b5"]  * .data$carbon_dbl_bonds_aliphatic),
+      vb_06 = (b_k["b6"]  * .data$CCCO_aliphatic_ring),
+      vb_07 = (b_k["b7"]  * .data$hydroxyl_aliphatic),
+      vb_08 = (b_k["b8"]  * .data$aldehydes),
+      vb_09 = (b_k["b9"]  * .data$ketones),
+      vb_10 = (b_k["b10"]  * .data$carbox_acids),
+      vb_11 = (b_k["b11"]  * .data$ester),
+      vb_12 = (b_k["b12"]  * .data$ether_alkyl),
+      vb_13 = (b_k["b13"]  * .data$ether_alicyclic),
+      vb_14 = (b_k["b14"]  * .data$ether_aromatic),
+      vb_15 = (b_k["b15"]  * .data$nitrate),
+      vb_16 = (b_k["b16"]  * .data$nitro),
+      vb_17 = (b_k["b17"]  * .data$hydroxyl_aromatic),
+      vb_18 = (b_k["b18"]  * .data$amine_primary),
+      vb_19 = (b_k["b19"]	 * .data$amine_secondary),
+      vb_20 = (b_k["b20"]	 * .data$amine_tertiary),
+      vb_21 = (b_k["b21"]  * .data$amine_aromatic),
+      vb_22 = (b_k["b22"]  * .data$amide_primary),
+      vb_23 = (b_k["b23"]  * .data$amide_secondary),
+      vb_24 = (b_k["b24"]  * .data$amide_tertiary),
+      vb_25 = (b_k["b25"]  * .data$carbonylperoxynitrate),
+      vb_26 = (b_k["b26"]  * .data$peroxide),
+      vb_27 = (b_k["b27"]  * .data$hydroperoxide),
+      vb_28 = (b_k["b28"]  * .data$carbonylperoxyacid),
+      vb_29 = (b_k["b29"]	 * .data$nitrophenol),
+      vb_30 = (b_k["b30"]  * .data$nitroester)
     ) 
 
   if (isTRUE(meredith)) {
-    betas <- betas %>% 
+    contributions <- contributions %>% 
       dplyr::mutate(
         # # Below are additions from Meredith et al.
-        b_32 = (-2.23	  * .data$phosphoric_acids),
-        b_33 = (-2.23	  * .data$phosphoric_esters),
-        b_34 = (-2.23	  * .data$sulfates),
-        b_35 = (-2.23	  * .data$sulfonates),
-        b_36 = (-2.23	  * .data$thiols),
-        b_37 = (-1.20	  * .data$carbothioesters)
+        # use the coef for nitrate for the following groups
+        vb_32 = (b_k["b15"] * .data$phosphoric_acids),
+        vb_33 = (b_k["b15"] * .data$phosphoric_esters),
+        vb_34 = (b_k["b15"] * .data$sulfates),
+        vb_35 = (b_k["b15"] * .data$sulfonates),
+        vb_36 = (b_k["b15"] * .data$thiols),
+        # use the coef for ester for carbothioesters
+        vb_37 = (b_k["b11"] * .data$carbothioesters)
       )
   }
 
-  betas %>% 
+  contributions %>% 
     dplyr::rowwise() %>% 
-    dplyr::mutate(log10_P = sum(dplyr::c_across(dplyr::starts_with("b_")))) %>% 
+    dplyr::mutate(log10_P = sum(dplyr::c_across(dplyr::starts_with("vb_")))) %>% 
     dplyr::ungroup() %>% 
-    dplyr::select(-dplyr::starts_with("b_"))
+    dplyr::select(-dplyr::starts_with("vb_"))
+}
+
+
+#from table 5 of Pankow & Asher
+calc_bk_simpol1 <- function(temp_k = 293.15) {
+  table_5 <- tibble::tribble(
+    ~k, ~B1, ~B2, ~B3, ~B4,
+    0, -4.26938E+02, 2.89223E-01, 4.42057E-03, 2.92846E-01,
+    1, -4.11248E+02, 8.96919E-01, -2.48607E-03, 1.40312E-01,
+    2, -1.46442E+02, 1.54528E+00, 1.71021E-03, -2.78291E-01,
+    3, 3.50262E+01, -9.20839E-01, 2.24399E-03, -9.36300E-02,
+    4, -8.72770E+01, 1.78059E+00, -3.07187E-03, -1.04341E-01,
+    5, 5.73335E+00, 1.69764E-02, -6.28957E-04, 7.55434E-03,
+    6, -2.61268E+02, -7.63282E-01, -1.68213E-03, 2.89038E-01,
+    7, -7.25373E+02, 8.26326E-01, 2.50957E-03, -2.32304E-01,
+    8, -7.29501E+02, 9.86017E-01, -2.92664E-03, 1.78077E-01,
+    9, -1.37456E+01, 5.23486E-01, 5.50298E-04, -2.76950E-01,
+    10, -7.98796E+02, -1.09436E+00, 5.24132E-03, -2.28040E-01,
+    11, -3.93345E+02, -9.51778E-01, -2.19071E-03, 3.05843E-01,
+    12, -1.44334E+02, -1.85617E+00, -2.37491E-05, 2.88290E-01,
+    13, 4.05265E+01, -2.43780E+00, 3.60133E-03, 9.86422E-02,
+    14, -7.07406E+01, -1.06674E+00, 3.73104E-03, -1.44003E-01,
+    15, -7.83648E+02, -1.03439E+00, -1.07148E-03, 3.15535E-01,
+    16, -5.63872E+02, -7.18416E-01, 2.63016E-03, -4.99470E-02,
+    17, -4.53961E+02, -3.26105E-01, -1.39780E-04, -3.93916E-02,
+    18, 3.71375E+01, -2.66753E+00, 1.01483E-03, 2.14233E-01,
+    19, -5.03710E+02, 1.04092E+00, -4.12746E-03, 1.82790E-01,
+    20, -3.59763E+01, -4.08458E-01, 1.67264E-03, -9.98919E-02,
+    21, -6.09432E+02, 1.50436E+00, -9.09024E-04, -1.35495E-01,
+    22, -1.02367E+02, -7.16253E-01, -2.90670E-04, -5.88556E-01,
+    23, -1.93802E+03, 6.48262E-01, 1.73245E-03, 3.47940E-02,
+    24, -5.26919E+00, 3.06435E-01, 3.25397E-03, -6.81506E-01,
+    25, -2.84042E+02, -6.25424E-01, -8.22474E-04, -8.80549E-02,
+    26, 1.50093E+02, 2.39875E-02, -3.37969E-03, 1.52789E-02,
+    27, -2.03387E+01, -5.48718E+00, 8.39075E-03, 1.07884E-01,
+    28, -8.38064E+02, -1.09600E+00, -4.24385E-04, 2.81812E-01,
+    29, -5.27934E+01, -4.63689E-01, -5.11647E-03, 3.84965E-01,
+    30, -1.61520E+03, 9.01669E-01, 1.44536E-03, 2.66889E-01
+  )
+
+  b_k_df <- 
+    dplyr::mutate(table_5, b_k = .data$B1/temp_k + .data$B2 + .data$B3*temp_k + .data$B4*log(temp_k))
+
+  out <- b_k_df$b_k
+  names(out) <- paste0("b", b_k_df$k)
+  #return:
+  out
 }
diff --git a/man/calc_vol.Rd b/man/calc_vol.Rd
diff --git a/man/simpol1.Rd b/man/simpol1.Rd
diff --git a/tests/testthat/test-calc_vol.R b/tests/testthat/test-calc_vol.R
@@ -1,6 +1,6 @@
 test_that("volatility estimate is correct", {
-  ex_vol_df <- calc_vol("data/C16181.mol")
-  expect_equal(round(ex_vol_df$rvi, 6), 6.975349)
+  ex_vol_df <- calc_vol(testthat::test_path("data/C16181.mol"), temp_c = 20)
+  expect_equal(round(ex_vol_df$rvi, 4), 6.9776)
 })
 
 test_that("returns correct columns depending on return arguments", {
@@ -28,11 +28,15 @@ test_that("calc_vol() works with multiple inputs", {
 
 test_that("smiles and .mol give same results", {
   paths <-
-    c("data/C16181.mol",
-      "data/map00361/C00011.mol",
-      "data/map00361/C00042.mol")
+    c(testthat::test_path("data/C16181.mol"),
+      testthat::test_path("data/map00361/C00011.mol"),
+      testthat::test_path("data/map00361/C00042.mol"))
   smiles <-
-    c("C1(C(C(C(C(C1Cl)Cl)Cl)Cl)Cl)O", "O=C=O", "C(CC(=O)O)C(=O)O")
+    c(
+      "beta-2,3,4,5,6-Pentachlorocyclohexanol" = "C1(C(C(C(C(C1Cl)Cl)Cl)Cl)Cl)O",
+      "CO2" = "O=C=O",
+      "Succinate" = "C(CC(=O)O)C(=O)O"
+    )
   expect_equal(
     calc_vol(smiles, from = "smiles") %>% dplyr::select(-name,-smiles),
     calc_vol(paths) %>% dplyr::select(-name,-mol_path)

diff --git a/tests/testthat/test-simpol1.R b/tests/testthat/test-simpol1.R
@@ -15,3 +15,11 @@ test_that("isoprene is more volatile than glucose", {
   expect_gt(log10P_together[1], log10P_together[2])
   expect_equal(log10P_together, log10P_separate)
 })
+
+test_that("changing temperature does something", {
+  isoprene <- ChemmineR::read.SDFset(mol_example()[5])
+  groups <- get_fx_groups(isoprene)
+  c_30 <- simpol1(groups, temp_c = 30)$log10_P
+  c_20 <- simpol1(groups, temp_c = 20)$log10_P
+  expect_gt(c_30, c_20)
+})