Performs the correction of LiP-peptides for changes in protein abundance and calculates their significance using a t-test. This function was implemented based on the MSstatsLiP package developed by the Vitek lab.

correct_lip_for_abundance(
  lip_data,
  trp_data,
  protein_id,
  grouping,
  comparison = comparison,
  diff = diff,
  n_obs = n_obs,
  std_error = std_error,
  p_adj_method = "BH",
  retain_columns = NULL,
  method = c("satterthwaite", "no_df_approximation")
)

Arguments

lip_data

a data frame containing at least the input variables. Ideally, the result from the calculate_diff_abundance function is used.

trp_data

a data frame containing at least the input variables minus the grouping column. Ideally, the result from the calculate_diff_abundance function is used.

protein_id

a character column in the lip_data and trp_data data frames that contains protein identifiers.

grouping

a character column in the lip_data data frame that contains precursor or peptide identifiers.

comparison

a character column in the lip_data and trp_data data frames that contains the comparisons between conditions.

diff

a numeric column in the lip_data and trp_data data frames that contains log2-fold changes for peptide or protein quantities.

n_obs

a numeric column in the lip_data and trp_data data frames containing the number of observations used to calculate fold changes.

std_error

a numeric column in the lip_data and trp_data data frames containing the standard error of fold changes.

p_adj_method

a character value, specifies the p-value correction method. Possible methods are c("holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none"). Default method is "BH".

retain_columns

a vector indicating if certain columns should be retained from the input data frame. Default is not retaining additional columns retain_columns = NULL. Specific columns can be retained by providing their names (not in quotations marks, just like other column names, but in a vector). Please note that if you retain columns that have multiple rows per grouped variable there will be duplicated rows in the output.

method

a character value, specifies the method used to estimate the degrees of freedom. Possible methods are c("satterthwaite", "no_df_approximation"). satterthwaite uses the Welch-Satterthwaite equation to estimate the pooled degrees of freedom, as described in https://doi.org/10.1016/j.mcpro.2022.100477 and implemented in the MSstatsLiP package. This approach respects the number of protein measurements for the degrees of freedom. no_df_approximation just takes the number of peptides into account when calculating the degrees of freedom.

Value

a data frame containing corrected differential abundances (adj_diff, adjusted standard errors (adj_std_error), degrees of freedom (df), pvalues (pval) and adjusted p-values (adj_pval)

Author

Aaron Fehr

Examples


# Load libraries

library(dplyr)

# Load example data and simulate tryptic data by summing up precursors

data <- rapamycin_10uM

data_trp <- data %>%
  dplyr::group_by(pg_protein_accessions, r_file_name) %>%
  dplyr::mutate(pg_quantity = sum(fg_quantity)) %>%
  dplyr::distinct(
    r_condition,
    r_file_name,
    pg_protein_accessions,
    pg_quantity
  )


# Calculate differential abundances for LiP and Trp data

diff_lip <- data %>%
  dplyr::mutate(fg_intensity_log2 = log2(fg_quantity)) %>%
  assign_missingness(
    sample = r_file_name,
    condition = r_condition,
    intensity = fg_intensity_log2,
    grouping = eg_precursor_id,
    ref_condition = "control",
    retain_columns = "pg_protein_accessions"
  ) %>%
  calculate_diff_abundance(
    sample = r_file_name,
    condition = r_condition,
    grouping = eg_precursor_id,
    intensity_log2 = fg_intensity_log2,
    comparison = comparison,
    method = "t-test",
    retain_columns = "pg_protein_accessions"
  )
#> [1/2] Create input for t-tests ... 
#> DONE
#> [2/2] Calculate t-tests ... 
#> DONE


diff_trp <- data_trp %>%
  dplyr::mutate(pg_intensity_log2 = log2(pg_quantity)) %>%
  assign_missingness(
    sample = r_file_name,
    condition = r_condition,
    intensity = pg_intensity_log2,
    grouping = pg_protein_accessions,
    ref_condition = "control"
  ) %>%
  calculate_diff_abundance(
    sample = r_file_name,
    condition = r_condition,
    grouping = pg_protein_accessions,
    intensity_log2 = pg_intensity_log2,
    comparison = comparison,
    method = "t-test"
  )
#> [1/2] Create input for t-tests ... 
#> DONE
#> [2/2] Calculate t-tests ... 
#> DONE

# Correct for abundance changes

corrected <- correct_lip_for_abundance(
  lip_data = diff_lip,
  trp_data = diff_trp,
  protein_id = pg_protein_accessions,
  grouping = eg_precursor_id,
  retain_columns = c("missingness"),
  method = "satterthwaite"
)

head(corrected, n = 10)
#> # A tibble: 10 × 15
#>    missingness comparison         pg_protein_accessions eg_precursor_id diff_pep
#>    <chr>       <chr>              <chr>                 <chr>              <dbl>
#>  1 complete    rapamycin_vs_cont… P62942                _RGQTC[Carbami…     2.73
#>  2 complete    rapamycin_vs_cont… P62942                _LVFDVELLK_.2      -2.15
#>  3 complete    rapamycin_vs_cont… P62942                _GWEEGVAQMSVGQ…     2.88
#>  4 complete    rapamycin_vs_cont… P62942                _VFDVELLKLE_.2     -2.09
#>  5 MAR         rapamycin_vs_cont… P62942                _VFDVELLK_.2       -2.11
#>  6 complete    rapamycin_vs_cont… P62942                _GWEEGVAQMSVGQ…     2.77
#>  7 complete    rapamycin_vs_cont… P62942                _LTISPDYAYGAT_…    -2.12
#>  8 complete    rapamycin_vs_cont… P62942                _RGQTC[Carbami…     2.43
#>  9 complete    rapamycin_vs_cont… Q16881                _DYDLIIIGGGSGG…    -1.03
#> 10 complete    rapamycin_vs_cont… P62942                _LVFDVELLKLE_.2    -2.09
#> # ℹ 10 more variables: n_obs_pep <int>, std_error_pep <dbl>, diff_prot <dbl>,
#> #   n_obs_prot <int>, std_error_prot <dbl>, adj_diff <dbl>,
#> #   adj_std_error <dbl>, df <dbl>, pval <dbl>, adj_pval <dbl>