LFQ QC and summarisation to protein-level abundance

Tom Smith





QC peptides= 0.7 # Reduce the column name size ) #> Warning in gplots::heatmap.2(as.matrix(missing_data), col = c("black", -#> "lightgray"), : Discrepancy: Colv is FALSE, while dendrogram is `both'. Omitting -#> column dendogram. +#> "lightgray"), : Discrepancy: Colv is FALSE, while dendrogram is `both'. +#> Omitting column dendogram.
Presence/Absence heatmap

Alternative normal programmatically.

 # remotes::install_github("csdaw/uniprotREST")
-res <- uniprotREST::uniprot_map(
+res <- uniprotREST::uniprot_map(
   ids = rownames(prot_robust),
   fields = c("accession", "feature_count", "go_id"),
   format = "tsv",
Alternative normal

Session info

-#> R version 4.2.2 (2022-10-31)
+#> R version 4.2.3 (2023-03-15)
 #> Platform: x86_64-pc-linux-gnu (64-bit)
-#> Running under: Ubuntu 22.04.1 LTS
+#> Running under: Ubuntu 22.04.2 LTS
 #> Matrix products: default
 #> BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
Session info#> [8] base #> #> other attached packages: -#> [1] httr_1.4.4 tibble_3.1.8 dplyr_1.0.10 -#> [4] tidyr_1.2.1 ggplot2_3.4.0 biobroom_1.30.0 -#> [7] broom_1.0.2 gplots_3.1.3 naniar_0.6.1 +#> [1] httr_1.4.5 tibble_3.2.1 dplyr_1.1.1 +#> [4] tidyr_1.3.0 ggplot2_3.4.2 biobroom_1.30.0 +#> [7] broom_1.0.4 gplots_3.1.3 naniar_1.0.0 #> [10] MSnbase_2.24.2 ProtGenerics_1.30.0 mzR_2.32.0 -#> [13] Rcpp_1.0.9 Biobase_2.58.0 Biostrings_2.66.0 -#> [16] GenomeInfoDb_1.34.6 XVector_0.38.0 IRanges_2.32.0 -#> [19] S4Vectors_0.36.1 BiocGenerics_0.44.0 camprotR_0.0.0.9000 +#> [13] Rcpp_1.0.10 Biobase_2.58.0 Biostrings_2.66.0 +#> [16] GenomeInfoDb_1.34.9 XVector_0.38.0 IRanges_2.32.0 +#> [19] S4Vectors_0.36.2 BiocGenerics_0.44.0 camprotR_0.0.0.9000 #> #> loaded via a namespace (and not attached): -#> [1] colorspace_2.0-3 ellipsis_0.3.2 visdat_0.5.3 -#> [4] rprojroot_2.0.3 fs_1.5.2 clue_0.3-63 -#> [7] farver_2.1.1 affyio_1.68.0 bit64_4.0.5 -#> [10] AnnotationDbi_1.60.0 fansi_1.0.3 codetools_0.2-18 -#> [13] ncdf4_1.21 doParallel_1.0.17 cachem_1.0.6 -#> [16] impute_1.72.2 robustbase_0.95-0 knitr_1.41 -#> [19] jsonlite_1.8.4 GO.db_3.16.0 cluster_2.1.4 -#> [22] vsn_3.66.0 png_0.1-8 BiocManager_1.30.19 -#> [25] compiler_4.2.2 backports_1.4.1 assertthat_0.2.1 -#> [28] fastmap_1.1.0 limma_3.54.0 cli_3.6.0 -#> [31] htmltools_0.5.4 tools_4.2.2 gtable_0.3.1 -#> [34] glue_1.6.2 GenomeInfoDbData_1.2.9 affy_1.76.0 -#> [37] rappdirs_0.3.3 MALDIquant_1.22 jquerylib_0.1.4 -#> [40] pkgdown_2.0.7 vctrs_0.5.1 preprocessCore_1.60.1 -#> [43] iterators_1.0.14 xfun_0.36 stringr_1.5.0 -#> [46] lifecycle_1.0.3 iq_1.9.7 gtools_3.9.4 -#> [49] XML_3.99-0.13 DEoptimR_1.0-11 zlibbioc_1.44.0 -#> [52] MASS_7.3-58.1 scales_1.2.1 ragg_1.2.5 -#> [55] pcaMethods_1.90.0 parallel_4.2.2 httr2_0.2.2 -#> [58] yaml_2.3.6 curl_5.0.0 memoise_2.0.1 -#> [61] gridExtra_2.3 UpSetR_1.4.0 sass_0.4.4 -#> [64] RSQLite_2.2.20 stringi_1.7.12 highr_0.10 -#> [67] desc_1.4.2 foreach_1.5.2 checkmate_2.1.0 -#> [70] caTools_1.18.2 
BiocParallel_1.32.5 rlang_1.0.6 -#> [73] pkgconfig_2.0.3 systemfonts_1.0.4 bitops_1.0-7 -#> [76] mzID_1.36.0 evaluate_0.19 lattice_0.20-45 -#> [79] purrr_1.0.1 labeling_0.4.2 bit_4.0.5 -#> [82] tidyselect_1.2.0 plyr_1.8.8 magrittr_2.0.3 -#> [85] R6_2.5.1 generics_0.1.3 DBI_1.1.3 -#> [88] pillar_1.8.1 withr_2.5.0 MsCoreUtils_1.10.0 -#> [91] KEGGREST_1.38.0 RCurl_1.98-1.9 crayon_1.5.2 -#> [94] KernSmooth_2.23-20 utf8_1.2.2 rmarkdown_2.19 -#> [97] grid_4.2.2 blob_1.2.3 uniprotREST_0.0.0.9000 -#> [100] digest_0.6.31 textshaping_0.3.6 munsell_0.5.0 -#> [103] bslib_0.4.2

+#> [1] colorspace_2.1-0 visdat_0.6.0 rprojroot_2.0.3 +#> [4] fs_1.6.1 clue_0.3-64 farver_2.1.1 +#> [7] affyio_1.68.0 bit64_4.0.5 AnnotationDbi_1.60.2 +#> [10] fansi_1.0.4 codetools_0.2-19 ncdf4_1.21 +#> [13] doParallel_1.0.17 cachem_1.0.7 impute_1.72.3 +#> [16] robustbase_0.95-1 knitr_1.42 jsonlite_1.8.4 +#> [19] GO.db_3.16.0 cluster_2.1.4 vsn_3.66.0 +#> [22] png_0.1-8 BiocManager_1.30.20 compiler_4.2.3 +#> [25] backports_1.4.1 fastmap_1.1.1 limma_3.54.2 +#> [28] cli_3.6.1 htmltools_0.5.5 tools_4.2.3 +#> [31] gtable_0.3.3 glue_1.6.2 GenomeInfoDbData_1.2.9 +#> [34] affy_1.76.0 rappdirs_0.3.3 MALDIquant_1.22.1 +#> [37] jquerylib_0.1.4 pkgdown_2.0.7 vctrs_0.6.2 +#> [40] preprocessCore_1.60.2 iterators_1.0.14 xfun_0.38 +#> [43] stringr_1.5.0 lifecycle_1.0.3 iq_1.9.10 +#> [46] gtools_3.9.4 XML_3.99-0.14 DEoptimR_1.0-12 +#> [49] zlibbioc_1.44.0 MASS_7.3-58.3 scales_1.2.1 +#> [52] ragg_1.2.5 pcaMethods_1.90.0 parallel_4.2.3 +#> [55] httr2_0.2.2 yaml_2.3.7 curl_5.0.0 +#> [58] memoise_2.0.1 gridExtra_2.3 UpSetR_1.4.0 +#> [61] sass_0.4.5 stringi_1.7.12 RSQLite_2.3.1 +#> [64] highr_0.10 desc_1.4.2 foreach_1.5.2 +#> [67] checkmate_2.1.0 caTools_1.18.2 BiocParallel_1.32.6 +#> [70] rlang_1.1.0 pkgconfig_2.0.3 systemfonts_1.0.4 +#> [73] bitops_1.0-7 mzID_1.36.0 evaluate_0.20 +#> [76] lattice_0.21-8 purrr_1.0.1 labeling_0.4.2 +#> [79] bit_4.0.5 tidyselect_1.2.0 plyr_1.8.8 +#> [82] magrittr_2.0.3 R6_2.5.1 generics_0.1.3 +#> [85] DBI_1.1.3 pillar_1.9.0 withr_2.5.0 +#> [88] MsCoreUtils_1.10.0 KEGGREST_1.38.0 RCurl_1.98-1.12 +#> [91] crayon_1.5.2 KernSmooth_2.23-20 utf8_1.2.3 +#> [94] rmarkdown_2.21 grid_4.2.3 blob_1.2.4 +#> [97] uniprotREST_1.0.0 digest_0.6.31 textshaping_0.3.6 +#> [100] munsell_0.5.0 bslib_0.4.2

Processing and QC of SILAC data

Tom Smith





Processing and QC of SILAC psm_matched_data[[psm_name]] %>% mutate(replicate = rep_n) }) %>% - bind_rows() + bind_rows() #> [1] "1" #> [1] "2" #> [1] "3" diff --git a/articles/TMT_PSM_QC_Summarisation.html b/articles/TMT_PSM_QC_Summarisation.html index 474c7b2..a40eb15 100644 --- a/articles/TMT_PSM_QC_Summarisation.html +++ b/articles/TMT_PSM_QC_Summarisation.html @@ -84,7 +84,7 @@

TMT QC PSM-level quantification and summarisation to protein-level abundance

Tom Smith





cRAP databases

Charlotte Dawson





Cambridge Centre for Prot
 ccp_tmp <- tempfile(fileext = ".fasta")
 download_ccp_crap(ccp_tmp, is_crap = TRUE, verbose = TRUE)
-#> Downloading from UniProtKB release: 2022_05
+#> Downloading from UniProtKB release: 2024_03

We can load this FASTA file into R and take a look.

 ccp_crap <- Biostrings::readAAStringSet(ccp_tmp)
Make your own cRAP databasemake_fasta(accessions = c("P00776", "P00777", "P80561"), file = griseus_tmp, is_crap = FALSE) -#> Downloading from UniProtKB release: 2022_05

+#> Downloading from UniProtKB release: 2024_03

Before we add these to our CCP cRAP FASTA, we just need to take note of what cRAP number the sequence headers stop at.

Working with MSnSets

Charlotte Dawson





Exploring an MSnSet#> ..@ experimentData :Formal class 'MIAPE' [package "MSnbase"] with 30 slots #> ..@ processingData :Formal class 'MSnProcess' [package "MSnbase"] with 10 slots #> ..@ qual :'data.frame': 220 obs. of 7 variables: -#> ..@ assayData :<environment: 0x55987b14f028> +#> ..@ assayData :<environment: 0x55d014466db0> #> ..@ phenoData :Formal class 'AnnotatedDataFrame' [package "Biobase"] with 4 slots #> ..@ featureData :Formal class 'AnnotatedDataFrame' [package "Biobase"] with 4 slots #> ..@ annotation : chr "No annotation" @@ -444,7 +444,7 @@

Make the MSnSet#> ..@ processingData :Formal class 'MSnProcess' [package "MSnbase"] with 10 slots #> ..@ qual :'data.frame': 0 obs. of 0 variables #> Formal class 'data.frame' [package "methods"] with 4 slots -#> ..@ assayData :<environment: 0x55988e413a10> +#> ..@ assayData :<environment: 0x55d0271dc2a8> #> ..@ phenoData :Formal class 'AnnotatedDataFrame' [package "Biobase"] with 4 slots #> ..@ featureData :Formal class 'AnnotatedDataFrame' [package "Biobase"] with 4 slots #> ..@ annotation : chr(0) diff --git a/authors.html b/authors.html index 00d0f51..c5e6002 100644 --- a/authors.html +++ b/authors.html @@ -78,14 +78,14 @@



Dawson C, Smith T (2023). +

Dawson C, Smith T (2024). camprotR: Processing, analysing and visualising CCP proteomics data. R package version

   title = {camprotR: Processing, analysing and visualising CCP proteomics data},
   author = {Charlotte Dawson and Tom Smith},
-  year = {2023},
+  year = {2024},
   note = {R package version},
# print release number to console
-#> [1] "2022_05"
+#> [1] "2024_03"
 # save release number and use in e.g. a file name
 rls <- check_uniprot_release()
 paste0("folder/filename_", rls, ".fasta")
-#> [1] "folder/filename_2022_05.fasta"
+#> [1] "folder/filename_2024_03.fasta"
diff --git a/reference/count_features_per_protein.html b/reference/count_features_per_protein.html index ca3d4c1..3d63bd9 100644 --- a/reference/count_features_per_protein.html +++ b/reference/count_features_per_protein.html @@ -121,7 +121,7 @@


#> 8 126 "A1X283" 1 #> 9 126 "A3KMH1" 1 #> 10 126 "A3KN83" 1 -#> # … with 23,532 more rows +#> # ℹ 23,532 more rows