Define various ID columns — fixFeatureIds • einprot

Define various types of feature IDs based on the information in rowData(sce), and add them as new columns to rowData(sce).

fixFeatureIds(
  sce,
  colDefs = list(
    # einprotId: combineIds() on the gene-name and protein-ID columns,
    # with makeUnique = TRUE.
    einprotId = function(df) {
      combineIds(
        df,
        combineCols = c("Gene.names", "Majority.protein.IDs"),
        combineWhen = "nonunique",
        splitSeparator = ";",
        joinSeparator = ".",
        makeUnique = TRUE
      )
    },
    # einprotLabel: same call as einprotId, but with makeUnique = FALSE.
    einprotLabel = function(df) {
      combineIds(
        df,
        combineCols = c("Gene.names", "Majority.protein.IDs"),
        combineWhen = "nonunique",
        splitSeparator = ";",
        joinSeparator = ".",
        makeUnique = FALSE
      )
    },
    # einprotGene: getFirstId() on the ";"-separated Gene.names column.
    einprotGene = function(df) {
      getFirstId(df, colName = "Gene.names", separator = ";")
    },
    # einprotProtein: taken directly from a single rowData column.
    einprotProtein = "Majority.protein.IDs",
    # IDsForSTRING: combineIds() with combineWhen = "missing" and
    # makeUnique = FALSE.
    IDsForSTRING = function(df) {
      combineIds(
        df,
        c("Gene.names", "Majority.protein.IDs"),
        combineWhen = "missing",
        splitSeparator = ";",
        joinSeparator = ".",
        makeUnique = FALSE
      )
    }
  )
)

Arguments

sce: A SummarizedExperiment object (or derivative).
colDefs: Named list specifying how each new column should be constructed. The names of the list will be used as the column names. Each entry can be either (i) a character vector of column names in rowData(sce), in which case the corresponding feature ID is generated by simply concatenating the values in these columns (joined with "." in the einprotLabel example below), or (ii) a function with one input argument (a data.frame, corresponding to rowData(sce)), returning a character vector corresponding to the desired feature IDs.

Value

An object of the same type as sce with additional columns in rowData(sce).

Author

Charlotte Soneson

Examples

# Locate the example MaxQuant proteinGroups file shipped with einprot and
# import it, selecting the intensity columns whose names match "^iBAQ\\.".
mqFile <- system.file("extdata", "mq_example", "1356_proteinGroups.txt",
                      package = "einprot")
sce <- importExperiment(mqFile, iColPattern = "^iBAQ\\.")$sce

# The same two rowData columns are used by several of the ID definitions.
idCols <- c("Gene.names", "Majority.protein.IDs")

# Each colDefs entry is either a function of the rowData data.frame or a
# character vector of rowData column names.
sce <- fixFeatureIds(
    sce,
    colDefs = list(
        einprotId = function(df) combineIds(df, combineCols = idCols),
        einprotLabel = idCols,
        einprotGene = function(df) getFirstId(df, "Gene.names"),
        einprotProtein = "Majority.protein.IDs",
        IDsForSTRING = function(df) {
            combineIds(df, idCols, combineWhen = "missing",
                       makeUnique = FALSE)
        }
    )
)

# Inspect the first few values of each newly added rowData column.
rd <- SummarizedExperiment::rowData(sce)
head(rd$einprotId)
#> [1] "RBM8"   "Dhx9"   "Zmynd8" "Krt10"  "Zmym4"  "Rlf"   
head(rd$einprotLabel)
#> [1] "RBM8;Rbm8a.A0A023T672;Q9CWZ3-2;Q9CWZ3"                  
#> [2] "Dhx9.A0A087WPL5;E9QNN1;O70133;O70133-2;O70133-3;Q3UR42" 
#> [3] "Zmynd8.Q3UH28;Q3U1M7;A2A483;E9Q8D1;A2A482;A2A484;A2A485"
#> [4] "Krt10.A2A513;CON__P02535-1;P02535-3;P02535-2;P02535"    
#> [5] "Zmym4.A2A791;A2A791-2;F6VYE2"                           
#> [6] "Rlf.A2A7F4;E9Q532"                                      
head(rd$einprotGene)
#> [1] "RBM8"   "Dhx9"   "Zmynd8" "Krt10"  "Zmym4"  "Rlf"   
head(rd$einprotProtein)
#> [1] "A0A023T672;Q9CWZ3-2;Q9CWZ3"                       
#> [2] "A0A087WPL5;E9QNN1;O70133;O70133-2;O70133-3;Q3UR42"
#> [3] "Q3UH28;Q3U1M7;A2A483;E9Q8D1;A2A482;A2A484;A2A485" 
#> [4] "A2A513;CON__P02535-1;P02535-3;P02535-2;P02535"    
#> [5] "A2A791;A2A791-2;F6VYE2"                           
#> [6] "A2A7F4;E9Q532"                                    
head(rd$IDsForSTRING)
#> [1] "RBM8"   "Dhx9"   "Zmynd8" "Krt10"  "Zmym4"  "Rlf"