fixFeatureIds.Rd
Define various types of feature IDs based on the information in the
rowData of sce.
fixFeatureIds(
sce,
colDefs = list(einprotId = function(df) {
combineIds(df, combineCols =
c("Gene.names", "Majority.protein.IDs"), combineWhen = "nonunique", splitSeparator =
";", joinSeparator = ".", makeUnique = TRUE)
}, einprotLabel = function(df) {
combineIds(df, combineCols = c("Gene.names", "Majority.protein.IDs"), combineWhen =
"nonunique", splitSeparator = ";", joinSeparator = ".", makeUnique = FALSE)
},
einprotGene = function(df) {
getFirstId(df, colName = "Gene.names", separator =
";")
}, einprotProtein = "Majority.protein.IDs", IDsForSTRING = function(df) {
combineIds(df, c("Gene.names", "Majority.protein.IDs"), combineWhen = "missing",
splitSeparator = ";", joinSeparator = ".", makeUnique = FALSE)
})
)
sce: A SummarizedExperiment object (or derivative).
colDefs: Named list specifying how each new column should be generated.
The names will be used as the column names. Each entry can be either
a character vector of column names in rowData(sce), in which case
the corresponding feature ID is generated by simply concatenating
the values in these columns, or a function with one input argument
(a data.frame, corresponding to rowData(sce)), returning a
character vector corresponding to the desired feature IDs.
An object of the same type as sce with additional columns
in rowData(sce).
sce <- importExperiment(system.file("extdata", "mq_example",
"1356_proteinGroups.txt",
package = "einprot"),
iColPattern = "^iBAQ\\.")$sce
sce <- fixFeatureIds(
sce,
colDefs = list(
einprotId = function(df) combineIds(df, combineCols = c("Gene.names",
"Majority.protein.IDs")),
einprotLabel = c("Gene.names", "Majority.protein.IDs"),
einprotGene = function(df) getFirstId(df, "Gene.names"),
einprotProtein = "Majority.protein.IDs",
IDsForSTRING = function(df) combineIds(df, c("Gene.names",
"Majority.protein.IDs"),
combineWhen = "missing",
makeUnique = FALSE))
)
head(SummarizedExperiment::rowData(sce)$einprotId)
#> [1] "RBM8" "Dhx9" "Zmynd8" "Krt10" "Zmym4" "Rlf"
head(SummarizedExperiment::rowData(sce)$einprotLabel)
#> [1] "RBM8;Rbm8a.A0A023T672;Q9CWZ3-2;Q9CWZ3"
#> [2] "Dhx9.A0A087WPL5;E9QNN1;O70133;O70133-2;O70133-3;Q3UR42"
#> [3] "Zmynd8.Q3UH28;Q3U1M7;A2A483;E9Q8D1;A2A482;A2A484;A2A485"
#> [4] "Krt10.A2A513;CON__P02535-1;P02535-3;P02535-2;P02535"
#> [5] "Zmym4.A2A791;A2A791-2;F6VYE2"
#> [6] "Rlf.A2A7F4;E9Q532"
head(SummarizedExperiment::rowData(sce)$einprotGene)
#> [1] "RBM8" "Dhx9" "Zmynd8" "Krt10" "Zmym4" "Rlf"
head(SummarizedExperiment::rowData(sce)$einprotProtein)
#> [1] "A0A023T672;Q9CWZ3-2;Q9CWZ3"
#> [2] "A0A087WPL5;E9QNN1;O70133;O70133-2;O70133-3;Q3UR42"
#> [3] "Q3UH28;Q3U1M7;A2A483;E9Q8D1;A2A482;A2A484;A2A485"
#> [4] "A2A513;CON__P02535-1;P02535-3;P02535-2;P02535"
#> [5] "A2A791;A2A791-2;F6VYE2"
#> [6] "A2A7F4;E9Q532"
head(SummarizedExperiment::rowData(sce)$IDsForSTRING)
#> [1] "RBM8" "Dhx9" "Zmynd8" "Krt10" "Zmym4" "Rlf"