- The latest version of tm provides a new function, meta(), to access the document metadata - The data structures for storing the document metadata have changed, so the old approach of using attr(..) or the old accessor functions (e.g. Author, Heading) no longer works Signed-off-by: Mitchell Joblin <mitchell.joblin.ext@xxxxxxxxxxx> --- codeface/R/ml/analysis.r | 10 +++++----- codeface/R/ml/project.spec.r | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/codeface/R/ml/analysis.r b/codeface/R/ml/analysis.r index 8befdfb..7c7dc2c 100644 --- a/codeface/R/ml/analysis.r +++ b/codeface/R/ml/analysis.r @@ -193,12 +193,12 @@ check.corpus.precon <- function(corp.base) { ## Preconditions ###### ## Condition #1: Emails must have at most one reference Id - get.ref.id.lines <- function(x) { grep("^References:", attr(x, "Header"), + get.ref.id.lines <- function(x) { grep("^References:", meta(x, tag="header"), value = FALSE, useBytes = TRUE)} rmv.multi.refs <- function(doc) { ref.id.lines <- get.ref.id.lines(doc) rmv.lines <- ref.id.lines[-1] - header <- attr(doc, "Header") + header <- meta(doc, tag="header") if(length(rmv.lines) != 0) { ## Log number of removed reference id lines @@ -215,7 +215,7 @@ check.corpus.precon <- function(corp.base) { ## Condition #2: Authors must be specified using "name <email>" format fix.author <- function(doc) { - author <- attr(doc, "Author") + author <- meta(doc, tag="author") if(identical(author, character(0))) { author <- "unknown" @@ -267,8 +267,8 @@ check.corpus.precon <- function(corp.base) { ## Apply checks of conditions to all documents fix.corpus <- function(i) { doc <- corp.base$corp[[i]] - attr(doc, "Header") <- rmv.multi.refs(doc) - attr(doc, "Author") <- fix.author(doc) + meta(doc, tag="header") <- rmv.multi.refs(doc) + meta(doc, tag="author") <- fix.author(doc) return(doc) } diff --git a/codeface/R/ml/project.spec.r b/codeface/R/ml/project.spec.r index ad47b53..b9be2a2 100644 --- a/codeface/R/ml/project.spec.r +++ b/codeface/R/ml/project.spec.r @@ 
-18,14 +18,14 @@ ## TODO: Make this systematic, for instance via filter classes linux.kernel.preprocess <- function(corp) { # Remove anything sent by a tip bot - authors <- sapply(corp, Author) + authors <- sapply(corp, meta, tag='author') tip.bot <- grep("tip-bot for ", authors, fixed=TRUE, useBytes=TRUE) if (length(tip.bot) > 0) corp <- corp[-tip.bot] # Remove all git pull requests (don't remove any follow-up messages, # they likely contain discussions). - headings <- tolower(sapply(corp, Heading)) + headings <- tolower(sapply(corp, meta, tag='heading')) pull.req <- grep("^\\[git pull", headings) if (length(pull.req) > 0) corp <- corp[-pull.req] -- 1.9.1