[codeface] [PATCH] Change accessor to document metadata

  • From: Mitchell Joblin <joblin.m@xxxxxxxxx>
  • To: codeface@xxxxxxxxxxxxx
  • Date: Sun, 3 Aug 2014 23:17:34 +0200

- The latest version of tm provides a new function, meta(), to access
  the document metadata

- The data structures for storing the document metadata are now
  different, so the old approach of using attr(..) or the old
  accessor functions (e.g. Author, Heading) no longer works

Signed-off-by: Mitchell Joblin <mitchell.joblin.ext@xxxxxxxxxxx>
---
 codeface/R/ml/analysis.r     | 10 +++++-----
 codeface/R/ml/project.spec.r |  4 ++--
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/codeface/R/ml/analysis.r b/codeface/R/ml/analysis.r
index 8befdfb..7c7dc2c 100644
--- a/codeface/R/ml/analysis.r
+++ b/codeface/R/ml/analysis.r
@@ -193,12 +193,12 @@ check.corpus.precon <- function(corp.base) {
   ## Preconditions
   ######
   ## Condition #1: Emails must have at most one reference Id
-  get.ref.id.lines <- function(x) { grep("^References:", attr(x, "Header"),
+  get.ref.id.lines <- function(x) { grep("^References:", meta(x, tag="header"),
                                     value = FALSE, useBytes = TRUE)}
   rmv.multi.refs <- function(doc) {
                       ref.id.lines <- get.ref.id.lines(doc)
                       rmv.lines <- ref.id.lines[-1]
-                      header <- attr(doc, "Header")
+                      header <- meta(doc, tag="header")
 
                       if(length(rmv.lines) != 0) {
                         ## Log number of removed reference id lines
@@ -215,7 +215,7 @@ check.corpus.precon <- function(corp.base) {
 
   ## Condition #2: Authors must be specified using "name <email>" format
   fix.author <- function(doc) {
-    author <- attr(doc, "Author")
+    author <- meta(doc, tag="author")
 
     if(identical(author, character(0))) {
       author <- "unknown"
@@ -267,8 +267,8 @@ check.corpus.precon <- function(corp.base) {
   ## Apply checks of conditions to all documents
   fix.corpus <- function(i) {
     doc <- corp.base$corp[[i]]
-    attr(doc, "Header") <- rmv.multi.refs(doc)
-    attr(doc, "Author") <- fix.author(doc)
+    meta(doc, tag="header") <- rmv.multi.refs(doc)
+    meta(doc, tag="author") <- fix.author(doc)
 
     return(doc)
   }
diff --git a/codeface/R/ml/project.spec.r b/codeface/R/ml/project.spec.r
index ad47b53..b9be2a2 100644
--- a/codeface/R/ml/project.spec.r
+++ b/codeface/R/ml/project.spec.r
@@ -18,14 +18,14 @@
 ## TODO: Make this systematic, for instance via filter classes
 linux.kernel.preprocess <- function(corp) {
   # Remove anything sent by a tip bot
-  authors <- sapply(corp, Author)
+  authors <- sapply(corp, meta, tag='author')
   tip.bot <- grep("tip-bot for ", authors, fixed=TRUE, useBytes=TRUE)
   if (length(tip.bot) > 0)
       corp <- corp[-tip.bot]
 
   # Remove all git pull requests (don't remove any follow-up messages,
   # they likely contain discussions).
-  headings <- tolower(sapply(corp, Heading))
+  headings <- tolower(sapply(corp, meta, tag='heading'))
   pull.req <- grep("^\\[git pull", headings)
   if (length(pull.req) > 0)
     corp <- corp[-pull.req]
-- 
1.9.1


Other related posts: