- Duplicate ids cause problems when we convert the corpus
into a data frame before storing it to the database, because ids
are used as row names, which must be unique.
Signed-off-by: Mitchell Joblin <mitchell.joblin.ext@xxxxxxxxxxx>
---
codeface/R/ml/analysis.r | 2 ++
1 file changed, 2 insertions(+)
diff --git a/codeface/R/ml/analysis.r b/codeface/R/ml/analysis.r
index c1c9c5e..8203002 100644
--- a/codeface/R/ml/analysis.r
+++ b/codeface/R/ml/analysis.r
@@ -417,6 +417,8 @@ dispatch.all <- function(conf, repo.path, resdir) {
## NOTE: We only compute the forest for the complete interval to allow for creating
## descriptive statistics.
corp <- corp.base$corp
+ ## Remove duplicate mails
+ corp <- corp[!duplicated(meta(corp, "id"))]
## NOTE: conf must be present in the defining scope
do.normalise.bound <- function(authors) {
--
2.1.4