Am 13/10/2016 um 17:29 schrieb Claus Hunsen:
Acked-by: Wolfgang Mauerer <wolfgang.mauerer@xxxxxxxxxxxxxxxxx>
To force Codeface to load a previously generated corpus at the start of
the ML analysis, the option '--use-corpus' is introduced to the Codeface
CLI and also the batch interface of the ML analysis.
The option '--use-corpus' basically corresponds to the negated 'use.mbox'
parameter ('!use.mbox') of the 'codeface/R/ml/analysis.r::gen.forest' method.
If the option is not set, the corpus is (re-)generated from the .mbox
file by default.
Signed-off-by: Claus Hunsen <hunsen@xxxxxxxxxxxxxxxxx>
---
codeface/R/config.r | 5 ++++-
codeface/R/ml/analysis.r | 2 +-
codeface/cli.py | 5 ++++-
codeface/project.py | 4 +++-
4 files changed, 12 insertions(+), 4 deletions(-)
diff --git a/codeface/R/config.r b/codeface/R/config.r
index de59711..2cd60cc 100644
--- a/codeface/R/config.r
+++ b/codeface/R/config.r
@@ -136,7 +136,9 @@ config.from.args <- function(positional.args=list(),
extra.args=list(),
make_option(c("-j", "--jobs"), type="integer", default=1,
help="Number of parallel jobs for analysis"),
make_option("--profile", help="Measure and store profiling data",
- action="store_true", default=FALSE)
+ action="store_true", default=FALSE),
+ make_option("--use-corpus", help="Re-use the corpus file that have been
generated before",
+ dest="use_corpus", action="store_true", default=FALSE)
), extra.args)
## Note that positional_arguments=TRUE even if no positional arguments are
@@ -178,6 +180,7 @@ config.from.args <- function(positional.args=list(),
extra.args=list(),
## Store other options that need to be propagated upwards
conf$profile <- opts$profile
conf$jobs <- opts$jobs
+ conf$use_corpus <- opts$use_corpus
logdebug.config(conf)
return(conf)
diff --git a/codeface/R/ml/analysis.r b/codeface/R/ml/analysis.r
index 17d3f11..5cdd67a 100644
--- a/codeface/R/ml/analysis.r
+++ b/codeface/R/ml/analysis.r
@@ -418,7 +418,7 @@ check.corpus.precon <- function(corp.base) {
dispatch.all <- function(conf, repo.path, resdir) {
loginfo("Starting mailinglist analysis", logger="ml.analysis")
- corp.base <- gen.forest(conf, repo.path, resdir)
+ corp.base <- gen.forest(conf, repo.path, resdir, use.mbox =
!conf$use_corpus)
loginfo("corp.base finished", logger="ml.analysis")
## TODO: When we consider incremental updates, would it make sense
## to just update the corpus, and let all other operations run
diff --git a/codeface/cli.py b/codeface/cli.py
index b5d2719..db95c5c 100644
--- a/codeface/cli.py
+++ b/codeface/cli.py
@@ -89,6 +89,8 @@ def get_parser():
ml_parser.add_argument('-m', '--mailinglist', help="Only run on the "
"specified mailing list (can be specified multiple times)",
default=[], action="append")
+ ml_parser.add_argument('--use-corpus', action="store_true",
+ help="Re-use the corpus file that have been
generated before")
ml_parser.add_argument('resdir',
help="Directory to store analysis results in")
ml_parser.add_argument('mldir',
@@ -126,7 +128,8 @@ def cmd_ml(args):
if logfile:
logfile = os.path.abspath(logfile)
mailinglist_analyse(resdir, mldir, codeface_conf, project_conf,
- args.loglevel, logfile, args.jobs, args.mailinglist)
+ args.loglevel, logfile, args.jobs, args.mailinglist,
+ args.use_corpus)
return 0
def cmd_dynamic(args):
diff --git a/codeface/project.py b/codeface/project.py
index 9143cf0..8b9eeb3 100644
--- a/codeface/project.py
+++ b/codeface/project.py
@@ -194,7 +194,7 @@ def project_analyse(resdir, gitdir, codeface_conf,
project_conf,
log.info("=> Codeface run complete!")
def mailinglist_analyse(resdir, mldir, codeface_conf, project_conf, loglevel,
- logfile, jobs, mailinglists):
+ logfile, jobs, mailinglists, use_corpus):
conf = Configuration.load(codeface_conf, project_conf)
ml_resdir = pathjoin(resdir, conf["project"], "ml")
@@ -205,6 +205,8 @@ def mailinglist_analyse(resdir, mldir, codeface_conf,
project_conf, loglevel,
cmd.extend(("-c", codeface_conf))
cmd.extend(("-p", project_conf))
cmd.extend(("-j", str(jobs)))
+ if (use_corpus):
+ cmd.append("--use-corpus")
cmd.append(ml_resdir)
cmd.append(mldir)
if not mailinglists: