Am 16/02/2016 um 13:40 schrieb Andreas Ringlstetter:
From: Benjamin Hiefner <benjamin.hiefner@xxxxxxxxxxxxxxxxxxxx>
Signed-off-by: Benjamin Hiefner <benjamin.hiefner@xxxxxxxxxxxxxxxxxxxx>
---
codeface/util.py | 195
+++++++++++++++++++++++++++++++++----------------------
1 file changed, 118 insertions(+), 77 deletions(-)
diff --git a/codeface/util.py b/codeface/util.py
index b885254..4557d51 100644
--- a/codeface/util.py
+++ b/codeface/util.py
@@ -1,23 +1,23 @@
-## This file is part of Codeface. Codeface is free software: you can
-## redistribute it and/or modify it under the terms of the GNU General Public
-## License as published by the Free Software Foundation, version 2.
-##
-## This program is distributed in the hope that it will be useful, but
WITHOUT
-## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS
-## FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
-## details.
-##
-## You should have received a copy of the GNU General Public License
-## along with this program; if not, write to the Free Software
-## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-##
-## Copyright 2013 by Siemens AG, Wolfgang Mauerer
<wolfgang.mauerer@xxxxxxxxxxx>
-## All Rights Reserved.
-'''
+# This file is part of Codeface. Codeface is free software: you can
+# redistribute it and/or modify it under the terms of the GNU General Public
+# License as published by the Free Software Foundation, version 2.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+# details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# Copyright 2013 by Siemens AG, Wolfgang Mauerer
<wolfgang.mauerer@xxxxxxxxxxx>
+# All Rights Reserved.
+"""
Utility functions for running external commands
-'''
+"""
-import logging; log = logging.getLogger(__name__)
+import logging
import os
import re
import shutil
@@ -27,8 +27,7 @@ import traceback
from collections import OrderedDict, namedtuple
from glob import glob
from math import sqrt
-from multiprocessing import Process, Queue, JoinableQueue, Lock
-from pickle import dumps, PicklingError
+from multiprocessing import Process, Queue, Lock
from pkg_resources import resource_filename
from subprocess import Popen, PIPE
from tempfile import NamedTemporaryFile, mkdtemp
@@ -39,15 +38,19 @@ from datetime import timedelta, datetime
# Represents a job submitted to the batch pool.
BatchJobTuple = namedtuple('BatchJobTuple', ['id', 'func', 'args', 'kwargs',
- 'deps', 'startmsg', 'endmsg'])
+ 'deps', 'startmsg', 'endmsg'])
+log = logging.getLogger(__name__)
+
+
class BatchJob(BatchJobTuple):
def __init__(self, *args, **kwargs):
super(BatchJob, self).__init__(*args, **kwargs)
self.done = False
self.submitted = False
+
class BatchJobPool(object):
- '''
+ """
Implementation of a dependency-respecting batch pool
This system uses a pool of N worker processes to run jobs. Since the
@@ -63,12 +66,13 @@ class BatchJobPool(object):
Call pool.join() to start execution and wait until all jobs are complete.
If a work item raises an exception, the join() will terminate with
that exception, if pickleable, or a generic Exception if otherwise.
- '''
+ """
def __init__(self, n_cores):
self.n_cores = n_cores
self.next_id = 1
- self.jobs = OrderedDict() # Dictionary of jobs (ordered for
repeatability)
+ # Dictionary of jobs (ordered for repeatability)
+ self.jobs = OrderedDict()
# Initialize workers and their work and done queues
self.work_queue, self.done_queues, self.workers = Queue(), [], []
@@ -79,32 +83,33 @@ class BatchJobPool(object):
# n_core is 1.
for i in range(n_cores):
dq = Queue()
- w = Process(target=batchjob_worker_function,
args=(self.work_queue, dq))
+ w = Process(target=batchjob_worker_function,
+ args=(self.work_queue, dq))
self.done_queues.append(dq)
self.workers.append(w)
w.start()
def _is_ready(self, job):
- '''Returns true if the job is ready for submission'''
+ """Returns true if the job is ready for submission"""
if job.done or job.submitted:
return False
return all(self.jobs[j].done for j in job.deps if j is not None)
def _submit(self, job):
- '''Submit the job if it is ready'''
+ """Submit the job if it is ready"""
if self._is_ready(job):
self.work_queue.put(job)
job.submitted = True
def add(self, func, args, kwargs={}, deps=(), startmsg=None,
endmsg=None):
- '''
+ """
Add a job that executes func(*args, **kwargs) and depends on the
jobs with the ids listed in deps.
This function returns a job ID which can be used as a dependency
in other calls to add.
If n_cores is 1; this call immediately executes the given function
and returns None
- '''
+ """
if self.n_cores == 1:
log.info(startmsg)
func(*args, **kwargs)
@@ -117,9 +122,9 @@ class BatchJobPool(object):
return job_id
def join(self):
- '''
+ """
Submit jobs and wait for all jobs to finish.
- '''
+ """
try:
while not all(j.done for j in self.jobs.values()):
# Put jobs that are ready onto the work queue
@@ -137,7 +142,7 @@ class BatchJobPool(object):
if isinstance(res, Exception):
log.fatal("Uncaught exception in worker thread:")
raise res
- log.debug("Job {} has finished!".format(res))
+ log.debug("Job %s has finished!", res)
self.jobs[res].done = Truework_queue, not done_queue. Please add at least a little bit of
# Check if workers died
for w in self.workers:
@@ -154,14 +159,19 @@ class BatchJobPool(object):
w.terminate()
log.devinfo("Workers terminated.")
+
def batchjob_worker_function(work_queue, done_queue):
- '''
+ """
Worker function executed in a separate process.
This function pulls work items off the work queue; terminates if there
is no item for 0.5s; otherwise executes the work item. Any exception
is reraised after putting a None onto the done_queue (triggering an
exception in the main process)
- '''
+
+ Args:
+ done_queue:
+ done_queue:
+ """same as before (goes for all other string format operations below)
# Silently quit on CTRL+C
signal.signal(signal.SIGINT, handle_sigint_silent)
while True:
@@ -170,19 +180,20 @@ def batchjob_worker_function(work_queue, done_queue):
except ValueError as ve:
# This happens when the main loop stops before we do
return
- log.debug("Starting job id {}".format(job.id))
+ log.debug("Starting job id %s", job.id)
try:
if job.startmsg:
log.info(job.startmsg)
job.func(*job.args, **job.kwargs)
if job.endmsg:
log.info(job.endmsg)
- log.debug("Finished work id {}".format(job.id))
+ log.debug("Finished work id %s", job.id)
done_queue.put(job.id)
except Exception as e:
- log.debug("Failed work id {}".format(job.id))
+ log.debug("Failed work id %s", job.id)
done_queue.put(Exception(e.__class__.__name__ + ": " +
- str(e) + "\n" + traceback.format_exc()))
+ str(e) + "\n" + traceback.format_exc()))
+
# Function to dump the stacks of all threads
def get_stack_dump():
@@ -190,16 +201,19 @@ def get_stack_dump():
code = ["Stack dump:"]
for threadId, stack in sys._current_frames().items():
code.append("")
- code.append("# Thread: %s(%d)" % (id2name.get(threadId,""),
threadId))
+ code.append("# Thread: %s(%d)" % (id2name.get(threadId, ""),
threadId))
for filename, lineno, name, line in traceback.extract_stack(stack):
code.append('File: "%s", line %d, in %s' % (filename, lineno,
name))
if line:
code.append(" %s" % (line.strip()))
return code
+
# Signal handler that dumps all stacks and terminates
# Lock l dis-interleaves the stack traces of processes
l = Lock()
+
+
def handle_sigint(signal, frame):
with l:
log.fatal("CTRL-C pressed!")
@@ -210,6 +224,7 @@ def handle_sigint(signal, frame):
# For the main thread, this is what we want.
sys.exit(-1)
+
# Signal handler that dumps all stacks and terminates silently
# Also uses the Lock l to dis-interleave the stack traces
def handle_sigint_silent(signal, frame):
@@ -222,6 +237,7 @@ def handle_sigint_silent(signal, frame):
# otherwise the worker try/catch will also catch the SystemExit
os.exit_(-1)
+
def handle_sigterm(signal, frame):
# Since we want to terminate worker threads with prejudice,
# we use os._exit, which directly terminates the process.
@@ -229,10 +245,12 @@ def handle_sigterm(signal, frame):
logging.shutdown()
os._exit(-1)
+
def handle_sigusr1(signal, frame):
for c in get_stack_dump():
log.info(c)
+
# Dump all the stacks in case of CTRL-C
signal.signal(signal.SIGINT, handle_sigint)
# Also dump on sigterm
@@ -240,16 +258,27 @@ signal.signal(signal.SIGTERM, handle_sigterm)
# Also dump on sigusr1, but do not terminate
signal.signal(signal.SIGUSR1, handle_sigusr1)
+
def execute_command(cmd, ignore_errors=False, direct_io=False, cwd=None):
- '''
+ """
Execute the command `cmd` specified as a list of ['program', 'arg', ...]
If ignore_errors is true, a non-zero exit code will be ignored, otherwise
an exception is raised.
If direct_io is True, do not capture the stdin and stdout of the command
Returns the stdout of the command.
- '''
+
+ Args:
+ cmd:
+ ignore_errors:
+ direct_io:
+ cwd:
+ cmd:
+ ignore_errors:
+ direct_io:
+ cwd:
+ """
jcmd = " ".join(cmd)
- log.debug("Running command: {}".format(jcmd))
+ log.debug("Running command: %s", jcmd)
try:
if direct_io:
pipe = Popen(cmd, cwd=cwd)
@@ -257,36 +286,37 @@ def execute_command(cmd, ignore_errors=False,
direct_io=False, cwd=None):
pipe = Popen(cmd, stdout=PIPE, stderr=PIPE, cwd=cwd)
stdout, stderr = pipe.communicate()
except OSError:
- log.error("Error executing command {}!".format(jcmd))
+ log.error("Error executing command %s!", jcmd)
raise
if pipe.returncode != 0:
if ignore_errors:
- log.warning("Command '{}' failed with exit code {}. Ignored.".
- format(jcmd, pipe.returncode))
+ log.warning("Command '%s' failed with exit code %s. Ignored.",
+ jcmd, pipe.returncode)
else:
if not direct_io:
- log.info("Command '{}' stdout:".format(jcmd))
+ log.info("Command '%s' stdout:", jcmd)
for line in stdout.splitlines():
log.info(line)
- log.info("Command '{}' stderr:".format(jcmd))
+ log.info("Command '%s' stderr:", jcmd)
for line in stderr.splitlines():
log.info(line)
msg = "Command '{}' failed with exit code {}. \n" \
- "(stdout: {}\nstderr: {})"\
- .format(jcmd, pipe.returncode, stdout, stderr)
+ "(stdout: {}\nstderr: {})" \
+ .format(jcmd, pipe.returncode, stdout, stderr)
log.error(msg)
raise Exception(msg)
return stdout
+
def _convert_dot_file(dotfile):
- '''
+ """
Convert duplicate edges in the given dot file into edges with
a larger pen width.
- '''
+ """
res = []
edges = {}
- edge_spec = re.compile("\s+(\d+) -> (\d+);")
+ edge_spec = re.compile("\\s+(\\d+) -> (\\d+);")
file = open(dotfile, "r")
lines = [line.strip("\n") for line in file]
@@ -294,30 +324,31 @@ def _convert_dot_file(dotfile):
lines[0] = "digraph {"
lines[1] = "node[fontsize=30, shape=\"box\"];"
- lines[len(lines)-1] = "" # Skip closing brace
+ lines[len(lines) - 1] = "" # Skip closing brace
for line in lines:
m = re.match(edge_spec, line)
if m:
a, b = m.group(1), m.group(2)
- edges[(a,b)] = edges.get((a,b), 0) + 1
+ edges[(a, b)] = edges.get((a, b), 0) + 1
else:
res.append(line + "\n")
# sort the edges for reproducibility
for ((a, b), count) in sorted(edges.items()):
res.append("{0} -> {1} [weight={2} penwidth={3}];\n".
- format(a,b,count, sqrt(float(count))))
+ format(a, b, count, sqrt(float(count))))
res.append("overlap=prism;\n")
res.append("splines=true;\n")
res.append("}\n")
return res
+
def layout_graph(filename):
out = NamedTemporaryFile(mode="w", delete=False)
out.writelines(_convert_dot_file(filename))
- out.close() # flushes the cache
+ out.close() # flushes the cache
cmd = []
cmd.append("dot")
cmd.append("-Kfdp")
@@ -329,6 +360,7 @@ def layout_graph(filename):
# Manually remove the temporary file
os.unlink(out.name)
+
def generate_report(start_rev, end_rev, resdir):
log.devinfo(" -> Generating report")
report_base = "report-{0}_{1}".format(start_rev, end_rev)
@@ -363,29 +395,31 @@ def generate_report(start_rev, end_rev, resdir):
os.chdir(orig_wd)
shutil.rmtree(tmpdir)
+
def generate_reports(start_rev, end_rev, range_resdir):
files = glob(os.path.join(range_resdir, "*.dot"))
- log.info(" -> Analysing revision range {0}..{1}: Generating Reports...".
- format(start_rev, end_rev))
+ log.info(" -> Analysing revision range %s..%s: Generating Reports...",
+ start_rev, end_rev)
for file in files:
layout_graph(file)
generate_report(start_rev, end_rev, range_resdir)
+
def check4ctags():
# check if the appropriate ctags is installed on the system
- prog_name = 'Exuberant Ctags'
+ prog_name = 'Exuberant Ctags'
prog_version = 'Exuberant Ctags 5.9~svn20110310'
cmd = "ctags-exuberant --version".split()
res = execute_command(cmd, None)
- if not(res.startswith(prog_name)):
- log.error("program '{0}' does not exist".format(prog_name))
+ if not res.startswith(prog_name):
+ log.error("program '%s' does not exist", prog_name)
raise Exception("ctags-exuberant not found")
- if not(res.startswith(prog_version)):
+ if not res.startswith(prog_version):
# TODO: change this to use standard mechanism for error logging
- log.error("Ctags version '{0}' not found".format(prog_version))
+ log.error("Ctags version '%s' not found", prog_version)
raise Exception("Incompatible ctags-exuberant version")
@@ -398,23 +432,24 @@ def check4cppstats():
line = "cppstats v0.8.4"
cmd = "/usr/bin/env cppstats --version".split()
res = execute_command(cmd)
- if not (res.startswith(line)):
- error_message = "expected the first line to start with '{0}' but "\
+ if not res.startswith(line):
+ error_message = "expected the first line to start with '{0}' but " \
"got '{1}'".format(line, res[0])
log.error("program cppstats does not exist, or it is not working "
- "as expected ({0}"
- .format(error_message))
- raise Exception("no working cppstats found ({0})"
- .format(error_message))
+ "as expected (%s", error_message)
+ raise Exception("no working cppstats found (%s)",
+ error_message)
def parse_iso_git_date(date_string):
- # from
http://stackoverflow.com/questions/526406/python-time-to-age-part-2-timezones
+ """http://stackoverflow.com/questions/
+ 526406/python-time-to-age-part-2-timezones
+ """
try:Any reason for duplicating the arguments?
offset = int(date_string[-5:])
except:
- log.error("could not extract timezone info from \"{0}\""
- .format(date_string))
+ log.error("could not extract timezone info from \"%s\"",
+ date_string)
raise
minutes = (offset if offset > 0 else -offset) % 100
delta = timedelta(hours=offset / 100,
@@ -434,8 +469,14 @@ def generate_analysis_windows(repo, window_size_months):
parameter. The window_size parameter specifies the number of months
between
revisions. This function is useful when the git repository has no tags
referencing releases.
+
+ Args:
+ repo:
+ window_size_months:
+ repo:
+ window_size_months:
"""
- cmd_date = 'git --git-dir={0} show --format=%ad --date=iso8601'\
+ cmd_date = 'git --git-dir={0} show --format=%ad --date=iso8601' \
.format(repo).split()
latest_date_result = execute_command(cmd_date).splitlines()[0]
latest_commit = parse_iso_git_date(latest_date_result)
@@ -450,7 +491,7 @@ def generate_analysis_windows(repo, window_size_months):
revs = []
start = window_size_months # Window size time ago
end = 0 # Present time
- cmd_base = 'git --git-dir={0} log --no-merges --format=%H,%ct'\
+ cmd_base = 'git --git-dir={0} log --no-merges --format=%H,%ct' \
.format(repo).split()
cmd_base_max1 = cmd_base + ['--max-count=1']
cmd = cmd_base_max1 + [get_before_arg(end)]
@@ -472,8 +513,8 @@ def generate_analysis_windows(repo, window_size_months):
# Check if any commits occurred since the last analysis window
if rev_start[0] != revs[0]:
revs = rev_start + revs
- # else: no commit happened since last window, don't add duplicate
- # revisions
+ # else: no commit happened since last window, don't add duplicate
+ # revisions
# End while
# Check that commit dates are monotonic, in some cases the earliest
@@ -481,7 +522,7 @@ def generate_analysis_windows(repo, window_size_months):
revs = [rev.split(",") for rev in revs]
rev_len = len(revs)
if int(revs[0][1]) > int(revs[1][1]):
- del revs[0]
+ del revs[0]
# Extract hash
revs = [rev[0] for rev in revs]