[codeface] Re: [PATCH 17/19] Whitespace cleanup in codeface/util.py

From: Wolfgang Mauerer <wolfgang.mauerer@xxxxxxxxxxxxxxxxx>
To: <codeface@xxxxxxxxxxxxx>
Date: Tue, 16 Feb 2016 16:52:36 +0100

Am 16/02/2016 um 13:40 schrieb Andreas Ringlstetter:

From: Benjamin Hiefner <benjamin.hiefner@xxxxxxxxxxxxxxxxxxxx>

Signed-off-by: Benjamin Hiefner <benjamin.hiefner@xxxxxxxxxxxxxxxxxxxx>
---
codeface/util.py | 195
+++++++++++++++++++++++++++++++++----------------------
1 file changed, 118 insertions(+), 77 deletions(-)

diff --git a/codeface/util.py b/codeface/util.py
index b885254..4557d51 100644
--- a/codeface/util.py
+++ b/codeface/util.py
@@ -1,23 +1,23 @@
-## This file is part of Codeface. Codeface is free software: you can
-## redistribute it and/or modify it under the terms of the GNU General Public
-## License as published by the Free Software Foundation, version 2.
-##
-## This program is distributed in the hope that it will be useful, but
WITHOUT
-## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS
-## FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
-## details.
-##
-## You should have received a copy of the GNU General Public License
-## along with this program; if not, write to the Free Software
-## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-##
-## Copyright 2013 by Siemens AG, Wolfgang Mauerer
<wolfgang.mauerer@xxxxxxxxxxx>
-## All Rights Reserved.
-'''
+#  This file is part of Codeface. Codeface is free software: you can
+# redistribute it and/or modify it under the terms of the GNU General Public
+# License as published by the Free Software Foundation, version 2.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS
+# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+# details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#
+# Copyright 2013 by Siemens AG, Wolfgang Mauerer
<wolfgang.mauerer@xxxxxxxxxxx>
+# All Rights Reserved.
+"""
Utility functions for running external commands
-'''
+"""

-import logging; log = logging.getLogger(__name__)
+import logging
import os
import re
import shutil
@@ -27,8 +27,7 @@ import traceback
from collections import OrderedDict, namedtuple
from glob import glob
from math import sqrt
-from multiprocessing import Process, Queue, JoinableQueue, Lock
-from pickle import dumps, PicklingError
+from multiprocessing import Process, Queue, Lock
from pkg_resources import resource_filename
from subprocess import Popen, PIPE
from tempfile import NamedTemporaryFile, mkdtemp
@@ -39,15 +38,19 @@ from datetime import timedelta, datetime

# Represents a job submitted to the batch pool.
BatchJobTuple = namedtuple('BatchJobTuple', ['id', 'func', 'args', 'kwargs',
-        'deps', 'startmsg', 'endmsg'])
+                                             'deps', 'startmsg', 'endmsg'])
+log = logging.getLogger(__name__)
+
+
class BatchJob(BatchJobTuple):
     def __init__(self, *args, **kwargs):
         super(BatchJob, self).__init__(*args, **kwargs)
         self.done = False
         self.submitted = False

+
class BatchJobPool(object):
-    '''
+    """
     Implementation of a dependency-respecting batch pool

     This system uses a pool of N worker processes to run jobs. Since the
@@ -63,12 +66,13 @@ class BatchJobPool(object):
     Call pool.join() to start execution and wait until all jobs are complete.
     If a work item raises an exception, the join() will terminate with
     that exception, if pickleable, or a generic Exception if otherwise.
-    '''
+    """

     def __init__(self, n_cores):
         self.n_cores = n_cores
         self.next_id = 1
-        self.jobs = OrderedDict() # Dictionary of jobs (ordered for
repeatability)
+        # Dictionary of jobs (ordered for repeatability)
+        self.jobs = OrderedDict()

         # Initialize workers and their work and done queues
         self.work_queue, self.done_queues, self.workers = Queue(), [], []
@@ -79,32 +83,33 @@ class BatchJobPool(object):
             # n_core is 1.
             for i in range(n_cores):
                 dq = Queue()
-                w = Process(target=batchjob_worker_function,
args=(self.work_queue, dq))
+                w = Process(target=batchjob_worker_function,
+                            args=(self.work_queue, dq))
                 self.done_queues.append(dq)
                 self.workers.append(w)
                 w.start()

     def _is_ready(self, job):
-        '''Returns true if the job is ready for submission'''
+        """Returns true if the job is ready for submission"""
         if job.done or job.submitted:
             return False
         return all(self.jobs[j].done for j in job.deps if j is not None)

     def _submit(self, job):
-        '''Submit the job if it is ready'''
+        """Submit the job if it is ready"""
         if self._is_ready(job):
             self.work_queue.put(job)
             job.submitted = True

     def add(self, func, args, kwargs={}, deps=(), startmsg=None,
endmsg=None):
-        '''
+        """
         Add a job that executes func(*args, **kwargs) and depends on the
         jobs with the ids listed in deps.
         This function returns a job ID which can be used as a dependency
         in other calls to add.
         If n_cores is 1; this call immediately executes the given function
         and returns None
-        '''
+        """
         if self.n_cores == 1:
             log.info(startmsg)
             func(*args, **kwargs)
@@ -117,9 +122,9 @@ class BatchJobPool(object):
         return job_id

     def join(self):
-        '''
+        """
         Submit jobs and wait for all jobs to finish.
-        '''
+        """
         try:
             while not all(j.done for j in self.jobs.values()):
                 # Put jobs that are ready onto the work queue
@@ -137,7 +142,7 @@ class BatchJobPool(object):
                     if isinstance(res, Exception):
                         log.fatal("Uncaught exception in worker thread:")
                         raise res
-                    log.debug("Job {} has finished!".format(res))
+                    log.debug("Job %s has finished!", res)

same sting formatting question as before

                     self.jobs[res].done = True
                 # Check if workers died
                 for w in self.workers:
@@ -154,14 +159,19 @@ class BatchJobPool(object):
                 w.terminate()
             log.devinfo("Workers terminated.")

+
def batchjob_worker_function(work_queue, done_queue):
-    '''
+    """
     Worker function executed in a separate process.
     This function pulls work items off the work queue; terminates if there
     is no item for 0.5s; otherwise executes the work item. Any exception
     is reraised after putting a None onto the done_queue (triggering an
     exception in the main process)
-    '''
+
+    Args:
+        done_queue:

work_queue, not done_queue. Please add at least a little bit of
description; otherwise I don't see much advantage compared
to the previous documentation format.

+ done_queue:

+    """
     # Silently quit on CTRL+C
     signal.signal(signal.SIGINT, handle_sigint_silent)
     while True:
@@ -170,19 +180,20 @@ def batchjob_worker_function(work_queue, done_queue):
         except ValueError as ve:
             # This happens when the main loop stops before we do
             return
-        log.debug("Starting job id {}".format(job.id))
+        log.debug("Starting job id %s", job.id)

same as before (goes for all other string format operations below)

         try:
             if job.startmsg:
                 log.info(job.startmsg)
             job.func(*job.args, **job.kwargs)
             if job.endmsg:
                 log.info(job.endmsg)
-            log.debug("Finished work id {}".format(job.id))
+            log.debug("Finished work id %s", job.id)
             done_queue.put(job.id)
         except Exception as e:
-            log.debug("Failed work id {}".format(job.id))
+            log.debug("Failed work id %s", job.id)
             done_queue.put(Exception(e.__class__.__name__ + ": " +
-                    str(e) + "\n" + traceback.format_exc()))
+                                     str(e) + "\n" + traceback.format_exc()))
+

# Function to dump the stacks of all threads
def get_stack_dump():
@@ -190,16 +201,19 @@ def get_stack_dump():
     code = ["Stack dump:"]
     for threadId, stack in sys._current_frames().items():
         code.append("")
-        code.append("# Thread: %s(%d)" % (id2name.get(threadId,""),
threadId))
+        code.append("# Thread: %s(%d)" % (id2name.get(threadId, ""),
threadId))
         for filename, lineno, name, line in traceback.extract_stack(stack):
             code.append('File: "%s", line %d, in %s' % (filename, lineno,
name))
             if line:
                 code.append("  %s" % (line.strip()))
     return code

+
# Signal handler that dumps all stacks and terminates
# Lock l dis-interleaves the stack traces of processes
l = Lock()
+
+
def handle_sigint(signal, frame):
     with l:
         log.fatal("CTRL-C pressed!")
@@ -210,6 +224,7 @@ def handle_sigint(signal, frame):
     # For the main thread, this is what we want.
     sys.exit(-1)

+
# Signal handler that dumps all stacks and terminates silently
# Also uses the Lock l to dis-interleave the stack traces
def handle_sigint_silent(signal, frame):
@@ -222,6 +237,7 @@ def handle_sigint_silent(signal, frame):
     # otherwise the worker try/catch will also catch the SystemExit
     os.exit_(-1)

+
def handle_sigterm(signal, frame):
     # Since we want to terminate worker threads with prejudice,
     # we use os._exit, which directly terminates the process.
@@ -229,10 +245,12 @@ def handle_sigterm(signal, frame):
     logging.shutdown()
     os._exit(-1)

+
def handle_sigusr1(signal, frame):
     for c in get_stack_dump():
         log.info(c)

+
# Dump all the stacks in case of CTRL-C
signal.signal(signal.SIGINT, handle_sigint)
# Also dump on sigterm
@@ -240,16 +258,27 @@ signal.signal(signal.SIGTERM, handle_sigterm)
# Also dump on sigusr1, but do not terminate
signal.signal(signal.SIGUSR1, handle_sigusr1)

+
def execute_command(cmd, ignore_errors=False, direct_io=False, cwd=None):
-    '''
+    """
     Execute the command `cmd` specified as a list of ['program', 'arg', ...]
     If ignore_errors is true, a non-zero exit code will be ignored, otherwise
     an exception is raised.
     If direct_io is True, do not capture the stdin and stdout of the command
     Returns the stdout of the command.
-    '''
+
+    Args:
+        cmd:
+        ignore_errors:
+        direct_io:
+        cwd:
+        cmd:
+        ignore_errors:
+        direct_io:
+        cwd:

this seems to duplicate the list of arguments

+    """
     jcmd = " ".join(cmd)
-    log.debug("Running command: {}".format(jcmd))
+    log.debug("Running command: %s", jcmd)
     try:
         if direct_io:
             pipe = Popen(cmd, cwd=cwd)
@@ -257,36 +286,37 @@ def execute_command(cmd, ignore_errors=False,
direct_io=False, cwd=None):
             pipe = Popen(cmd, stdout=PIPE, stderr=PIPE, cwd=cwd)
         stdout, stderr = pipe.communicate()
     except OSError:
-        log.error("Error executing command {}!".format(jcmd))
+        log.error("Error executing command %s!", jcmd)
         raise

     if pipe.returncode != 0:
         if ignore_errors:
-            log.warning("Command '{}' failed with exit code {}. Ignored.".
-                    format(jcmd, pipe.returncode))
+            log.warning("Command '%s' failed with exit code %s. Ignored.",
+                        jcmd, pipe.returncode)
         else:
             if not direct_io:
-                log.info("Command '{}' stdout:".format(jcmd))
+                log.info("Command '%s' stdout:", jcmd)
                 for line in stdout.splitlines():
                     log.info(line)
-                log.info("Command '{}' stderr:".format(jcmd))
+                log.info("Command '%s' stderr:", jcmd)
                 for line in stderr.splitlines():
                     log.info(line)
             msg = "Command '{}' failed with exit code {}. \n" \
-                  "(stdout: {}\nstderr: {})"\
-                  .format(jcmd, pipe.returncode, stdout, stderr)
+                  "(stdout: {}\nstderr: {})" \
+                .format(jcmd, pipe.returncode, stdout, stderr)
             log.error(msg)
             raise Exception(msg)
     return stdout

+
def _convert_dot_file(dotfile):
-    '''
+    """
     Convert duplicate edges in the given dot file into edges with
     a larger pen width.
-    '''
+    """
     res = []
     edges = {}
-    edge_spec = re.compile("\s+(\d+) -> (\d+);")
+    edge_spec = re.compile("\\s+(\\d+) -> (\\d+);")

can you please elaborate the reason behind this change? Have
there been any problems with the previous regexp? Please don't
hide such changes in a whitespace cleanup, this is not one.

     file = open(dotfile, "r")
     lines = [line.strip("\n") for line in file]
@@ -294,30 +324,31 @@ def _convert_dot_file(dotfile):
     lines[0] = "digraph {"
     lines[1] = "node[fontsize=30, shape=\"box\"];"

-    lines[len(lines)-1] = "" # Skip closing brace
+    lines[len(lines) - 1] = ""  # Skip closing brace

     for line in lines:
         m = re.match(edge_spec, line)
         if m:
             a, b = m.group(1), m.group(2)
-            edges[(a,b)] = edges.get((a,b), 0) + 1
+            edges[(a, b)] = edges.get((a, b), 0) + 1
         else:
             res.append(line + "\n")

     # sort the edges for reproducibility
     for ((a, b), count) in sorted(edges.items()):
         res.append("{0} -> {1} [weight={2} penwidth={3}];\n".
-              format(a,b,count, sqrt(float(count))))
+                   format(a, b, count, sqrt(float(count))))

     res.append("overlap=prism;\n")
     res.append("splines=true;\n")
     res.append("}\n")
     return res

+
def layout_graph(filename):
     out = NamedTemporaryFile(mode="w", delete=False)
     out.writelines(_convert_dot_file(filename))
-    out.close() # flushes the cache
+    out.close()  # flushes the cache
     cmd = []
     cmd.append("dot")
     cmd.append("-Kfdp")
@@ -329,6 +360,7 @@ def layout_graph(filename):
     # Manually remove the temporary file
     os.unlink(out.name)

+
def generate_report(start_rev, end_rev, resdir):
     log.devinfo("  -> Generating report")
     report_base = "report-{0}_{1}".format(start_rev, end_rev)
@@ -363,29 +395,31 @@ def generate_report(start_rev, end_rev, resdir):
     os.chdir(orig_wd)
     shutil.rmtree(tmpdir)

+
def generate_reports(start_rev, end_rev, range_resdir):
     files = glob(os.path.join(range_resdir, "*.dot"))
-    log.info("  -> Analysing revision range {0}..{1}: Generating Reports...".
-        format(start_rev, end_rev))
+    log.info("  -> Analysing revision range %s..%s: Generating Reports...",
+             start_rev, end_rev)
     for file in files:
         layout_graph(file)
     generate_report(start_rev, end_rev, range_resdir)

+
def check4ctags():
     # check if the appropriate ctags is installed on the system
-    prog_name    = 'Exuberant Ctags'
+    prog_name = 'Exuberant Ctags'
     prog_version = 'Exuberant Ctags 5.9~svn20110310'
     cmd = "ctags-exuberant --version".split()

     res = execute_command(cmd, None)

-    if not(res.startswith(prog_name)):
-        log.error("program '{0}' does not exist".format(prog_name))
+    if not res.startswith(prog_name):
+        log.error("program '%s' does not exist", prog_name)
         raise Exception("ctags-exuberant not found")

-    if not(res.startswith(prog_version)):
+    if not res.startswith(prog_version):
         # TODO: change this to use standard mechanism for error logging
-        log.error("Ctags version '{0}' not found".format(prog_version))
+        log.error("Ctags version '%s' not found", prog_version)
         raise Exception("Incompatible ctags-exuberant version")

@@ -398,23 +432,24 @@ def check4cppstats():
     line = "cppstats v0.8.4"
     cmd = "/usr/bin/env cppstats --version".split()
     res = execute_command(cmd)
-    if not (res.startswith(line)):
-        error_message = "expected the first line to start with '{0}' but "\
+    if not res.startswith(line):
+        error_message = "expected the first line to start with '{0}' but " \
                         "got '{1}'".format(line, res[0])
         log.error("program cppstats does not exist, or it is not working "
-                  "as expected ({0}"
-                  .format(error_message))
-        raise Exception("no working cppstats found ({0})"
-                        .format(error_message))
+                  "as expected (%s", error_message)
+        raise Exception("no working cppstats found (%s)",
+                        error_message)

def parse_iso_git_date(date_string):
-    # from
http://stackoverflow.com/questions/526406/python-time-to-age-part-2-timezones
+    """http://stackoverflow.com/questions/
+    526406/python-time-to-age-part-2-timezones
+    """

it's not worth breaking the the URL just to honour the 80 char width.
Besides, this really is just a comment, not something we want
to have as documentation.

     try:
         offset = int(date_string[-5:])
     except:
-        log.error("could not extract timezone info from \"{0}\""
-                  .format(date_string))
+        log.error("could not extract timezone info from \"%s\"",
+                  date_string)
         raise
     minutes = (offset if offset > 0 else -offset) % 100
     delta = timedelta(hours=offset / 100,
@@ -434,8 +469,14 @@ def generate_analysis_windows(repo, window_size_months):
     parameter. The window_size parameter specifies the number of months
between
     revisions. This function is useful when the git repository has no tags
     referencing releases.
+
+    Args:
+        repo:
+        window_size_months:
+        repo:
+        window_size_months:

Any reason for duplicating the arguments?

     """
-    cmd_date = 'git --git-dir={0} show --format=%ad  --date=iso8601'\
+    cmd_date = 'git --git-dir={0} show --format=%ad  --date=iso8601' \
         .format(repo).split()
     latest_date_result = execute_command(cmd_date).splitlines()[0]
     latest_commit = parse_iso_git_date(latest_date_result)
@@ -450,7 +491,7 @@ def generate_analysis_windows(repo, window_size_months):
     revs = []
     start = window_size_months  # Window size time ago
     end = 0  # Present time
-    cmd_base = 'git --git-dir={0} log --no-merges --format=%H,%ct'\
+    cmd_base = 'git --git-dir={0} log --no-merges --format=%H,%ct' \
         .format(repo).split()
     cmd_base_max1 = cmd_base + ['--max-count=1']
     cmd = cmd_base_max1 + [get_before_arg(end)]
@@ -472,8 +513,8 @@ def generate_analysis_windows(repo, window_size_months):
         # Check if any commits occurred since the last analysis window
         if rev_start[0] != revs[0]:
             revs = rev_start + revs
-        # else: no commit happened since last window, don't add duplicate
-        #       revisions
+            # else: no commit happened since last window, don't add duplicate
+            #       revisions

why indent the else to an unlogical level?

     # End while

     # Check that commit dates are monotonic, in some cases the earliest
@@ -481,7 +522,7 @@ def generate_analysis_windows(repo, window_size_months):
     revs = [rev.split(",") for rev in revs]
     rev_len = len(revs)
     if int(revs[0][1]) > int(revs[1][1]):
-      del revs[0]
+        del revs[0]

     # Extract hash
     revs = [rev[0] for rev in revs]

References:
- [codeface] Python codebase cleanup bundle part 1
  - From: Andreas Ringlstetter
- [codeface] [PATCH 17/19] Whitespace cleanup in codeface/util.py
  - From: Andreas Ringlstetter

[codeface] Re: [PATCH 17/19] Whitespace cleanup in codeface/util.py

Other related posts: