[codeface] [PATCH 5/9] Add a _getFeatureLines function which calculates feature sets for all source code lines.

  • From: Matthias Dittrich <matthi.d@xxxxxxxxxxxxxx>
  • To: codeface@xxxxxxxxxxxxx
  • Date: Fri, 17 Oct 2014 15:14:22 +0200

- The _getFeatureLines function calculates the feature sets in a similar way
to how _getFunctionLines calculates the current functions for all source
code lines.
- Added some fields to codeface/fileCommit.py to save those results.

Signed-off-by: Matthias Dittrich <matthi.d@xxxxxxxxx>
---
 codeface/VCS.py        | 96 ++++++++++++++++++++++++++++++++++++++++++++++++++
 codeface/fileCommit.py | 16 +++++++++
 2 files changed, 112 insertions(+)

diff --git a/codeface/VCS.py b/codeface/VCS.py
index 5229981..a62c72a 100644
--- a/codeface/VCS.py
+++ b/codeface/VCS.py
@@ -39,6 +39,7 @@ import commit
 import fileCommit
 import re
 import os
+import bisect
 import ctags
 import tempfile
 import source_analysis
@@ -1068,6 +1069,101 @@ class gitVCS (VCS):
          for lineNum, srcLine in enumerate(file_layout_src)]
 
 
+    def _getFeatureLines(self, file_layout_src, file_commit):
+        '''
+        Similar to _getFunctionLines, but computes the list of features
+        that applies to each source code line of the file.
+
+        - Input -
+        file_layout_src: iterable of source code lines; each one is written
+                    unchanged to a temporary source file
+        file_commit: fileCommit instance where the results will be stored;
+                    its filename provides the temporary file's extension
+
+        - Description -
+        The lines are written to a temporary source file on which cppstats
+        is run; a second temporary file is passed to it via -f and parsed
+        afterwards. Each line at which the feature list changes is
+        recorded, and the sorted line numbers plus the line -> feature
+        list mapping are saved in the fileCommit object.
+        '''
+
+        # grab the file extension to determine the language of the file
+        fileExt = os.path.splitext(file_commit.filename)[1]
+
+        # temporary files where we write transient data needed for cppstats
+        srcFile = tempfile.NamedTemporaryFile(suffix=fileExt)
+        featurefile = tempfile.NamedTemporaryFile()
+        # generate a source code file from the file_layout_src lines
+        # and save it to a temporary location
+        for line in file_layout_src:
+            srcFile.write(line)
+        srcFile.flush()
+
+        # run cppstats analysis on the file to get the feature locations
+        cmd = ["cppstats", "-f", featurefile.name, srcFile.name]
+        execute_command(cmd)
+
+        # mapping line -> feature list, we only add changing elements
+        feature_lines = {0: []}
+        # Sorted keys of feature_lines (only lines where the features change)
+        line_nums = [0]
+
+        def parse_result_line(line):
+            """
+            Parse a result line like: feature_list, start_line, end_line
+            :param line: the line to parse (ignored so far, TODO: parse it)
+            :return: start_line, end_line, feature_list
+            """
+            start_line = 0
+            end_line = 0
+            feature_list = {}
+            return start_line, end_line, feature_list
+
+        try:
+            with open(featurefile.name, 'r') as results_file:
+                parsed_lines = [parse_result_line(row) for row in results_file]
+            # we want a format like (is_start, features) for every changing line
+            better_format = {}
+            # We assume every line is used at most once as start_line or end_line
+
+            def check_line(line):
+                if line in better_format:
+                    raise ParseError(
+                        "every line index can be used at most once (problematic line was {0} in file {1})"
+                        .format(line, file_commit.filename))
+
+            for start_line, end_line, feature_list in parsed_lines:
+                check_line(start_line)
+                check_line(end_line)
+                better_format[start_line] = (True, feature_list)
+                better_format[end_line] = (False, feature_list)
+
+            for line in sorted(better_format):
+                is_start, features = better_format[line]
+                # Look up the list of the closest preceding change line by key
+                last_idx = bisect.bisect_right(line_nums, line)
+                last_feature_list = feature_lines[line_nums[last_idx - 1]]
+                line_nums.append(line)
+                # Copy last list and create new list for current line
+                new_feature_list = list(last_feature_list)
+                if is_start:
+                    new_feature_list.extend(features)
+                else:
+                    for r in features:
+                        new_feature_list.remove(r)
+                feature_lines[line] = new_feature_list
+        except Exception:
+            log.critical("was unable unable to parse feature information of cppstats")
+            raise
+        finally:
+            # clean up temporary files, also when parsing failed
+            srcFile.close()
+            featurefile.close()
+
+        # save result to the file commit instance
+        file_commit.setFeatureLines(line_nums, feature_lines)
+
     def cmtHash2CmtObj(self, cmtHash):
         '''
         input: cmtHash
diff --git a/codeface/fileCommit.py b/codeface/fileCommit.py
index aa99e84..956307a 100644
--- a/codeface/fileCommit.py
+++ b/codeface/fileCommit.py
@@ -57,6 +57,13 @@ class FileCommit:
         # meta data
         self._src_elem_list = []
 
+        # dictionary with key = line number, value = feature list
+        self.featureLists = {}
+
+        # list of feature line numbers in sorted order, this is for
+        # optimizing the process of finding a feature list given a line number
+        self.featureLineNums = [0]
+
     #Getter/Setters
     def getFileSnapShots(self):
         return self.fileSnapShots
@@ -84,6 +91,9 @@ class FileCommit:
     def setSrcElems(self, src_elem_list):
         self._src_elem_list.extend(src_elem_list)
 
+    def setFeatureLines(self, featureLineNums, featureLists):
+        self.featureLists.update(featureLists)  # merge line -> feature list
+        self.featureLineNums = featureLineNums  # bisected by findFeatureList
     #Methods
     def addFileSnapShot(self, key, dict):
         self.fileSnapShots[key] = dict
@@ -116,3 +126,9 @@ class FileCommit:
     def addFuncImplLine(self, lineNum, srcLine):
         id = self.findFuncId(lineNum)
         self.functionImpl[id].append(srcLine)
+
+    def findFeatureList(self, lineNum):
+        # returns the feature list stored for the last entry of
+        # featureLineNums that is not greater than the given line number
+        pos = bisect.bisect_right(self.featureLineNums, lineNum) - 1
+        return self.featureLists[self.featureLineNums[pos]]
-- 
1.8.5.5


Other related posts: