- parseFeatureLine function to parse a single line of cppstats output. - Added FileDict class to encapsulate information about the features of a source code file. - getFeatureLines function to generate a FileDict instance from all parsed lines of cppstats output. Signed-off-by: Matthias Dittrich <matthi.d@xxxxxxxxx> --- codeface/VCS.py | 217 +++++++++++++++++++++++++++++++++++-------------- codeface/fileCommit.py | 72 +++++++++++++--- 2 files changed, 217 insertions(+), 72 deletions(-) diff --git a/codeface/VCS.py b/codeface/VCS.py index a62c72a..8bf7022 100644 --- a/codeface/VCS.py +++ b/codeface/VCS.py @@ -34,6 +34,8 @@ # VCS-specific. # TODO: Unify range handling. Either a range is always a list, or always # represented by two parameters. +import itertools +import readline import commit import fileCommit @@ -44,6 +46,7 @@ import ctags import tempfile import source_analysis import shutil +from fileCommit import FileDict from progressbar import ProgressBar, Percentage, Bar, ETA from ctags import CTags, TagEntry from logging import getLogger; log = getLogger(__name__) @@ -183,6 +186,140 @@ class VCS: return subsys=="__main__" or subsys in self.subsys_description.keys() +def parseSepLine (line): + if not line.startswith("\"sep="): + raise ParseError( + "expected that the csv file header starts with '\"sep=' but it started with '{}'" + .format(line), 'CSVFile') + stripped = line.rstrip() + if not stripped.endswith("\""): + raise ParseError( + "expected that the csv file header ends with '\"' but the line was '{}'" + .format(line), 'CSVFile') + return stripped[5:-1] + +def parseline(sep, line): + """ + Parses a line from a csv file + :param sep: + :param line: + :return: + """ + # TODO: Handle escaping: sep is escaped with quotes, quotes are escaped with quotes + # 'test,test' will be '"test,test"' in the csv file + # 'test"this,"test' will be '"test""this,""test"' in the csv file + return [l.strip() for l in line.split(sep)] + + +def enum(*sequential, **named): + enums = 
dict(zip(sequential, range(len(sequential))), **named) + reverse = dict((value, key) for key, value in enums.iteritems()) + enums['reverse_mapping'] = reverse + return type('Enum', (), enums) + + +LineType = enum(IF='#if', ELSE='#else', ELIF='#elif') + + +def parseFeatureLine(sep, line): + """ + parse the current line, which is something like: filename, start_line, end_line, type, expression, constants + :param line: the line to parse + :return: start_line, end_line, line_type, feature_list + """ + parsedline = parseline(sep, line) + # FILENAME,LINE_START,LINE_END,TYPE,EXPRESSION,CONSTANTS + try: + start_line = int(parsedline[1]) + end_line = int(parsedline[2]) + line_type = LineType.reverse_mapping[parsedline[3]] + feature_list = parsedline[5].split(';') + return start_line, end_line, line_type, feature_list + except ValueError: + raise ParseError( + "could not parse feature line (most likely because we could not parse the start- or end-line which should be on index 2 and 3): \"{}\"" + .format(line), 'CSVFile') + + + + + + +def getFeatureLines(parsed_lines, filename): + """ + calculates a FileDict, i.e. a dictionary representing changes in the current feature set together with a sorted helper list for calculating + indices of the dictionary for any line + :param parsed_lines: a list of tuples with (start_line, end_line, line_type, feature_list) elements + :param filename: the name of the analysed file (only used for descriptive error messages if the calculation fails) + :return: feature_lines: a FileDict which makes it possible to access the feature list for any line + (while only changing lines are in the dictionary) + """ + # mapping line -> feature list, we only add changing elements + feature_lines = FileDict() + feature_lines.add_line(0, []) + + # we want a format like (is_start, features) for every changing line + better_format = {} + # We assume that every line is used at most once as start_line or end_line + + def check_line(line): + if line in better_format: + raise ParseError( + "every line index can be used at 
most once (problematic line was {0} in file {1})" + .format(line, filename), filename) + + for start_line, end_line, line_type, feature_list in parsed_lines: + if start_line >= end_line: + raise ParseError( + "start_line can't be greater or equal to end_line (problematic line was {0} in file {1})" + .format(start_line, filename), filename) + if line_type == 'IF': + # ifs start on their own line, however the end_line could already be used by the start of an else/elif + check_line(start_line) + if end_line in better_format: + end_line -= 1 + check_line(end_line) + better_format[start_line] = (True, feature_list) + better_format[end_line] = (False, feature_list) + else: + # we try to mostly ignore else and elif if the feature_list doesn't change + is_start, old_feature_list = better_format[start_line] + if (not is_start) and old_feature_list == feature_list: + # moving to the new end + del better_format[start_line] + better_format[end_line] = is_start, old_feature_list + elif is_start: + raise ParseError( + "line {0} appeared twice as start line (problematic file was {1})" + .format(start_line, filename), filename) + else: + # So we have an elif with different features, + # so we start more features now and add them to the ending later + del better_format[start_line] + better_format[start_line] = (True, feature_list) + better_format[end_line] = (False, old_feature_list + feature_list) + + for line in sorted(better_format): + is_start, features = better_format[line] + # Get last infos + last_feature_list = feature_lines.get_line_info_raw(line) + # Copy last list and create new list for current line + new_feature_list = list(last_feature_list) + if is_start: + for r in features: + new_feature_list.insert(0, r) + else: + for r in reversed(features): + item = new_feature_list.pop(0) + assert(item == r) + ##new_feature_list.remove(r) + # Remove in next line (because we want to count the current #endif line as well). 
+ line += 1 + + feature_lines.add_line(line, new_feature_list) + return feature_lines + + class gitVCS (VCS): def __init__(self): VCS.__init__(self) # Python OOP braindamage @@ -1070,10 +1207,10 @@ class gitVCS (VCS): def _getFeatureLines(self, file_layout_src, file_commit): - ''' + """ similar to _getFunctionLines but computes the line numbers of each feature in the file. - ''' + """ ''' - Input - file_name: original name of the file, used only to determine the @@ -1093,7 +1230,7 @@ class gitVCS (VCS): # temporary file where we write transient data needed for ctags srcFile = tempfile.NamedTemporaryFile(suffix=fileExt) - featurefile = tempfile.NamedTemporaryFile() + featurefile = tempfile.NamedTemporaryFile(suffix=".csv") # generate a source code file from the file_layout_src dictionary # and save it to a temporary location for line in file_layout_src: @@ -1101,68 +1238,28 @@ class gitVCS (VCS): srcFile.flush() # run cppstats analysis on the file to get the feature locations - cmd = "cppstats -f {0} {1}".format(featurefile.name, srcFile.name).split() - output = execute_command(cmd).splitlines() - - # mapping line -> feature list, we only add changing elements - feature_lines = {0: []} - # Helper list to get the last element of feature_lines (which contains only lines with changes) - line_nums = [0] - - def parse_result_line(line): - """ - parse the current line which is something like: feature_list, start_line, end_line - :param line: the line to parse - :return: start_line, end_line, feature_list - """ - start_line = 0 - end_line = 0 - feature_list = {} - return start_line, end_line, feature_list - - try: - results_file = open(featurefile.name, 'r') - parsed_lines = [parse_result_line(featureLine) for featureLine in results_file] - # we want a format like (is_start, features) for every changing line - better_format = {} - # We assume that every line is used at most once as start_line or end_line - - def check_line(line): - if line in better_format: - raise 
ParseError( - "every line index can be used at most once (problematic line was {0} in file {1})" - .format(line, file_commit.filename)) - - for start_line, end_line, feature_list in parsed_lines: - check_line(start_line) - check_line(end_line) - better_format[start_line] = (True, feature_list) - better_format[end_line] = (False, feature_list) - - for line in sorted(better_format): - is_start, features = better_format[line] - # Get last line - line_nums.append(line) - last_feature_list_line = bisect.bisect_right(line_nums, line) - last_feature_list = feature_lines[last_feature_list_line-1] - # Copy last list and create new list for current line - new_feature_list = list(last_feature_list) - if is_start: - new_feature_list.extend(features) - else: - for r in features: - new_feature_list.remove(r) - feature_lines[line] = new_feature_list - except: - log.critical("was unable unable to parse feature information of cppstats") - raise - + # TODO: fix hardcoded paths + # BUG: THIS IS VERY BAD HARDCODED CODE AND SHOULD BE FIXED, + # HOWEVER IT IS NOT CLEAR HOW CPPSTATS IS DISTRIBUTED NOR WHERE IT LIVES + oldPath = os.getenv("PYTHONPATH") + os.putenv("PYTHONPATH", "/home/drag0on/projects/cppstats/lib") + cmd = "/usr/bin/env python /home/drag0on/projects/cppstats/cppstats.py --kind featurelocations --file {0} {1}"\ + .format(srcFile.name, featurefile.name).split() + output = execute_command(cmd, cwd="/home/drag0on/projects/cppstats").splitlines() + os.putenv("PYTHONPATH", oldPath) + + results_file = open(featurefile.name, 'r') + sep = parseSepLine(next(results_file)) + headlines = parseline(sep, next(results_file)) + feature_lines = \ + getFeatureLines( + [parseFeatureLine(sep, line) for line in results_file], file_commit.filename) # clean up temporary files srcFile.close() featurefile.close() # save result to the file commit instance - file_commit.setFeatureLines(line_nums, feature_lines) + file_commit.set_feature_infos(feature_lines) def cmtHash2CmtObj(self, cmtHash): ''' 
diff --git a/codeface/fileCommit.py b/codeface/fileCommit.py index 956307a..2b8af8a 100644 --- a/codeface/fileCommit.py +++ b/codeface/fileCommit.py @@ -23,6 +23,61 @@ single file.''' import commit import bisect + +class FileDict: + """ + A dictionary saving per-line information. We assume that we have information on any line, + and that we only have to save changing lines. + """ + def __init__(self, line_list, line_dict): + """ + :rtype : FileDict + """ + self.line_list = line_list + self.line_dict = line_dict + self.lastItem = line_list[-1] + + def __init__(self): + """ + :rtype : FileDict + """ + self.line_list = [] + self.line_dict = {} + self.lastItem = -1 + + def __iter__(self): + return self.line_dict.__iter__() + + def get_line_info_raw(self, line_nr): + """ + Returns the info for the given line (if the line was never set, the info for the last set line is returned) + :param line_nr: + :return: + """ + i = bisect.bisect_right(self.line_list, line_nr) + info_line = self.line_list[i-1] + return self.line_dict[info_line] + + def get_line_info(self, line_nr): + return set(self.get_line_info_raw(line_nr)) + + def add_line(self, line_nr, info): + """ + Add the given information to the current dictionary. + Note: while filling the dictionary your line_nr has to be incremented! 
+ :param line_nr: + :param info: + :return: + """ + if line_nr < self.lastItem: + raise ValueError("can only incrementally add items") + self.line_list.append(line_nr) + self.line_dict[line_nr] = info + + def values(self): + return self.line_dict.values() + + class FileCommit: def __init__(self): @@ -58,11 +113,7 @@ class FileCommit: self._src_elem_list = [] # dictionary with key = line number, value = feature list - self.featureLists = {} - - # list of function line numbers in sorted order, this is for - # optimizing the process of finding a feature list given a line number - self.featureLineNums = [0] + self.feature_info = FileDict() #Getter/Setters def getFileSnapShots(self): @@ -91,9 +142,9 @@ class FileCommit: def setSrcElems(self, src_elem_list): self._src_elem_list.extend(src_elem_list) - def setFeatureLines(self, featureLineNums, featureLists): - self.featureLists.update(featureLists) - self.featureLineNums = featureLineNums # .extend(sorted(self.featureLists.iterkeys())) + def set_feature_infos(self, feature_line_infos): + self.feature_info = feature_line_infos + #Methods def addFileSnapShot(self, key, dict): self.fileSnapShots[key] = dict @@ -128,7 +179,4 @@ class FileCommit: self.functionImpl[id].append(srcLine) def findFeatureList(self, lineNum): - # returns the identifier of a feature given a line number - i = bisect.bisect_right(self.featureLineNums, lineNum) - featureLine = self.featureLineNums[i-1] - return self.featureLists[featureLine] + return self.feature_info.get_line_info(lineNum) -- 1.8.5.5