[codeface] [PATCH 12/12] Fill the commit_dependency table on a feature analysis pass.

  • From: Matthias Dittrich <matthi.d@xxxxxxxxxxxxxx>
  • To: codeface@xxxxxxxxxxxxx
  • Date: Wed, 19 Nov 2014 21:40:31 +0100

When running a feature or feature_file analysis pass, we generate entries in the
commit_dependency table for each feature changed by each commit.

Signed-off-by: Matthias Dittrich <matthi.d@xxxxxxxxx>
---
 codeface/cluster/cluster.py | 101 ++++++++++++++++++++++++++++++++++++++------
 1 file changed, 89 insertions(+), 12 deletions(-)

diff --git a/codeface/cluster/cluster.py b/codeface/cluster/cluster.py
index 7fcbaf8..94a240a 100755
--- a/codeface/cluster/cluster.py
+++ b/codeface/cluster/cluster.py
@@ -1177,7 +1177,7 @@ def writeIDwithCmtStats2File(id_mgr, outdir, 
releaseRangeID, dbm, conf):
                      author_cmt_stats_rows)
 
 
-def writeDependsToDB(logical_depends, fileCommitDict, cmtlist, releaseRangeID, 
dbm, conf)  :
+def writeDependsToDB(logical_depends, fileCommitDict, cmtlist, releaseRangeID, 
dbm, conf, entity_type="Function", get_entity_source_code=None):
     '''
     Write logical dependency data to database
     '''
@@ -1188,6 +1188,10 @@ def writeDependsToDB(logical_depends, fileCommitDict, 
cmtlist, releaseRangeID, d
                  co-changed subroutines (i.e., functions)
     '''
     projectID = dbm.getProjectID(conf["project"], conf["tagging"])
+    if get_entity_source_code is None:
+        def get_source(file, id):
+            return ""
+        get_entity_source_code = get_source
 
     # List of tuples to store rows of DB table
     cmt_depend_rows = []
@@ -1197,11 +1201,11 @@ def writeDependsToDB(logical_depends, fileCommitDict, 
cmtlist, releaseRangeID, d
             key = dbm.getCommitId(projectID, cmt.id)
             depends_list = logical_depends[cmt.id]
             function_loc = [depend for depend, count in depends_list]
-            depend_impl_list = [' 
'.join(fileCommitDict[file].getFuncImpl(funcId)) for file, funcId in 
function_loc]
+            depend_impl_list = [' '.join(get_entity_source_code(file, funcId)) 
for file, funcId in function_loc]
             depends_list = [depends_list[indx][0] + (depends_list[indx][1], 
impl) for indx, impl in enumerate(depend_impl_list)]
 
             # Write Function level dependencies
-            rows = [(key, file, entityId, "Function", count, impl) for file, 
entityId, count, impl in depends_list]
+            rows = [(key, file, entityId, entity_type, count, impl) for file, 
entityId, count, impl in depends_list]
             cmt_depend_rows.extend(rows)
     # For cmt.id
 
@@ -1305,7 +1309,7 @@ def writeAdjMatrixMaxWeight2File(id_mgr, outdir, conf):
 
 
 def emitStatisticalData(cmtlist, id_mgr, logical_depends, outdir, 
releaseRangeID, dbm, conf, 
-                        fileCommitDict):
+                        fileCommitDict, entity_type="Function", 
get_entity_source_code=None):
     """Save the available information for a release interval for further 
statistical processing.
 
     Several files are created in outdir respectively the database:
@@ -1328,7 +1332,7 @@ def emitStatisticalData(cmtlist, id_mgr, logical_depends, 
outdir, releaseRangeID
     writeAdjMatrixMaxWeight2File(id_mgr, outdir, conf)
 
     if logical_depends is not None:
-        writeDependsToDB(logical_depends, fileCommitDict, cmtlist, 
releaseRangeID, dbm, conf)
+        writeDependsToDB(logical_depends, fileCommitDict, cmtlist, 
releaseRangeID, dbm, conf, entity_type, get_entity_source_code)
 
     return None
 
@@ -1409,6 +1413,60 @@ def computeLogicalDepends(fileCommit_list, cmt_dict, 
start_date):
     return func_depends_count
 
 
+def compute_logical_depends_features(file_commit_list, cmt_dict, start_date):
+    """
+    Compute logical dependencies at feature level
+    """
+
+    '''
+    Input:
+    file_commit_list - A list of fileCommit objects, the object
+                     contains the source code structural information
+                     and a commit reference for every line of a file
+    Output:
+    feature_depends - dictionary where key=commit hash (unique id) and
+                  value=the list of features changed with that commit
+
+    Description:
+    We use the source code structural information we acquired from the
+    cppstats analysis to identify which lines of code fall under a particular
+    feature space. We save the subroutine name together with the filename that the
+    subroutine belongs.
+    '''
+
+    feature_depends_count = {}
+    for file in file_commit_list.values():
+        feature_depends = {}
+        filename = file.getFilename()
+        idx = file.getIndx()
+        for line_num in idx:
+            cmt_id = file.getLineCmtId(line_num)
+
+            if cmt_id not in feature_depends_count:
+                feature_depends_count[cmt_id] = []
+
+            if cmt_id in cmt_dict:
+                # If line is older than start date then ignore
+                if cmt_dict[cmt_id].getCdate() >= start_date:
+                    feature_list = file.findFeatureList(line_num)
+
+                    feature_loc = [(filename, feature) for feature in 
feature_list]
+                    if cmt_id in feature_depends:
+                        feature_depends[cmt_id].extend(feature_loc)
+                    else:
+                        feature_depends[cmt_id] = feature_loc
+
+        # Compute the number of lines of code changed for each dependency.
+        # We captured the function dependency on a line by line basis above
+        # now we aggregate the lines that change one function
+        for cmt_id, depend_list in feature_depends.iteritems():
+            feature_depends_count[cmt_id].extend(
+                [(feature_id, len(list(group)))
+                    for feature_id, group in 
itertools.groupby(sorted(depend_list))])
+
+    return feature_depends_count
+
+
 def computeProximityLinks(fileCommitList, cmtList, id_mgr, link_type, \
                           startDate=None, speedUp=True):
     '''
@@ -1719,6 +1777,8 @@ def performAnalysis(conf, dbm, dbfilename, git_repo, 
revrange, subsys_descr,
         id_mgr.setSubsysNames(subsys_descr.keys())
 
     logical_depends = None
+    entity_type = "Function"
+    get_entity_source_code = None
     fileCommitDict = git.getFileCommitDict()
     #---------------------------------
     #compute network connections
@@ -1734,20 +1794,37 @@ def performAnalysis(conf, dbm, dbfilename, git_repo, 
revrange, subsys_descr,
             startDate = git.getRevStartDate()
         else:
             startDate = None
-
-        if link_type == LinkType.proximity:
+        if link_type in (LinkType.proximity, LinkType.file):
             computeProximityLinks(
                 fileCommitDict, cmtdict, id_mgr, link_type, startDate)
-            logical_depends = \
-                computeLogicalDepends(fileCommitDict, cmtdict, startDate)
+            logical_depends = computeLogicalDepends(
+                fileCommitDict, cmtdict, startDate)
+
+            def get_source(file, func_id):
+                return fileCommitDict[file].getFuncImpl(func_id)
+            get_entity_source_code = get_source
+            entity_type = "Function"
         elif link_type == LinkType.feature_file:
             compute_feature_proximity_links_per_file(
                 fileCommitDict, cmtdict, id_mgr, link_type, startDate)
+            logical_depends = compute_logical_depends_features(
+                fileCommitDict, cmtdict, startDate)
+
+            def get_source(file, feature_id):
+                return ""
+            get_entity_source_code = get_source
+            entity_type = "Feature"
         elif link_type == LinkType.feature:
             compute_feature_proximity_links(
                 fileCommitDict, cmtdict, id_mgr, link_type, startDate)
-        else:
-            raise Exception("Unsupported collaboration type!")
+            logical_depends = compute_logical_depends_features(
+                fileCommitDict, cmtdict, startDate)
+
+            def get_source(file, feature_id):
+                return ""
+            get_entity_source_code = get_source
+            entity_type = "Feature"
+
     #---------------------------------
     #compute statistical information
     #---------------------------------
@@ -1758,7 +1835,7 @@ def performAnalysis(conf, dbm, dbfilename, git_repo, 
revrange, subsys_descr,
     #statistical software, that is, GNU R
     #---------------------------------
     emitStatisticalData(cmtlist, id_mgr, logical_depends, outdir, 
releaseRangeID,\
-                        dbm, conf, fileCommitDict)
+                        dbm, conf, fileCommitDict, entity_type, 
get_entity_source_code)
 
 
 ##################################################################
-- 
1.8.5.5


Other related posts:

  • » [codeface] [PATCH 12/12] Fill the commit_dependency table on a feature analysis pass. - Matthias Dittrich