se-sic · flx5 · Nov 26, 2017 · Nov 27, 2017 · Nov 27, 2017 · Nov 29, 2017
diff --git a/tests/test-network-covariates.R b/tests/test-network-covariates.R
diff --git a/tests/test-split.R b/tests/test-split.R
@@ -1,6 +1,7 @@
 ## (c) Claus Hunsen, 2017
 ## [email protected]
-
+## (c) Felix Prasse, 2017
+## [email protected]
 
 context("Splitting functionality.")
 
@@ -1454,3 +1455,54 @@ test_that("Check and correct duplicate range names during network activity-based
     expect_identical(result, expected, info = "Removal of duplicate ranges.")
 
 })
+
+
+##
+## Test splitting data by network names.
+##
+test_that("Test splitting data by networks", {
+    ## set up the project and network configurations
+    proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT)
+    proj.conf$update.value("artifact.filter.base", FALSE)
+    net.conf = NetworkConf$new()
+    net.conf$update.values(list(author.relation = "cochange", simplify = FALSE))
+
+    ## load the project data
+    project.data = ProjectData$new(proj.conf)
+
+    ## split the data time-based and build one author network per resulting range
+    time.bins = as.POSIXct(c("2016-07-12 15:00:00", "2016-07-12 16:00:00", "2016-07-12 16:05:00", "2030-01-01 00:00:00"))
+    data.per.range = split.data.time.based(project.data, bins = time.bins)
+    networks.per.range = lapply(data.per.range, function(range.data) {
+        NetworkBuilder$new(range.data, net.conf)$get.author.network()
+    })
+
+    ## split the project data by these networks, once per aggregation level
+    levels.to.test = c("range", "cumulative", "project")
+    results.per.level = setNames(lapply(levels.to.test, function(level) {
+        split.data.by.networks(networks.per.range, project.data, level)
+    }), levels.to.test)
+
+    ## expected range names of the split data, per aggregation level
+    project.range = "1970-01-01 00:00:00-9999-01-01 00:00:00"
+    expected.range.names = list(
+        "range" = c("2016-07-12 15:00:00-2016-07-12 16:00:00",
+                    "2016-07-12 16:00:00-2016-07-12 16:05:00",
+                    "2016-07-12 16:05:00-2030-01-01 00:00:00"),
+        "cumulative" = c("1970-01-01 00:00:00-2016-07-12 16:00:00",
+                         "1970-01-01 00:00:00-2016-07-12 16:05:00",
+                         "1970-01-01 00:00:00-2030-01-01 00:00:00"),
+        "project" = rep(project.range, 3)
+    )
+
+    ## each split entry must carry its input network unchanged and the expected range
+    for (level in names(results.per.level)) {
+        level.results = results.per.level[[level]]
+        level.range.names = expected.range.names[[level]]
+        for (idx in seq_along(level.results)) {
+            entry = level.results[[idx]]
+            expect_true(igraph::identical_graphs(entry[["network"]], networks.per.range[[idx]]))
+            expect_equal(entry[["data"]]$get.range(), level.range.names[[idx]])
+        }
+    }
+})
diff --git a/util-core-peripheral.R b/util-core-peripheral.R
@@ -6,6 +6,8 @@
 ## [email protected]
 ## (c) Sofie Kemper, 2017
 ## [email protected]
+## (c) Felix Prasse, 2017
+## [email protected]
 
 ## This file is derived from following Codeface script:
 ## https://github.com/siemens/codeface/blob/master/codeface/R/developer_classification.r
@@ -451,6 +453,50 @@ get.commit.count.threshold = function(range.data) {
     return(threshold)
 }
 
+#' Get the commit count per (committer, author) pair for commits whose committer is not their author
+#'
+#' @param range.data The data to count on
+#' @return A data frame in descending order by the commit count (column 'freq'); NA if there is no commit data.
+get.committer.not.author.commit.count = function(range.data) {
+    logging::logdebug("get.committer.not.author.commit.count: starting.")
+
+    ## Get commit data (first element of the result of 'get.commit.data')
+    commits.df = get.commit.data(range.data, columns = c("committer.name", "author.name"))[[1]]
+
+    ## Return NA in case no commit data is available
+    if (all(is.na(commits.df))) {
+        return(NA)
+    }
+
+    ## Count commits per (committer, author) pair; dotted column names must be quoted, otherwise SQL reads 'committer.name' as column 'name' of table 'committer'
+    res = sqldf::sqldf("SELECT *, COUNT(*) As `freq` FROM `commits.df` WHERE `committer.name` <> `author.name` GROUP BY `committer.name`,`author.name` ORDER BY `freq` DESC")
+
+    logging::logdebug("get.committer.not.author.commit.count: finished.")
+    return(res)
+}
+
+#' Get the commit count per committer in the given range data
+#'
+#' @param range.data The data to count on
+#' @return A data frame in descending order by the commit count (column 'freq'); NA if there is no commit data.
+get.committer.commit.count = function(range.data) {
+    logging::logdebug("get.committer.commit.count: starting.")
+
+    ## Get commit data (first element of the result of 'get.commit.data')
+    commits.df = get.commit.data(range.data, columns = c("committer.name", "committer.email"))[[1]]
+
+    ## Return NA in case no commit data is available
+    if (all(is.na(commits.df))) {
+        return(NA)
+    }
+
+    ## Execute a query to get the commit count per committer
+    res = sqldf::sqldf("select *, COUNT(*) as `freq` from `commits.df` group by `committer.name` order by `freq` desc")
+
+    logging::logdebug("get.committer.commit.count: finished.")
+    return(res)
+}
+
 ## Get the commit count per author of the specified version range
 ## as a data frame ordered by the commit count.
 get.author.commit.count = function(range.data) {

diff --git a/util-init.R b/util-init.R
@@ -24,3 +24,4 @@ source("util-bulk.R")
 source("util-plot.R")
 source("util-core-peripheral.R")
 source("util-networks-metrics.R")
+source("util-networks-covariates.R")