Skip to content

Commit

Permalink
Reapply "i #284 Try Adding Debugging"
Browse files Browse the repository at this point in the history
This reverts commit e55b6e2.
  • Loading branch information
daomcgill committed Nov 13, 2024
1 parent f11e452 commit 09d00c3
Show file tree
Hide file tree
Showing 6 changed files with 122 additions and 13 deletions.
21 changes: 21 additions & 0 deletions R/git.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,19 @@ parse_gitlog <- function(perceval_path,git_repo_path,save_path=NA,perl_regex=NA)
git_uri <- git_repo_path
save_path <- ifelse(!is.na(save_path),path.expand(save_path),NA)

# DEBUG
print(paste("Perceval path:", perceval_path))
print(paste("Git repo path:", git_repo_path))
print(paste("Save path:", save_path))
print(paste("Perl regex:", perl_regex))

# Use Perceval to parse .git. The --json-line flag is required so the output can be parsed by jsonlite::stream_in.
# The log will be saved to the /tmp/ folder
gitlog_path <- "/tmp/gitlog.log"

# DEBUG
print(paste("Gitlog path:", gitlog_path))

# Perceval suggested flags
perceval_flags <-
c(
Expand Down Expand Up @@ -62,18 +71,30 @@ parse_gitlog <- function(perceval_path,git_repo_path,save_path=NA,perl_regex=NA)
}
}

# DEBUG
print("Git log call message:")
print(gitlog_call_message)

# Parsed JSON output.
perceval_output <- system2(perceval_path,
args = c('git', '--git-log',gitlog_path,git_uri,'--json-line'),
stdout = TRUE,
stderr = FALSE)

# DEBUG
print("Perceval Output:")
cat(perceval_output, sep = "\n")

perceval_parsed <- data.table(jsonlite::stream_in(textConnection(perceval_output),verbose = FALSE))

if(nrow(perceval_parsed) == 0){
stop("The repository specified has no commits.")
}

# DEBUG
print("Parsed data structure:")
print(str(perceval_parsed))

# APR's very first commit is an unusual case of a commit without files. We filter such commits out here.
is_commit_with_files <- !!sapply(perceval_parsed$data.files,length)
perceval_parsed <- perceval_parsed[is_commit_with_files]
Expand Down
52 changes: 47 additions & 5 deletions R/mail.R
Original file line number Diff line number Diff line change
Expand Up @@ -589,17 +589,54 @@ parse_mbox <- function(perceval_path, mbox_file_path) {
mbox_dir <- dirname(mbox_file_path) # Extract directory path
mbox_uri <- mbox_file_path # URI points to the mbox file



# Debugging
print(paste("Perceval path:", perceval_path))
print(paste("Mbox file path:", mbox_file_path))
print(paste("Mbox directory path:", mbox_dir))

# Use Perceval to parse the mbox file
perceval_output <- system2(perceval_path,
args = c('mbox', mbox_uri, mbox_dir, '--json-line'),
stdout = TRUE,
stderr = TRUE)
perceval_output <- tryCatch({
system2(perceval_path,
args = c('mbox', mbox_uri, mbox_dir, '--json-line'),
stdout = TRUE,
stderr = TRUE)
}, error = function(e) {
print("Error running Perceval:")
print(e$message)
stop("Perceval execution failed.")
})

# Debugging Perceval output
print("Perceval Output:")
cat(perceval_output, sep = "\n")




# Filter JSON lines from Perceval output
json_lines <- perceval_output[grepl("^\\{", perceval_output)] # Escape the `{` character


if (length(json_lines) == 0) {
stop("No valid JSON lines found in Perceval output. Check the mbox file or Perceval configuration.")
}


# Parse JSON output as a data.table
perceval_parsed <- data.table(jsonlite::stream_in(textConnection(json_lines), verbose = FALSE))
perceval_parsed <- tryCatch({
data.table(jsonlite::stream_in(textConnection(json_lines), verbose = FALSE))
}, error = function(e) {
print("Error parsing JSON lines:")
print(e$message)
stop("JSON parsing failed.")
})

# Debugging parsed data
print("Parsed data structure:")
print(str(perceval_parsed))


columns_of_interest <- c("data.Message.ID", "data.In.Reply.To", "data.Date", "data.From", "data.To", "data.Cc", "data.Subject", "data.body.plain", "data.body")
columns_rename <- c("reply_id", "in_reply_to_id", "reply_datetimetz", "reply_from", "reply_to", "reply_cc", "reply_subject", "reply_body", "reply_body")
Expand All @@ -613,6 +650,11 @@ parse_mbox <- function(perceval_path, mbox_file_path) {
old = colnames(perceval_parsed),
new = columns_rename[is_available_column])

# Debugging final parsed data
print("Final parsed data:")
print(perceval_parsed)


return(perceval_parsed)
}

Expand Down
6 changes: 3 additions & 3 deletions man/parse_gitlog.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 16 additions & 0 deletions man/parse_mbox.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 16 additions & 0 deletions tests/testthat/test-git.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,25 @@ test_that("Calling parse_gitlog with correct perceval and correct git log path r
tools_path <- file.path(tools_path)
tool <- yaml::read_yaml(tools_path)
perceval_path <- tool[["perceval"]]

# Debugging output
print("Debugging parse_gitlog:")
print(paste("Tools path:", tools_path))
print(paste("Perceval path:", perceval_path))

git_repo_path <- suppressWarnings(git_create_sample_log())

# Debugging output
print(paste("Generated Git repo path:", git_repo_path))

result <- parse_gitlog(perceval_path, git_repo_path)

# Debugging output
print("Result of parse_gitlog:")
print(head(result))

expect_is(result, "data.table")

suppressWarnings(git_delete_sample_log(git_repo_path))
})

Expand Down
24 changes: 19 additions & 5 deletions tests/testthat/test-mail.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,30 @@ test_that("Calling parse_mbox with correct perceval and mbox path returns a data
tools_path <- file.path(tools_path)
tool <- yaml::read_yaml(tools_path)
perceval_path <- tool[["perceval"]]
mbox_path <- example_mailing_list_two_threads(folder_path = "/tmp",
folder_name="example_two_threads_mailing_list",
file_name = "two_thread_mailing_list")

# Debugging output
print("Debugging parse_mbox:")
print(paste("Tools path:", tools_path))
print(paste("Perceval path:", perceval_path))

mbox_path <- example_mailing_list_two_threads(
folder_path = "/tmp",
folder_name = "example_two_threads_mailing_list",
file_name = "two_thread_mailing_list"
)

# Debugging output
print(paste("Generated Mbox path:", mbox_path))

result <- parse_mbox(perceval_path, mbox_path)

io_delete_folder(folder_path="/tmp", folder_name="example_two_threads_mailing_list")
# Debugging output
print("Result of parse_mbox:")
print(head(result))

io_delete_folder(folder_path = "/tmp", folder_name = "example_two_threads_mailing_list")

expect_equal(result[reply_from == "John Doe <[email protected]>"]$reply_subject, "Subject 1")
expect_equal(result[reply_subject == "Re: Subject 1"]$reply_from, "Smithsonian Doe <[email protected]>")

})

0 comments on commit 09d00c3

Please sign in to comment.