diff --git a/UPDATES.md b/UPDATES.md index bb93c06..feb6f59 100644 --- a/UPDATES.md +++ b/UPDATES.md @@ -2,12 +2,18 @@ ## Latest Updates -### 1. Improved Scoring System +### 1. Repository Evaluation Command +- Added new `repo-eval` command to evaluate all commits in a repository for all committers +- Implemented incremental report generation - each committer's report is saved immediately after evaluation +- Added summary report with links to individual committer reports +- Added error handling to continue evaluation even if one committer's evaluation fails + +### 2. Improved Scoring System - Enhanced the scoring system to provide more accurate and comprehensive code evaluations - Added detailed scoring criteria for each dimension - Implemented weighted scoring for different aspects of code quality -### 2. Evaluation Dimensions +### 3. Evaluation Dimensions The evaluation now covers the following dimensions: - Readability: Code clarity and understandability - Efficiency & Performance: Code execution speed and resource usage @@ -65,7 +71,19 @@ The evaluation now covers the following dimensions: python run_codedog.py pr "repository_name" MR_number --platform gitlab --gitlab-url "https://your.gitlab.instance.com" ``` -3. **Set up Git Hooks**: +3. **Evaluate Repository**: + ```bash + # Evaluate all commits in a GitHub repository + python run_codedog.py repo-eval "repository_name" --start-date YYYY-MM-DD --end-date YYYY-MM-DD --platform github + + # Evaluate all commits in a GitLab repository + python run_codedog.py repo-eval "repository_name" --start-date YYYY-MM-DD --end-date YYYY-MM-DD --platform gitlab + + # Evaluate with specific model + python run_codedog.py repo-eval "repository_name" --start-date YYYY-MM-DD --end-date YYYY-MM-DD --platform gitlab --model deepseek + ``` + +4. **Set up Git Hooks**: ```bash python run_codedog.py setup-hooks ``` @@ -80,4 +98,9 @@ The evaluation now covers the following dimensions: 1. Implement better text chunking and processing for handling large code diffs 2. Develop more specialized scoring criteria for different file types 3. Further improve report presentation with visual charts -4. Deeper integration with CI/CD systems \ No newline at end of file +4. Deeper integration with CI/CD systems + +## TODO +1. Implement better handling of large diffs in GitLab API (currently limited to 20 files per diff) +2. Add support for local repositories in repo-eval command +3. Add more detailed statistics in the summary report \ No newline at end of file diff --git a/codedog/utils/code_evaluator.py b/codedog/utils/code_evaluator.py index a94257a..5f3ea12 100644 --- a/codedog/utils/code_evaluator.py +++ b/codedog/utils/code_evaluator.py @@ -14,6 +14,39 @@ import math import tiktoken # 用于精确计算token数量 +# Function to log LLM inputs and outputs to separate files +def log_llm_interaction(prompt, response, interaction_type="default"): + """ + Log LLM prompts to LLM_in.log and responses to LLM_out.log + + Args: + prompt: The prompt sent to the LLM + response: The response received from the LLM + interaction_type: A label to identify the type of interaction (e.g., "file_evaluation", "summary") + """ + timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + + # Create logs directory if it doesn't exist + os.makedirs("logs", exist_ok=True) + + # Log the prompt + with open("logs/LLM_in.log", "a", encoding="utf-8") as f: + f.write(f"\n\n{'='*50}\n") + f.write(f"TIMESTAMP: {timestamp}\n") + f.write(f"TYPE: {interaction_type}\n") + f.write(f"{'='*50}\n\n") + f.write(prompt) + f.write("\n\n") + + # Log the response + with open("logs/LLM_out.log", "a", encoding="utf-8") as f: + f.write(f"\n\n{'='*50}\n") + f.write(f"TIMESTAMP: {timestamp}\n") + f.write(f"TYPE: {interaction_type}\n") + f.write(f"{'='*50}\n\n") + f.write(response) + f.write("\n\n") + # 导入 grimoire 模板 from codedog.templates.grimoire_en import CODE_SUGGESTION from codedog.templates.grimoire_cn import GrimoireCn @@ -57,11 +90,27 @@ def from_dict(cls, data: Dict[str, Any]) -> "CodeEvaluation": score_fields = ["readability", "efficiency", "security", "structure", "error_handling", "documentation", "code_style"] + # Log the original data + logger.info(f"Creating CodeEvaluation from data: {data}") + print(f"DEBUG: Creating CodeEvaluation from data: {data}") + + # Make a copy of the data to avoid modifying the original + data_copy = data.copy() + for field in score_fields: - if field in data and isinstance(data[field], float): - data[field] = round(data[field]) + if field in data_copy and isinstance(data_copy[field], float): + # Log the conversion + logger.info(f"Converting {field} from float {data_copy[field]} to int {round(data_copy[field])}") + data_copy[field] = round(data_copy[field]) + + # Create the instance + instance = cls(**data_copy) - return cls(**data) + # Log the created instance + logger.info(f"Created CodeEvaluation instance: {instance}") + print(f"DEBUG: Created CodeEvaluation instance: {instance}") + + return instance @dataclass(frozen=True) # Make it immutable and hashable @@ -697,6 +746,10 @@ async def _evaluate_single_diff(self, diff_content: str) -> Dict[str, Any]: def _validate_scores(self, result: Dict[str, Any]) -> Dict[str, Any]: """Validate and normalize scores with enhanced format handling.""" try: + # 记录原始结果 + logger.info(f"Validating scores from result: {result}") + print(f"DEBUG: Original LLM result: {result}") + # 检查并处理不同格式的评分结果 normalized_result = {} @@ -706,6 +759,13 @@ def _validate_scores(self, result: Dict[str, Any]) -> Dict[str, Any]: "error_handling", "documentation", "code_style", "overall_score", "comments", "estimated_hours" ] + # 记录是否所有字段都存在 + missing_fields = [field for field in required_fields if field not in result] + if missing_fields: + logger.warning(f"Missing fields in result: {missing_fields}") + else: + logger.info("All required fields are present in the result") + # 处理可能的不同格式 # 格式1: {"readability": 8, "efficiency": 7, ...} # 格式2: {"score": {"readability": 8, "efficiency": 7, ...}} @@ -739,7 +799,7 @@ def _validate_scores(self, result: Dict[str, Any]) -> Dict[str, Any]: elif "evaluation" in result: normalized_result["comments"] = result["evaluation"] else: - normalized_result["comments"] = "无评价意见" + normalized_result["comments"] = "No evaluation comments were provided by the model. The code may require manual review." elif field in result and isinstance(result[field], dict) and "score" in result[field]: normalized_result[field] = result[field]["score"] else: @@ -785,7 +845,7 @@ def _validate_scores(self, result: Dict[str, Any]) -> Dict[str, Any]: normalized_result["comments"] = result[alt_field] break else: - normalized_result["comments"] = "无评价意见" + normalized_result["comments"] = "No evaluation comments were provided by the model. The code may require manual review." # 处理嵌套的评论结构 - 无论是否在上面的循环中设置 if field == "comments" and isinstance(normalized_result.get("comments"), dict): @@ -812,17 +872,44 @@ def _validate_scores(self, result: Dict[str, Any]) -> Dict[str, Any]: normalized_result["comments"] = comments_str elif field == "overall_score": - # 如果缺少总分,计算其他分数的平均值 - score_fields = ["readability", "efficiency", "security", "structure", - "error_handling", "documentation", "code_style"] - available_scores = [normalized_result.get(f, 5) for f in score_fields if f in normalized_result] - if available_scores: - normalized_result["overall_score"] = round(sum(available_scores) / len(available_scores), 1) + # 检查原始结果中是否有overall_score字段 + if "overall_score" in result: + # 使用原始结果中的值 + normalized_result["overall_score"] = result["overall_score"] else: - normalized_result["overall_score"] = 5.0 + # 如果原始结果中没有该字段,计算其他分数的平均值 + logger.warning("overall_score not found in original result, calculating from other scores") + score_fields = ["readability", "efficiency", "security", "structure", + "error_handling", "documentation", "code_style"] + # 使用原始结果中的值计算平均分 + available_scores = [] + for f in score_fields: + if f in result: + try: + if isinstance(result[f], str): + available_scores.append(float(result[f].strip())) + else: + available_scores.append(float(result[f])) + except (ValueError, TypeError): + # 如果转换失败,跳过该字段 + pass + elif f in normalized_result: + available_scores.append(normalized_result[f]) + + if available_scores: + normalized_result["overall_score"] = round(sum(available_scores) / len(available_scores), 1) + else: + logger.warning("No scores available to calculate overall_score, using default value 5.0") + normalized_result["overall_score"] = 5.0 else: - # 对于其他评分字段,使用默认值5 - normalized_result[field] = 5 + # 检查原始结果中是否有该字段 + if field in result: + # 使用原始结果中的值 + normalized_result[field] = result[field] + else: + # 如果原始结果中没有该字段,才使用默认值5 + logger.warning(f"Field {field} not found in original result, using default value 5") + normalized_result[field] = 5 # 确保分数在有效范围内 score_fields = ["readability", "efficiency", "security", "structure", @@ -839,7 +926,24 @@ def _validate_scores(self, result: Dict[str, Any]) -> Dict[str, Any]: normalized_result[field] = max(1, min(10, score)) except (ValueError, TypeError): - normalized_result[field] = 5 + # 检查原始结果中是否有该字段 + if field in result: + # 尝试使用原始结果中的值 + try: + # 尝试转换为整数 + if isinstance(result[field], str): + normalized_result[field] = int(result[field].strip()) + elif isinstance(result[field], float): + normalized_result[field] = round(result[field]) + else: + normalized_result[field] = result[field] + except (ValueError, TypeError): + # 如果转换失败,使用原始值 + normalized_result[field] = result[field] + else: + # 如果原始结果中没有该字段,才使用默认值5 + logger.warning(f"Field {field} not found in original result or could not be parsed, using default value 5") + normalized_result[field] = 5 # 确保overall_score是浮点数并在1-10范围内 try: @@ -849,15 +953,31 @@ def _validate_scores(self, result: Dict[str, Any]) -> Dict[str, Any]: normalized_result["overall_score"] = max(1.0, min(10.0, float(overall))) except (ValueError, TypeError): - normalized_result["overall_score"] = 5.0 - - # 检查所有分数是否相同,如果是,则稍微调整以增加差异性 - scores = [normalized_result[field] for field in score_fields] - if len(set(scores)) <= 1: - # 所有分数相同,添加一些随机变化 - for field in score_fields[:3]: # 只修改前几个字段 - adjustment = random.choice([-1, 1]) - normalized_result[field] = max(1, min(10, normalized_result[field] + adjustment)) + # 检查原始结果中是否有overall_score字段 + if "overall_score" in result: + # 尝试使用原始结果中的值 + try: + # 尝试转换为浮点数 + if isinstance(result["overall_score"], str): + normalized_result["overall_score"] = float(result["overall_score"].strip()) + else: + normalized_result["overall_score"] = float(result["overall_score"]) + except (ValueError, TypeError): + # 如果转换失败,使用原始值 + normalized_result["overall_score"] = result["overall_score"] + else: + # 如果原始结果中没有该字段,才使用默认值5.0 + logger.warning("overall_score not found in original result or could not be parsed, using default value 5.0") + normalized_result["overall_score"] = 5.0 + + # 禁用分数调整功能,保持LLM原始输出 + # 原始代码:检查所有分数是否相同,如果是,则稍微调整以增加差异性 + # scores = [normalized_result[field] for field in score_fields] + # if len(set(scores)) <= 1: + # # 所有分数相同,添加一些随机变化 + # for field in score_fields[:3]: # 只修改前几个字段 + # adjustment = random.choice([-1, 1]) + # normalized_result[field] = max(1, min(10, normalized_result[field] + adjustment)) # 确保comments字段是字符串类型 if "comments" in normalized_result: @@ -888,16 +1008,44 @@ def _validate_scores(self, result: Dict[str, Any]) -> Dict[str, Any]: normalized_result["comments"] = str(normalized_result["comments"]) except Exception as e: logger.error(f"Error converting comments to string: {e}") - normalized_result["comments"] = f"评论转换错误: {str(e)}" - - # 确保评论不为空 - if not normalized_result["comments"]: - normalized_result["comments"] = "无评价意见" + normalized_result["comments"] = f"Error converting evaluation comments: {str(e)}. The code may require manual review." + + # 确保评论不为空且不是简单的数字或过短的字符串 + if not normalized_result["comments"] or normalized_result["comments"].strip().isdigit() or len(normalized_result["comments"].strip()) < 10: + logger.warning(f"Comments field is empty, a digit, or too short: '{normalized_result['comments']}'. Attempting to extract more detailed comments from the original result.") + + # 尝试从原始结果中提取更详细的评论 + detailed_comments = None + + # 检查原始结果中是否有更详细的评论 + if "comments" in result and isinstance(result["comments"], str) and len(result["comments"]) > 10 and not result["comments"].strip().isdigit(): + detailed_comments = result["comments"] + elif "evaluation" in result and isinstance(result["evaluation"], str) and len(result["evaluation"]) > 10: + detailed_comments = result["evaluation"] + elif "analysis" in result and isinstance(result["analysis"], str) and len(result["analysis"]) > 10: + detailed_comments = result["analysis"] + + # 如果找到了更详细的评论,使用它 + if detailed_comments: + logger.info(f"Found more detailed comments in the original result: {detailed_comments[:100]}...") + normalized_result["comments"] = detailed_comments + else: + normalized_result["comments"] = "No evaluation comments were provided by the model. The code may require manual review." # 使用from_dict方法创建CodeEvaluation实例进行最终验证 try: + # 记录最终结果 + logger.info(f"Final normalized result: {normalized_result}") + print(f"DEBUG: Final normalized result: {normalized_result}") + evaluation = CodeEvaluation.from_dict(normalized_result) - return evaluation.model_dump() + final_result = evaluation.model_dump() + + # 记录最终模型结果 + logger.info(f"Final model result: {final_result}") + print(f"DEBUG: Final model result: {final_result}") + + return final_result except Exception as e: logger.error(f"Error creating CodeEvaluation: {e}") logger.error(f"Normalized result: {normalized_result}") @@ -910,7 +1058,14 @@ def _validate_scores(self, result: Dict[str, Any]) -> Dict[str, Any]: def _generate_default_scores(self, error_message: str) -> Dict[str, Any]: """Generate default scores when evaluation fails.""" - return { + logger.warning(f"Generating default scores due to error: {error_message[:200]}...") + + # 记录调用栈,以便了解是从哪里调用的 + import traceback + stack_trace = traceback.format_stack() + logger.debug(f"Default scores generated from:\n{''.join(stack_trace[-5:-1])}") + + default_scores = { "readability": 5, "efficiency": 5, "security": 5, @@ -920,9 +1075,12 @@ def _generate_default_scores(self, error_message: str) -> Dict[str, Any]: "code_style": 5, "overall_score": 5.0, "estimated_hours": 0.0, - "comments": error_message + "comments": f"Evaluation failed: {error_message}. The code may require manual review." } + logger.info(f"Default scores generated: {default_scores}") + return default_scores + def _estimate_default_hours(self, additions: int, deletions: int) -> float: """Estimate default working hours based on additions and deletions. @@ -1120,9 +1278,13 @@ def _extract_json(self, text: str) -> str: return "" # 打印原始文本的类型和长度 + logger.info(f"Response type: {type(text)}, length: {len(text)}") print(f"DEBUG: Response type: {type(text)}, length: {len(text)}") print(f"DEBUG: First 100 chars: '{text[:100]}'") + # 记录完整响应用于调试 + logger.debug(f"Complete model response: {text}") + # 检查是否包含无法评估的提示(如Base64编码内容) unevaluable_patterns = [ r'Base64编码', @@ -1163,7 +1325,7 @@ def _extract_json(self, text: str) -> str: "documentation": 5, "code_style": 5, "overall_score": 5.0, - "comments": f"无法评估代码: {comment}" + "comments": f"The code could not be evaluated due to content issues: {comment}" } return json.dumps(default_json) @@ -1202,7 +1364,20 @@ def _extract_json(self, text: str) -> str: if improvement_match: scores_dict['comments'] = improvement_match.group(1).strip() else: - scores_dict['comments'] = "No detailed analysis provided." + # Try to extract any meaningful content from the response + overview_match = re.search(r'## Code Functionality Overview\s*\n([\s\S]*?)(?:\n##|\Z)', text) + if overview_match: + scores_dict['comments'] = overview_match.group(1).strip() + else: + # Look for any section that might contain useful information + for section_title in ["Summary", "Overview", "Analysis", "Evaluation", "Review", "Feedback"]: + section_match = re.search(f'## {section_title}\s*\n([\s\S]*?)(?:\n##|\Z)', text, re.IGNORECASE) + if section_match: + scores_dict['comments'] = section_match.group(1).strip() + break + else: + # If no sections found, use the first 500 characters of the response + scores_dict['comments'] = "No detailed analysis section found. Response excerpt: " + text[:500].strip() # 转换为 JSON 字符串 if scores_dict and len(scores_dict) >= 8: # 至少包含7个评分项和评论 @@ -1274,8 +1449,9 @@ def _fix_malformed_json(self, json_str: str) -> str: "code_style": 5, "overall_score": 5.0, "estimated_hours": 0.0, - "comments": "API返回空响应,显示默认分数。" + "comments": "No evaluation comments available. The API returned an empty response, so default scores are shown." } + logger.warning("Returning default scores due to empty response") return json.dumps(default_scores) # 检查是否是错误消息而不是JSON @@ -1302,7 +1478,7 @@ def _fix_malformed_json(self, json_str: str) -> str: "code_style": 5, "overall_score": 5.0, "estimated_hours": 0.0, - "comments": f"API返回错误消息: {json_str[:200]}..." + "comments": f"The evaluation could not be completed. The API returned an error message: {json_str[:200]}..." } return json.dumps(default_scores) @@ -1420,8 +1596,15 @@ def _fix_malformed_json(self, json_str: str) -> str: break if "comments" not in scores: - # 使用原始文本的一部分作为评论 - scores["comments"] = "JSON解析错误,显示提取的分数。原始响应: " + original_json[:200] + "..." + # Try to extract any meaningful content from the response + for section_title in ["Analysis", "Evaluation", "Review", "Feedback", "Comments", "Summary", "Overview"]: + section_match = re.search(f'{section_title}[:\s]+([\s\S]*?)(?=\n\w+[:\s]|\Z)', original_json, re.IGNORECASE) + if section_match: + scores["comments"] = section_match.group(1).strip() + break + else: + # If no sections found, use the original text + scores["comments"] = "Extracted scores from response, but could not find detailed comments. Response excerpt: " + original_json[:300] + "..." # 转换为JSON字符串 return json.dumps(scores) @@ -1441,7 +1624,7 @@ def _fix_malformed_json(self, json_str: str) -> str: "code_style": 5, "overall_score": 5.0, "estimated_hours": 0.0, - "comments": f"JSON解析错误,显示默认分数。错误: {str(e)}" + "comments": f"Unable to extract detailed evaluation comments. There was an error parsing the JSON response: {str(e)}. The code may require manual review." } return json.dumps(default_scores) @@ -1595,6 +1778,10 @@ async def _evaluate_diff_chunk(self, chunk: str) -> Dict[str, Any]: print(f"DEBUG: User input first 100 chars: '{user_message[:100]}...'") print(f"DEBUG: User input length: {len(user_message)}") + # Log the prompt to LLM_in.log + user_message = messages[0].content if len(messages) > 0 else "No user message" + log_llm_interaction(user_message, "", interaction_type="diff_chunk_evaluation_prompt") + # 调用模型 response = await self.model.agenerate(messages=[messages]) self._last_request_time = time.time() @@ -1602,8 +1789,8 @@ async def _evaluate_diff_chunk(self, chunk: str) -> Dict[str, Any]: # 获取响应文本 generated_text = response.generations[0][0].text - # 打印原始响应用于调试 - print(f"\n==== RAW OPENAI RESPONSE ====\n{generated_text}\n==== END RESPONSE ====\n") + # Log the response to LLM_out.log + log_llm_interaction("", generated_text, interaction_type="diff_chunk_evaluation_response") # 解析响应 try: @@ -1679,10 +1866,19 @@ async def _evaluate_diff_chunk(self, chunk: str) -> Dict[str, Any]: retry_count += 1 if retry_count >= 2: # 只重试两次 logger.error(f"DeepSeek API error after 2 retries, abandoning evaluation: {error_message}") - return self._generate_default_scores(f"DeepSeek API错误,放弃评估: {error_message}") + logger.error(f"Original error: {e}") + logger.error(f"Last response (if any): {generated_text[:500] if generated_text else 'No response'}") + + # 创建一个详细的错误消息 + error_detail = f"DeepSeek API错误,放弃评估: {error_message}\n" + error_detail += f"原始错误: {e}\n" + error_detail += f"最后响应: {generated_text[:200] if generated_text else '无响应'}" + + return self._generate_default_scores(error_detail) # 使用较短的等待时间 wait_time = 3 # 固定3秒等待时间 logger.warning(f"DeepSeek API error, retrying in {wait_time}s (attempt {retry_count}/2)") + logger.warning(f"Error details: {error_message}") await asyncio.sleep(wait_time) else: # 其他错误直接返回 @@ -1844,6 +2040,11 @@ async def evaluate_commit_file( logger.info(f"Sending request to model for {file_path}") start_time = time.time() + # Log the prompt to LLM_in.log + user_message = messages[0].content + log_llm_interaction(user_message, "", interaction_type="file_evaluation_prompt") + + # Call the model response = await self.model.agenerate(messages=[messages]) end_time = time.time() logger.info(f"Model response received in {end_time - start_time:.2f} seconds") @@ -1851,9 +2052,8 @@ async def evaluate_commit_file( generated_text = response.generations[0][0].text logger.debug(f"Response size: {len(generated_text)} characters") - # 打印原始响应用于调试 - logger.debug(f"Raw model response (first 200 chars): {generated_text[:200]}...") - print(f"\n==== RAW OPENAI RESPONSE ====\n{generated_text[:200]}...\n==== END RESPONSE ====\n") + # Log the response to LLM_out.log + log_llm_interaction("", generated_text, interaction_type="file_evaluation_response") # 尝试提取JSON部分 logger.info(f"Extracting JSON from response for {file_path}") @@ -1935,8 +2135,8 @@ async def evaluate_commit_file( most_common_score = max(score_counts, key=score_counts.get) most_common_count = score_counts[most_common_score] - # 如果所有分数都相同,或者大部分分数相同,则根据文件类型调整分数 - if most_common_count >= 5: # 如果至少5个分数相同 + # 禁用分数调整功能,保持LLM原始输出 + if False: # 原始条件: most_common_count >= 5 logger.warning(f"Most scores are identical ({most_common_score}, count: {most_common_count}), adjusting for variety") print(f"检测到评分缺乏差异性 ({most_common_score},{most_common_count}个相同),正在调整评分使其更具差异性") @@ -2024,7 +2224,38 @@ async def evaluate_commit_file( eval_data["code_style"] ]) / 7, 1) - logger.info(f"Adjusted scores: {eval_data}") + # 记录原始分数和调整后的分数 + original_scores = { + "readability": most_common_score, + "efficiency": most_common_score, + "security": most_common_score, + "structure": most_common_score, + "error_handling": most_common_score, + "documentation": most_common_score, + "code_style": most_common_score, + "overall_score": most_common_score + } + + adjusted_scores = { + "readability": eval_data["readability"], + "efficiency": eval_data["efficiency"], + "security": eval_data["security"], + "structure": eval_data["structure"], + "error_handling": eval_data["error_handling"], + "documentation": eval_data["documentation"], + "code_style": eval_data["code_style"], + "overall_score": eval_data["overall_score"] + } + + logger.info(f"Original scores: {original_scores}") + logger.info(f"Adjusted scores: {adjusted_scores}") + + # 在评论中添加分数调整说明 + adjustment_note = f"\n\n**Note**: Scores have been adjusted for differentiation. Original scores were all {most_common_score}." + if eval_data["comments"]: + eval_data["comments"] += adjustment_note + else: + eval_data["comments"] = adjustment_note # Calculate estimated hours if not provided if "estimated_hours" not in eval_data or not eval_data["estimated_hours"]: @@ -2130,11 +2361,16 @@ async def evaluate_file_diff( user_message = messages[0].content if len(messages) > 0 else "No user message" print(f"DEBUG: User input first 20 chars: '{user_message[:20]}...'") + # Log the prompt to LLM_in.log + user_message = messages[0].content + log_llm_interaction(user_message, "", interaction_type="file_evaluation_prompt") + + # Call the model response = await self.model.agenerate(messages=[messages]) generated_text = response.generations[0][0].text - # 打印原始响应用于调试 - print(f"\n==== RAW OPENAI RESPONSE ====\n{generated_text[:200]}...\n==== END RESPONSE ====\n") + # Log the response to LLM_out.log + log_llm_interaction("", generated_text, interaction_type="file_evaluation_response") # 尝试提取JSON部分 json_str = self._extract_json(generated_text) @@ -2238,8 +2474,8 @@ async def evaluate_file_diff( most_common_score = max(score_counts, key=score_counts.get) most_common_count = score_counts[most_common_score] - # 如果所有分数都相同,或者大部分分数相同,则根据文件类型调整分数 - if most_common_count >= 5: # 如果至少5个分数相同 + # 禁用分数调整功能,保持LLM原始输出 + if False: # 原始条件: most_common_count >= 5 logger.warning(f"Most scores are identical ({most_common_score}, count: {most_common_count}), adjusting for variety") print(f"检测到评分缺乏差异性 ({most_common_score},{most_common_count}个相同),正在调整评分使其更具差异性") @@ -2327,7 +2563,38 @@ async def evaluate_file_diff( evaluation.code_style ]) / 7, 1) - logger.info(f"Adjusted scores: {evaluation}") + # 记录原始分数和调整后的分数 + original_scores = { + "readability": most_common_score, + "efficiency": most_common_score, + "security": most_common_score, + "structure": most_common_score, + "error_handling": most_common_score, + "documentation": most_common_score, + "code_style": most_common_score, + "overall_score": most_common_score + } + + adjusted_scores = { + "readability": evaluation.readability, + "efficiency": evaluation.efficiency, + "security": evaluation.security, + "structure": evaluation.structure, + "error_handling": evaluation.error_handling, + "documentation": evaluation.documentation, + "code_style": evaluation.code_style, + "overall_score": evaluation.overall_score + } + + logger.info(f"Original scores: {original_scores}") + logger.info(f"Adjusted scores: {adjusted_scores}") + + # 在评论中添加分数调整说明 + adjustment_note = f"\n\n**Note**: Scores have been adjusted for differentiation. Original scores were all {most_common_score}." + if evaluation.comments: + evaluation.comments += adjustment_note + else: + evaluation.comments = adjustment_note # 创建并返回评价结果 return FileEvaluationResult( @@ -2621,11 +2888,19 @@ async def evaluate_commit_as_whole( logger.info("Sending request to model for combined diff evaluation") start_time = time.time() + # Log the prompt to LLM_in.log + user_message = messages[0].content + log_llm_interaction(user_message, "", interaction_type="commit_evaluation_prompt") + + # Call the model response = await self.model.agenerate(messages=[messages]) end_time = time.time() logger.info(f"Model response received in {end_time - start_time:.2f} seconds") generated_text = response.generations[0][0].text + + # Log the response to LLM_out.log + log_llm_interaction("", generated_text, interaction_type="commit_evaluation_response") logger.debug(f"Response size: {len(generated_text)} characters") # Extract JSON from response @@ -2795,11 +3070,19 @@ async def evaluate_commit( messages = [HumanMessage(content=summary_prompt)] logger.info("Sending summary request to model") start_time = time.time() + # Log the prompt to LLM_in.log + user_message = messages[0].content + log_llm_interaction(user_message, "", interaction_type="summary_prompt") + + # Call the model summary_response = await self.model.agenerate(messages=[messages]) end_time = time.time() logger.info(f"Summary response received in {end_time - start_time:.2f} seconds") summary_text = summary_response.generations[0][0].text + + # Log the response to LLM_out.log + log_llm_interaction("", summary_text, interaction_type="summary_response") logger.debug(f"Summary text size: {len(summary_text)} characters") logger.debug(f"Summary text (first 100 chars): {summary_text[:100]}...") @@ -2919,7 +3202,6 @@ def generate_evaluation_markdown(evaluation_results: List[FileEvaluationResult]) # Add total estimated working hours if available if total_scores["estimated_hours"] > 0: markdown += f"- **Total Estimated Working Hours**: {total_scores['estimated_hours']:.1f} hours\n" - markdown += f"- **Average Estimated Hours per File**: {avg_scores['estimated_hours']:.1f} hours\n" markdown += "\n" @@ -2983,7 +3265,22 @@ def generate_evaluation_markdown(evaluation_results: List[FileEvaluationResult]) markdown += f"| **Estimated Working Hours** | **{eval.estimated_hours:.1f}** |\n" markdown += "\n**Comments**:\n\n" - markdown += f"{eval.comments}\n\n" + + # Check if the comments contain the adjustment note + if "**Note**: Scores have been adjusted for differentiation" in eval.comments: + # Split the comments to separate the adjustment note + comments_parts = eval.comments.split("**Note**: Scores have been adjusted for differentiation") + main_comments = comments_parts[0].strip() + adjustment_note = "**Note**: Scores have been adjusted for differentiation" + comments_parts[1] + + # Add the main comments + markdown += f"{main_comments}\n\n" + + # Add the adjustment note with special formatting + markdown += f"
{adjustment_note}
\n\n" + else: + markdown += f"{eval.comments}\n\n" + markdown += "---\n\n" return markdown \ No newline at end of file diff --git a/codedog/utils/langchain_utils.py b/codedog/utils/langchain_utils.py index b4b1d1a..e15b2f4 100644 --- a/codedog/utils/langchain_utils.py +++ b/codedog/utils/langchain_utils.py @@ -263,11 +263,17 @@ async def _agenerate( # 提取消息内容 message = response_data["choices"][0]["message"]["content"] + # 记录完整的响应内容用于调试 + logger.info(f"DeepSeek API response received successfully") + logger.debug(f"DeepSeek API complete response: {json.dumps(response_data, ensure_ascii=False)}") + logger.debug(f"DeepSeek API message content: {message}") + # 更新令牌使用和成本 if "usage" in response_data: tokens = response_data["usage"].get("total_tokens", 0) self.total_tokens += tokens self.total_cost += self._calculate_cost(tokens) + logger.info(f"DeepSeek API token usage: {tokens}, total cost: ${self.total_cost:.6f}") # 创建并返回 ChatResult generation = ChatGeneration(message=AIMessage(content=message)) diff --git a/run_codedog.py b/run_codedog.py index f75c4f2..e386fc3 100755 --- a/run_codedog.py +++ b/run_codedog.py @@ -2,6 +2,7 @@ import asyncio import time import traceback +import logging from dotenv import load_dotenv from typing import Any, Dict, List, Optional, Tuple import os @@ -12,6 +13,9 @@ # Load environment variables from .env file load_dotenv() +# Configure logger +logger = logging.getLogger(__name__) + from github import Github from gitlab import Gitlab from langchain_community.callbacks.manager import get_openai_callback @@ -74,6 +78,20 @@ def parse_args(): help="Platform to use (github, gitlab, or local, defaults to local)") commit_parser.add_argument("--gitlab-url", help="GitLab URL (defaults to https://gitlab.com or GITLAB_URL env var)") + # Repository evaluation command + repo_eval_parser = subparsers.add_parser("repo-eval", help="Evaluate all commits in a repository within a time period for all committers") + repo_eval_parser.add_argument("repo", help="Git repository path or name (e.g. owner/repo for remote repositories)") + repo_eval_parser.add_argument("--start-date", help="Start date (YYYY-MM-DD), defaults to 7 days ago") + repo_eval_parser.add_argument("--end-date", help="End date (YYYY-MM-DD), defaults to today") + repo_eval_parser.add_argument("--include", help="Included file extensions, comma separated, e.g. .py,.js") + repo_eval_parser.add_argument("--exclude", help="Excluded file extensions, comma separated, e.g. .md,.txt") + repo_eval_parser.add_argument("--model", help="Evaluation model, defaults to CODE_REVIEW_MODEL env var or gpt-3.5") + repo_eval_parser.add_argument("--email", help="Email addresses to send the report to (comma-separated)") + repo_eval_parser.add_argument("--output-dir", help="Directory to save reports, defaults to codedog_repo_eval_") + repo_eval_parser.add_argument("--platform", choices=["github", "gitlab", "local"], default="local", + help="Platform to use (github, gitlab, or local, defaults to local)") + repo_eval_parser.add_argument("--gitlab-url", help="GitLab URL (defaults to https://gitlab.com or GITLAB_URL env var)") + return parser.parse_args() @@ -131,40 +149,67 @@ def get_remote_commit_diff( Returns: Dict[str, Dict[str, Any]]: Dictionary mapping file paths to their diffs and statistics """ + logger.info(f"Getting commit diff from {platform} for repository {repository_name}, commit {commit_hash}") + logger.info(f"Include extensions: {include_extensions}, Exclude extensions: {exclude_extensions}") + if platform.lower() == "github": # Initialize GitHub client - github_client = Github() # Will automatically load GITHUB_TOKEN from environment + github_token = os.environ.get("GITHUB_TOKEN", "") + if not github_token: + error_msg = "GITHUB_TOKEN environment variable is not set" + logger.error(error_msg) + print(error_msg) + return {} + + github_client = Github(github_token) print(f"Analyzing GitHub repository {repository_name} for commit {commit_hash}") + logger.info(f"Initialized GitHub client for repository {repository_name}") try: # Get repository + logger.info(f"Fetching repository {repository_name}") repo = github_client.get_repo(repository_name) # Get commit + logger.info(f"Fetching commit {commit_hash}") commit = repo.get_commit(commit_hash) + logger.info(f"Commit found: {commit.sha}, author: {commit.commit.author.name}, date: {commit.commit.author.date}") # Extract file diffs file_diffs = {} - for file in commit.files: + logger.info(f"Processing {len(commit.files)} files in commit") + + for i, file in enumerate(commit.files): + logger.info(f"Processing file {i+1}/{len(commit.files)}: {file.filename}") + # Filter by file extensions _, ext = os.path.splitext(file.filename) + logger.debug(f"File extension: {ext}") + if include_extensions and ext not in include_extensions: + logger.info(f"Skipping file {file.filename} - extension {ext} not in include list") continue if exclude_extensions and ext in exclude_extensions: + logger.info(f"Skipping file {file.filename} - extension {ext} in exclude list") continue if file.patch: + logger.info(f"Adding file {file.filename} to diff (status: {file.status}, additions: {file.additions}, deletions: {file.deletions})") file_diffs[file.filename] = { "diff": f"diff --git a/{file.filename} b/{file.filename}\n{file.patch}", "status": file.status, "additions": file.additions, "deletions": file.deletions, } + else: + logger.warning(f"No patch content for file {file.filename}") + logger.info(f"Processed {len(file_diffs)} files after filtering") return file_diffs except Exception as e: error_msg = f"Failed to retrieve GitHub commit: {str(e)}" + logger.error(error_msg, exc_info=True) print(error_msg) return {} @@ -173,41 +218,58 @@ def get_remote_commit_diff( gitlab_token = os.environ.get("GITLAB_TOKEN", "") if not gitlab_token: error_msg = "GITLAB_TOKEN environment variable is not set" + logger.error(error_msg) print(error_msg) return {} # Use provided GitLab URL or fall back to environment variable or default gitlab_url = gitlab_url or os.environ.get("GITLAB_URL", "https://gitlab.com") + logger.info(f"Using GitLab URL: {gitlab_url}") gitlab_client = Gitlab(url=gitlab_url, private_token=gitlab_token) print(f"Analyzing GitLab repository {repository_name} for commit {commit_hash}") + logger.info(f"Initialized GitLab client for repository {repository_name}") try: # Get repository + logger.info(f"Fetching project {repository_name}") project = gitlab_client.projects.get(repository_name) + logger.info(f"Project found: {project.name}, ID: {project.id}") # Get commit + logger.info(f"Fetching commit {commit_hash}") commit = project.commits.get(commit_hash) + logger.info(f"Commit found: {commit.id}, author: {commit.author_name}, date: {commit.created_at}") # Get commit diff + logger.info("Fetching commit diff") diff = commit.diff() + logger.info(f"Processing {len(diff)} files in commit diff") # Extract file diffs file_diffs = {} - for file_diff in diff: + for i, file_diff in enumerate(diff): file_path = file_diff.get('new_path', '') old_path = file_diff.get('old_path', '') diff_content = file_diff.get('diff', '') + logger.info(f"Processing file {i+1}/{len(diff)}: {file_path}") + logger.debug(f"Old path: {old_path}, New path: {file_path}") + # Skip if no diff content if not diff_content: + logger.warning(f"No diff content for file {file_path}, skipping") continue # Filter by file extensions _, ext = os.path.splitext(file_path) + logger.debug(f"File extension: {ext}") + if include_extensions and ext not in include_extensions: + logger.info(f"Skipping file {file_path} - extension {ext} not in include list") continue if exclude_extensions and ext in exclude_extensions: + logger.info(f"Skipping file {file_path} - extension {ext} in exclude list") continue # Determine file status @@ -218,13 +280,17 @@ def get_remote_commit_diff( else: status = 'M' # Modified + logger.debug(f"File status: {status}") + # Format diff content formatted_diff = f"diff --git a/{old_path} b/{file_path}\n{diff_content}" # Count additions and deletions additions = diff_content.count('\n+') deletions = diff_content.count('\n-') + logger.debug(f"Additions: {additions}, Deletions: {deletions}") + logger.info(f"Adding file {file_path} to diff (status: {status}, additions: {additions}, deletions: {deletions})") file_diffs[file_path] = { "diff": formatted_diff, "status": status, @@ -232,19 +298,289 @@ def get_remote_commit_diff( "deletions": deletions, } + logger.info(f"Processed {len(file_diffs)} files after filtering") return file_diffs except Exception as e: error_msg = f"Failed to retrieve GitLab commit: {str(e)}" + logger.error(error_msg, exc_info=True) print(error_msg) return {} else: error_msg = f"Unsupported platform: {platform}. Use 'github' or 'gitlab'." + logger.error(error_msg) print(error_msg) return {} +def get_all_remote_commits( + platform: str, + repository_name: str, + start_date: str, + end_date: str, + include_extensions: Optional[List[str]] = None, + exclude_extensions: Optional[List[str]] = None, + gitlab_url: Optional[str] = None, +) -> Dict[str, Tuple[List[Any], Dict[str, Dict[str, str]], Dict[str, int]]]: + """ + Get all commits from remote repositories (GitHub or GitLab) grouped by author. + + Args: + platform (str): Platform to use (github or gitlab) + repository_name (str): Repository name (e.g. owner/repo) + start_date (str): Start date (YYYY-MM-DD) + end_date (str): End date (YYYY-MM-DD) + include_extensions (Optional[List[str]], optional): File extensions to include. Defaults to None. + exclude_extensions (Optional[List[str]], optional): File extensions to exclude. Defaults to None. + gitlab_url (Optional[str], optional): GitLab URL. Defaults to None. + + Returns: + Dict[str, Tuple[List[Any], Dict[str, Dict[str, str]], Dict[str, int]]]: Dictionary mapping author names to their commits, file diffs, and code stats + """ + if platform.lower() == "github": + # Initialize GitHub client + github_token = os.environ.get("GITHUB_TOKEN", "") + if not github_token: + error_msg = "GITHUB_TOKEN environment variable is not set" + logger.error(error_msg) + print(error_msg) + return {} + + github_client = Github(github_token) + print(f"Analyzing GitHub repository {repository_name} for all commits") + logger.info(f"Initialized GitHub client for repository {repository_name}") + + try: + # Get repository + repo = github_client.get_repo(repository_name) + + # Convert dates to datetime objects + start_datetime = datetime.strptime(start_date, "%Y-%m-%d") + end_datetime = datetime.strptime(end_date, "%Y-%m-%d") + timedelta(days=1) # Include the end date + + # Get all commits in the repository within the date range + all_commits = repo.get_commits(since=start_datetime, until=end_datetime) + + # Group commits by author + author_commits = {} + + for commit in all_commits: + author_name = commit.commit.author.name + author_email = commit.commit.author.email + + # Use email as part of the key to distinguish between authors with the same name + author_key = f"{author_name} <{author_email}>" if author_email else author_name + + if author_key not in author_commits: + author_commits[author_key] = { + "commits": [], + "file_diffs": {}, + "stats": { + "total_added_lines": 0, + "total_deleted_lines": 0, + "total_effective_lines": 0, + "total_files": set() + } + } + + # Create CommitInfo object + commit_info = CommitInfo( + hash=commit.sha, + author=author_name, + date=commit.commit.author.date, + message=commit.commit.message, + files=[file.filename for file in commit.files], + diff="\n".join([f"diff --git a/{file.filename} b/{file.filename}\n{file.patch}" for file in commit.files if file.patch]), + added_lines=sum(file.additions for file in commit.files), + deleted_lines=sum(file.deletions for file in commit.files), + effective_lines=sum(file.additions - file.deletions for file in commit.files) + ) + + author_commits[author_key]["commits"].append(commit_info) + + # Extract file diffs + file_diffs = {} + for file in commit.files: + if file.patch: + # Filter by file extensions + _, ext = os.path.splitext(file.filename) + if include_extensions and ext not in include_extensions: + continue + if exclude_extensions and ext in exclude_extensions: + continue + + file_diffs[file.filename] = file.patch + author_commits[author_key]["stats"]["total_files"].add(file.filename) + + author_commits[author_key]["file_diffs"][commit.sha] = file_diffs + + # Update stats + author_commits[author_key]["stats"]["total_added_lines"] += commit_info.added_lines + author_commits[author_key]["stats"]["total_deleted_lines"] += commit_info.deleted_lines + author_commits[author_key]["stats"]["total_effective_lines"] += commit_info.effective_lines + + # Convert the set of files to count + for author_key in author_commits: + author_commits[author_key]["stats"]["total_files"] = len(author_commits[author_key]["stats"]["total_files"]) + + # Convert to the expected return format + result = {} + for author_key, data in author_commits.items(): + result[author_key] = (data["commits"], data["file_diffs"], data["stats"]) + + return result + + except Exception as e: + error_msg = f"Failed to retrieve GitHub commits: {str(e)}" + logger.error(error_msg, exc_info=True) + print(error_msg) + return {} + + elif platform.lower() == "gitlab": + # Initialize GitLab client + gitlab_token = os.environ.get("GITLAB_TOKEN", "") + if not gitlab_token: + error_msg = "GITLAB_TOKEN environment variable is not set" + logger.error(error_msg) + print(error_msg) + return {} + + # Use provided GitLab URL or fall back to environment variable or default + gitlab_url = gitlab_url or os.environ.get("GITLAB_URL", "https://gitlab.com") + logger.info(f"Using GitLab URL: {gitlab_url}") + + gitlab_client = Gitlab(url=gitlab_url, private_token=gitlab_token) + print(f"Analyzing GitLab repository {repository_name} for all commits") + logger.info(f"Initialized GitLab client for repository {repository_name}") + + try: + # Get repository + project = gitlab_client.projects.get(repository_name) + logger.info(f"Project found: {project.name}, ID: {project.id}") + + # Convert dates to ISO format + start_iso = f"{start_date}T00:00:00Z" + end_iso = f"{end_date}T23:59:59Z" + + # Get all commits in the repository within the date range + all_commits = project.commits.list(all=True, since=start_iso, until=end_iso) + logger.info(f"Found {len(all_commits)} commits in the date range") + + # Group commits by author + author_commits = {} + + for commit in all_commits: + author_name = commit.author_name + author_email = commit.author_email + + # Use email as part of the key to distinguish between authors with the same name + author_key = f"{author_name} <{author_email}>" if author_email else author_name + + if author_key not in author_commits: + author_commits[author_key] = { + "commits": [], + "file_diffs": {}, + "stats": { + "total_added_lines": 0, + "total_deleted_lines": 0, + "total_effective_lines": 0, + "total_files": set() + } + } + + # Get commit details + commit_detail = project.commits.get(commit.id) + + # Get commit diff + diff = commit_detail.diff() + + # Filter files by extension + filtered_diff = [] + for file_diff in diff: + file_path = file_diff.get('new_path', '') + _, ext = os.path.splitext(file_path) + + if include_extensions and ext not in include_extensions: + continue + if exclude_extensions and ext in exclude_extensions: + continue + + filtered_diff.append(file_diff) + + # Skip if no files match the filter + if not filtered_diff: + continue + + # Get file content for each modified file + file_diffs = {} + for file_diff in filtered_diff: + file_path = file_diff.get('new_path', '') + old_path = file_diff.get('old_path', '') + diff_content = file_diff.get('diff', '') + + # Skip if no diff content + if not diff_content: + continue + + # Format diff content + formatted_diff = f"diff --git a/{old_path} b/{file_path}\n{diff_content}" + file_diffs[file_path] = formatted_diff + author_commits[author_key]["stats"]["total_files"].add(file_path) + + # Skip if no valid diffs + if not file_diffs: + continue + + # Count additions and deletions + added_lines = sum(diff_content.count('\n+') for diff_content in file_diffs.values()) + deleted_lines = sum(diff_content.count('\n-') for diff_content in file_diffs.values()) + effective_lines = added_lines - deleted_lines + + # Create CommitInfo object + commit_info = CommitInfo( + hash=commit.id, + author=author_name, + date=datetime.strptime(commit.created_at, "%Y-%m-%dT%H:%M:%S.%f%z") if '.' in commit.created_at else datetime.strptime(commit.created_at, "%Y-%m-%dT%H:%M:%SZ"), + message=commit.message, + files=list(file_diffs.keys()), + diff="\n\n".join(file_diffs.values()), + added_lines=added_lines, + deleted_lines=deleted_lines, + effective_lines=effective_lines + ) + + author_commits[author_key]["commits"].append(commit_info) + author_commits[author_key]["file_diffs"][commit.id] = file_diffs + + # Update stats + author_commits[author_key]["stats"]["total_added_lines"] += added_lines + author_commits[author_key]["stats"]["total_deleted_lines"] += deleted_lines + author_commits[author_key]["stats"]["total_effective_lines"] += effective_lines + + # Convert the set of files to count + for author_key in author_commits: + author_commits[author_key]["stats"]["total_files"] = len(author_commits[author_key]["stats"]["total_files"]) + + # Convert to the expected return format + result = {} + for author_key, data in author_commits.items(): + result[author_key] = (data["commits"], data["file_diffs"], data["stats"]) + + return result + + except Exception as e: + error_msg = f"Failed to retrieve GitLab commits: {str(e)}" + logger.error(error_msg, exc_info=True) + print(error_msg) + return {} + + else: + error_msg = f"Unsupported platform: {platform}. Use 'github' or 'gitlab'." + logger.error(error_msg) + print(error_msg) + return {} + def get_remote_commits( platform: str, repository_name: str, @@ -609,6 +945,215 @@ def get_remote_commits( return [], {}, {} +async def evaluate_repository_code( + repo_path: str, + start_date: str, + end_date: str, + include_extensions: Optional[List[str]] = None, + exclude_extensions: Optional[List[str]] = None, + model_name: str = "gpt-3.5", + output_dir: Optional[str] = None, + email_addresses: Optional[List[str]] = None, + platform: str = "local", + gitlab_url: Optional[str] = None, +): + """Evaluate all commits in a repository within a time period for all committers. + + Args: + repo_path: Repository path or name (e.g. owner/repo for remote repositories) + start_date: Start date (YYYY-MM-DD) + end_date: End date (YYYY-MM-DD) + include_extensions: List of file extensions to include + exclude_extensions: List of file extensions to exclude + model_name: Name of the model to use for evaluation + output_dir: Directory to save reports + email_addresses: List of email addresses to send the report to + platform: Platform to use (github, gitlab, or local) + gitlab_url: GitLab URL (for GitLab platform only) + + Returns: + Dict[str, str]: Dictionary mapping author names to their report paths + """ + # Generate default output directory if not provided + if not output_dir: + date_slug = datetime.now().strftime("%Y%m%d") + output_dir = f"codedog_repo_eval_{date_slug}" + + # Create output directory if it doesn't exist + os.makedirs(output_dir, exist_ok=True) + + # Get model + model = load_model_by_name(model_name) + + print(f"Evaluating repository {repo_path} commits from {start_date} to {end_date}...") + + # Get all commits grouped by author + if platform.lower() == "local": + # For local repositories, we need to get all authors first + # This is a simplified implementation - in a real scenario, you'd need to implement + # a function to get all authors from a local git repository + print("Local repository evaluation not implemented yet. Please use github or gitlab platform.") + return {} + else: + # Use remote repository (GitHub or GitLab) + author_commits = get_all_remote_commits( + platform, + repo_path, + start_date, + end_date, + include_extensions, + exclude_extensions, + gitlab_url + ) + + if not author_commits: + print(f"No commits found in repository {repo_path} for the specified time period") + return {} + + print(f"Found commits from {len(author_commits)} authors in the repository") + + # Initialize evaluator + evaluator = DiffEvaluator(model) + + # Dictionary to store report paths for each author + author_reports = {} + + # Summary of all authors + summary_report = f"# Repository Evaluation Summary\n\n" + summary_report += f"## Repository: {repo_path}\n" + summary_report += f"## Period: {start_date} to {end_date}\n\n" + summary_report += f"## Authors\n\n" + + # Process each author's commits + for author, (commits, commit_file_diffs, code_stats) in author_commits.items(): + if not commits: + continue + + print(f"\nEvaluating {len(commits)} commits by {author}...") + + # Generate output file name for this author + author_slug = author.replace("@", "_at_").replace(" ", "_").replace("/", "_").replace("<", "").replace(">", "") + output_file = os.path.join(output_dir, f"codedog_eval_{author_slug}.md") + + # Timing and statistics + start_time = time.time() + + try: + with get_openai_callback() as cb: + # Perform evaluation + print(f"Evaluating code commits for {author}...") + evaluation_results = await evaluator.evaluate_commits(commits, commit_file_diffs) + + # Generate Markdown report + report = generate_evaluation_markdown(evaluation_results) + + # Calculate cost and tokens + total_cost = cb.total_cost + total_tokens = cb.total_tokens + + # Add evaluation statistics + elapsed_time = time.time() - start_time + telemetry_info = ( + f"\n## Evaluation Statistics\n\n" + f"- **Evaluation Model**: {model_name}\n" + f"- **Evaluation Time**: {elapsed_time:.2f} seconds\n" + f"- **Tokens Used**: {total_tokens}\n" + f"- **Cost**: ${total_cost:.4f}\n" + f"\n## Code Statistics\n\n" + f"- **Total Files Modified**: {code_stats.get('total_files', 0)}\n" + f"- **Lines Added**: {code_stats.get('total_added_lines', 0)}\n" + f"- **Lines Deleted**: {code_stats.get('total_deleted_lines', 0)}\n" + f"- **Effective Lines**: {code_stats.get('total_effective_lines', 0)}\n" + ) + + report += telemetry_info + + # Save report immediately after evaluation is complete + with open(output_file, "w", encoding="utf-8") as f: + f.write(report) + + # Add to author reports dictionary + author_reports[author] = output_file + + # Print completion message with clear indication that the file is ready + print(f"\n✅ Evaluation for {author} completed and saved to {output_file}") + print(f" - Files: {code_stats.get('total_files', 0)}") + print(f" - Lines: +{code_stats.get('total_added_lines', 0)}/-{code_stats.get('total_deleted_lines', 0)}") + print(f" - Time: {elapsed_time:.2f} seconds") + print(f" - Cost: ${total_cost:.4f}") + + # Add to summary report + summary_report += f"### {author}\n\n" + summary_report += f"- **Commits**: {len(commits)}\n" + summary_report += f"- **Files Modified**: {code_stats.get('total_files', 0)}\n" + summary_report += f"- **Lines Added**: {code_stats.get('total_added_lines', 0)}\n" + summary_report += f"- **Lines Deleted**: {code_stats.get('total_deleted_lines', 0)}\n" + summary_report += f"- **Effective Lines**: {code_stats.get('total_effective_lines', 0)}\n" + summary_report += f"- **Report**: [{os.path.basename(output_file)}]({os.path.basename(output_file)})\n\n" + + # Update the summary file after each committer is evaluated + summary_file = os.path.join(output_dir, "summary.md") + with open(summary_file, "w", encoding="utf-8") as f: + f.write(summary_report) + logger.info(f"Updated summary report with {author}'s evaluation") + + except Exception as e: + # Log the error but continue with other authors + error_msg = f"Error evaluating {author}: {str(e)}" + logger.error(error_msg, exc_info=True) + print(f"\n❌ Error evaluating {author}: {str(e)}") + + # Create an error report for this author + error_report = f"# Evaluation Error for {author}\n\n" + error_report += f"## Error Details\n\n" + error_report += f"```\n{str(e)}\n```\n\n" + error_report += f"## Commit Statistics\n\n" + error_report += f"- **Commits**: {len(commits)}\n" + error_report += f"- **Files Modified**: {code_stats.get('total_files', 0)}\n" + error_report += f"- **Lines Added**: {code_stats.get('total_added_lines', 0)}\n" + error_report += f"- **Lines Deleted**: {code_stats.get('total_deleted_lines', 0)}\n" + + # Save the error report + with open(output_file, "w", encoding="utf-8") as f: + f.write(error_report) + + # Add to author reports dictionary + author_reports[author] = output_file + + # Add error entry to summary report + summary_report += f"### {author} ❌\n\n" + summary_report += f"- **Status**: Error during evaluation\n" + summary_report += f"- **Commits**: {len(commits)}\n" + summary_report += f"- **Files Modified**: {code_stats.get('total_files', 0)}\n" + summary_report += f"- **Report**: [{os.path.basename(output_file)}]({os.path.basename(output_file)})\n\n" + + # Update the summary file after each committer is evaluated (even if there's an error) + summary_file = os.path.join(output_dir, "summary.md") + with open(summary_file, "w", encoding="utf-8") as f: + f.write(summary_report) + logger.info(f"Updated summary report with error for {author}") + + # Final summary report is already saved incrementally, just print a message + summary_file = os.path.join(output_dir, "summary.md") + print(f"\nFinal summary report saved to {summary_file}") + + # Send email report if addresses provided + if email_addresses: + subject = f"[CodeDog] Repository Evaluation Report for {repo_path} ({start_date} to {end_date})" + + sent = send_report_email( + to_emails=email_addresses, + subject=subject, + markdown_content=summary_report, + ) + + if sent: + print(f"Summary report sent to {', '.join(email_addresses)}") + else: + print("Failed to send email notification") + + return author_reports + async def evaluate_developer_code( author: str, start_date: str, @@ -880,13 +1425,22 @@ async def review_commit( platform: Platform to use (github, gitlab, or local) gitlab_url: GitLab URL (for GitLab platform only) """ + logger.info(f"Starting commit review for {commit_hash}") + logger.info(f"Parameters: repo_path={repo_path}, platform={platform}, model={model_name}") + logger.info(f"Include extensions: {include_extensions}, Exclude extensions: {exclude_extensions}") + # Generate default output file name if not provided if not output_file: date_slug = datetime.now().strftime("%Y%m%d") output_file = f"codedog_commit_{commit_hash[:8]}_{date_slug}.md" + logger.info(f"Generated output file name: {output_file}") + else: + logger.info(f"Using provided output file: {output_file}") # Get model + logger.info(f"Loading model: {model_name}") model = load_model_by_name(model_name) + logger.info(f"Model loaded: {model.__class__.__name__}") print(f"Reviewing commit {commit_hash}...") @@ -895,17 +1449,26 @@ async def review_commit( if platform.lower() == "local": # Use local git repository + logger.info(f"Using local git repository: {repo_path or 'current directory'}") try: + logger.info(f"Getting commit diff for {commit_hash}") commit_diff = get_commit_diff(commit_hash, repo_path, include_extensions, exclude_extensions) + logger.info(f"Successfully retrieved commit diff with {len(commit_diff)} files") except Exception as e: - print(f"Error getting commit diff: {str(e)}") + error_msg = f"Error getting commit diff: {str(e)}" + logger.error(error_msg, exc_info=True) + print(error_msg) return elif platform.lower() in ["github", "gitlab"]: # Use remote repository + logger.info(f"Using remote {platform} repository: {repo_path}") if not repo_path or "/" not in repo_path: - print(f"Error: Repository name must be in the format 'owner/repo' for {platform} platform") + error_msg = f"Error: Repository name must be in the format 'owner/repo' for {platform} platform" + logger.error(error_msg) + print(error_msg) return + logger.info(f"Getting remote commit diff for {commit_hash} from {platform}") commit_diff = get_remote_commit_diff( platform=platform, repository_name=repo_path, @@ -914,36 +1477,156 @@ async def review_commit( exclude_extensions=exclude_extensions, gitlab_url=gitlab_url, ) + logger.info(f"Retrieved remote commit diff with {len(commit_diff)} files") else: - print(f"Error: Unsupported platform '{platform}'. Use 'local', 'github', or 'gitlab'.") + error_msg = f"Error: Unsupported platform '{platform}'. Use 'local', 'github', or 'gitlab'." + logger.error(error_msg) + print(error_msg) return if not commit_diff: + logger.warning(f"No changes found in commit {commit_hash}") print(f"No changes found in commit {commit_hash}") return + # Log detailed information about the files + logger.info(f"Found {len(commit_diff)} modified files:") + for file_path, diff_info in commit_diff.items(): + logger.info(f" - {file_path} (status: {diff_info.get('status', 'unknown')}, " + + f"additions: {diff_info.get('additions', 0)}, " + + f"deletions: {diff_info.get('deletions', 0)})") + # Log the size of the diff content + diff_content = diff_info.get('diff', '') + logger.debug(f" Diff content size: {len(diff_content)} characters, " + + f"~{len(diff_content.split())} words") + print(f"Found {len(commit_diff)} modified files") # Initialize evaluator + logger.info("Initializing DiffEvaluator") evaluator = DiffEvaluator(model) + logger.info(f"DiffEvaluator initialized with model: {model.__class__.__name__}") # Timing and statistics start_time = time.time() + logger.info(f"Starting evaluation at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}") with get_openai_callback() as cb: # Perform review print("Reviewing code changes...") + logger.info(f"Starting commit evaluation for {commit_hash}") review_results = await evaluator.evaluate_commit(commit_hash, commit_diff) + logger.info(f"Commit evaluation completed, got results for {len(review_results.get('files', []))} files") + + # Log evaluation results summary + logger.info(f"Statistics: {review_results.get('statistics', {})}") # Generate Markdown report - report = generate_evaluation_markdown(review_results) + logger.info("Generating Markdown report") + report = f"# Commit Review Report\n\n" + report += f"## Commit: {commit_hash}\n\n" + report += f"## Summary\n\n{review_results.get('summary', 'No summary available.')}\n\n" + report += f"## Statistics\n\n" + + # Get statistics + total_files = review_results.get('statistics', {}).get('total_files', 0) + total_additions = review_results.get('statistics', {}).get('total_additions', 0) + total_deletions = review_results.get('statistics', {}).get('total_deletions', 0) + + report += f"- Total files: {total_files}\n" + report += f"- Total additions: {total_additions}\n" + report += f"- Total deletions: {total_deletions}\n\n" + report += f"## Files\n\n" + + # Log detailed file statistics + logger.info(f"Report statistics: {total_files} files, {total_additions} additions, {total_deletions} deletions") + + for file in review_results.get('files', []): + file_path = file.get('path', 'Unknown file') + file_status = file.get('status', 'Unknown') + file_additions = file.get('additions', 0) + file_deletions = file.get('deletions', 0) + overall_score = file.get('overall_score', 'N/A') + + logger.info(f"File report: {file_path} (status: {file_status}, score: {overall_score})") + + report += f"### {file_path}\n\n" + report += f"- Status: {file_status}\n" + report += f"- Additions: {file_additions}\n" + report += f"- Deletions: {file_deletions}\n" + report += f"- Overall Score: {overall_score}\n\n" + report += f"#### Scores\n\n" + + # Get all scores + readability = file.get('readability', 'N/A') + efficiency = file.get('efficiency', 'N/A') + security = file.get('security', 'N/A') + structure = file.get('structure', 'N/A') + error_handling = file.get('error_handling', 'N/A') + documentation = file.get('documentation', 'N/A') + code_style = file.get('code_style', 'N/A') + + # Log detailed scores + logger.debug(f"File scores: {file_path} - " + + f"readability: {readability}, " + + f"efficiency: {efficiency}, " + + f"security: {security}, " + + f"structure: {structure}, " + + f"error_handling: {error_handling}, " + + f"documentation: {documentation}, " + + f"code_style: {code_style}") + + report += f"- Readability: {readability}\n" + report += f"- Efficiency: {efficiency}\n" + report += f"- Security: {security}\n" + report += f"- Structure: {structure}\n" + report += f"- Error Handling: {error_handling}\n" + report += f"- Documentation: {documentation}\n" + report += f"- Code Style: {code_style}\n\n" + + comments = file.get('comments', 'No comments.') + report += f"#### Comments\n\n{comments}\n\n" + report += f"---\n\n" # Calculate cost and tokens total_cost = cb.total_cost total_tokens = cb.total_tokens + logger.info(f"API usage: {total_tokens} tokens, ${total_cost:.4f}") # Add review statistics elapsed_time = time.time() - start_time + logger.info(f"Review completed in {elapsed_time:.2f} seconds") + + # Add whole commit evaluation section if available + if "whole_commit_evaluation" in review_results: + whole_eval = review_results["whole_commit_evaluation"] + whole_eval_section = f"\n## Whole Commit Evaluation\n\n" + whole_eval_section += f"### Scores\n\n" + whole_eval_section += f"| Dimension | Score |\n" + whole_eval_section += f"|-----------|-------|\n" + whole_eval_section += f"| Readability | {whole_eval.get('readability', 'N/A')}/10 |\n" + whole_eval_section += f"| Efficiency | {whole_eval.get('efficiency', 'N/A')}/10 |\n" + whole_eval_section += f"| Security | {whole_eval.get('security', 'N/A')}/10 |\n" + whole_eval_section += f"| Structure | {whole_eval.get('structure', 'N/A')}/10 |\n" + whole_eval_section += f"| Error Handling | {whole_eval.get('error_handling', 'N/A')}/10 |\n" + whole_eval_section += f"| Documentation | {whole_eval.get('documentation', 'N/A')}/10 |\n" + whole_eval_section += f"| Code Style | {whole_eval.get('code_style', 'N/A')}/10 |\n" + whole_eval_section += f"| **Overall Score** | **{whole_eval.get('overall_score', 'N/A')}/10** |\n\n" + + # Add analysis from whole commit evaluation + whole_eval_section += f"### Analysis\n\n{whole_eval.get('comments', 'No comments available.')}\n\n" + + # Insert the whole commit evaluation section after the summary + report = report.replace("## Files\n\n", whole_eval_section + "## Files\n\n") + + # Add estimated working hours if available + estimated_hours_info = "" + if "estimated_hours" in review_results: + estimated_hours_info = ( + f"- **Estimated Working Hours**: {review_results['estimated_hours']} hours " + f"(for an experienced programmer with 5-10+ years of experience)\n" + ) + telemetry_info = ( f"\n## Review Statistics\n\n" f"- **Review Model**: {model_name}\n" @@ -954,36 +1637,68 @@ async def review_commit( f"- **Total Files Modified**: {len(commit_diff)}\n" f"- **Lines Added**: {sum(diff.get('additions', 0) for diff in commit_diff.values())}\n" f"- **Lines Deleted**: {sum(diff.get('deletions', 0) for diff in commit_diff.values())}\n" + f"{estimated_hours_info}" ) report += telemetry_info # Save report - with open(output_file, "w", encoding="utf-8") as f: - f.write(report) - print(f"Report saved to {output_file}") + logger.info(f"Saving report to {output_file}") + try: + with open(output_file, "w", encoding="utf-8") as f: + f.write(report) + logger.info(f"Report successfully saved to {output_file}") + print(f"Report saved to {output_file}") + except Exception as e: + error_msg = f"Error saving report to {output_file}: {str(e)}" + logger.error(error_msg, exc_info=True) + print(error_msg) # Send email report if addresses provided if email_addresses: + logger.info(f"Sending report to {email_addresses}") + print(f"Sending report to {', '.join(email_addresses)}...") subject = f"[CodeDog] Code Review for Commit {commit_hash[:8]}" - sent = send_report_email( - to_emails=email_addresses, - subject=subject, - markdown_content=report, - ) + try: + sent = send_report_email( + to_emails=email_addresses, + subject=subject, + markdown_content=report, + ) - if sent: - print(f"Report sent to {', '.join(email_addresses)}") - else: - print("Failed to send email notification") + if sent: + logger.info(f"Report successfully sent to {email_addresses}") + print(f"Report sent to {', '.join(email_addresses)}") + else: + logger.error("Failed to send email notification") + print("Failed to send email notification") + except Exception as e: + error_msg = f"Error sending email: {str(e)}" + logger.error(error_msg, exc_info=True) + print(error_msg) + logger.info("Commit review completed successfully") return report def main(): """Main function to parse arguments and run the appropriate command.""" + # Configure logging + logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', + handlers=[ + logging.FileHandler("codedog.log"), + logging.StreamHandler() + ] + ) + logger.info("Starting CodeDog") + + # Parse arguments args = parse_args() + logger.info(f"Command: {args.command}") + logger.debug(f"Arguments: {args}") if args.command == "pr": # Review a GitHub or GitLab pull request @@ -1066,42 +1781,141 @@ def main(): print("\n===================== Report End =====================\n") elif args.command == "commit": + logger.info(f"Running commit review for {args.commit_hash}") + # Process file extension parameters include_extensions = None if args.include: include_extensions = parse_extensions(args.include) + logger.info(f"Using provided include extensions: {include_extensions}") elif os.environ.get("DEV_EVAL_DEFAULT_INCLUDE"): include_extensions = parse_extensions(os.environ.get("DEV_EVAL_DEFAULT_INCLUDE")) + logger.info(f"Using default include extensions from environment: {include_extensions}") exclude_extensions = None if args.exclude: exclude_extensions = parse_extensions(args.exclude) + logger.info(f"Using provided exclude extensions: {exclude_extensions}") elif os.environ.get("DEV_EVAL_DEFAULT_EXCLUDE"): exclude_extensions = parse_extensions(os.environ.get("DEV_EVAL_DEFAULT_EXCLUDE")) + logger.info(f"Using default exclude extensions from environment: {exclude_extensions}") # Get model model_name = args.model or os.environ.get("CODE_REVIEW_MODEL", "gpt-3.5") + logger.info(f"Using model: {model_name}") # Get email addresses email_addresses = parse_emails(args.email or os.environ.get("NOTIFICATION_EMAILS", "")) + if email_addresses: + logger.info(f"Will send report to: {email_addresses}") + + # Log platform information + if args.platform != "local": + logger.info(f"Using {args.platform} platform with repository: {args.repo}") + if args.platform == "gitlab" and args.gitlab_url: + logger.info(f"Using GitLab URL: {args.gitlab_url}") + else: + logger.info(f"Using local repository: {args.repo or 'current directory'}") # Run commit review - report = asyncio.run(review_commit( - commit_hash=args.commit_hash, - repo_path=args.repo, - include_extensions=include_extensions, - exclude_extensions=exclude_extensions, - model_name=model_name, - output_file=args.output, - email_addresses=email_addresses, - platform=args.platform, - gitlab_url=args.gitlab_url, - )) + logger.info("Starting commit review process") + try: + report = asyncio.run(review_commit( + commit_hash=args.commit_hash, + repo_path=args.repo, + include_extensions=include_extensions, + exclude_extensions=exclude_extensions, + model_name=model_name, + output_file=args.output, + email_addresses=email_addresses, + platform=args.platform, + gitlab_url=args.gitlab_url, + )) + + logger.info("Commit review completed successfully") + + if report: + logger.info("Report generated successfully") + print("\n===================== Commit Review Report =====================\n") + print("Report generated successfully. See output file for details.") + print("\n===================== Report End =====================\n") + except Exception as e: + logger.error(f"Error during commit review: {str(e)}", exc_info=True) + print(f"Error during commit review: {str(e)}") - if report: - print("\n===================== Commit Review Report =====================\n") - print("Report generated successfully. See output file for details.") - print("\n===================== Report End =====================\n") + elif args.command == "repo-eval": + logger.info(f"Running repository evaluation for {args.repo}") + + # Set default dates if not provided + if not args.start_date: + args.start_date = (datetime.now() - timedelta(days=7)).strftime("%Y-%m-%d") + logger.info(f"Using default start date: {args.start_date}") + + if not args.end_date: + args.end_date = datetime.now().strftime("%Y-%m-%d") + logger.info(f"Using default end date: {args.end_date}") + + # Process file extension parameters + include_extensions = None + if args.include: + include_extensions = parse_extensions(args.include) + logger.info(f"Using provided include extensions: {include_extensions}") + elif os.environ.get("DEV_EVAL_DEFAULT_INCLUDE"): + include_extensions = parse_extensions(os.environ.get("DEV_EVAL_DEFAULT_INCLUDE")) + logger.info(f"Using default include extensions from environment: {include_extensions}") + + exclude_extensions = None + if args.exclude: + exclude_extensions = parse_extensions(args.exclude) + logger.info(f"Using provided exclude extensions: {exclude_extensions}") + elif os.environ.get("DEV_EVAL_DEFAULT_EXCLUDE"): + exclude_extensions = parse_extensions(os.environ.get("DEV_EVAL_DEFAULT_EXCLUDE")) + logger.info(f"Using default exclude extensions from environment: {exclude_extensions}") + + # Get model + model_name = args.model or os.environ.get("CODE_REVIEW_MODEL", "gpt-3.5") + logger.info(f"Using model: {model_name}") + + # Get email addresses + email_addresses = parse_emails(args.email or os.environ.get("NOTIFICATION_EMAILS", "")) + if email_addresses: + logger.info(f"Will send report to: {email_addresses}") + + # Log platform information + if args.platform != "local": + logger.info(f"Using {args.platform} platform with repository: {args.repo}") + if args.platform == "gitlab" and args.gitlab_url: + logger.info(f"Using GitLab URL: {args.gitlab_url}") + else: + logger.info(f"Using local repository: {args.repo}") + + # Run repository evaluation + logger.info("Starting repository evaluation process") + try: + author_reports = asyncio.run(evaluate_repository_code( + repo_path=args.repo, + start_date=args.start_date, + end_date=args.end_date, + include_extensions=include_extensions, + exclude_extensions=exclude_extensions, + model_name=model_name, + output_dir=args.output_dir, + email_addresses=email_addresses, + platform=args.platform, + gitlab_url=args.gitlab_url, + )) + + logger.info("Repository evaluation completed successfully") + + if author_reports: + logger.info(f"Generated reports for {len(author_reports)} authors") + print("\n===================== Repository Evaluation Report =====================\n") + print(f"Reports generated successfully for {len(author_reports)} authors.") + print("See output directory for details.") + print("\n===================== Report End =====================\n") + except Exception as e: + logger.error(f"Error during repository evaluation: {str(e)}", exc_info=True) + print(f"Error during repository evaluation: {str(e)}") else: # No command specified, show usage @@ -1113,12 +1927,23 @@ def main(): print("Example: python run_codedog.py commit abc123def # Review local commit") print("Example: python run_codedog.py commit abc123def --repo owner/repo --platform github # Review GitHub commit") print("Example: python run_codedog.py commit abc123def --repo owner/repo --platform gitlab # Review GitLab commit") + print("Example: python run_codedog.py repo-eval owner/repo --start-date 2023-01-01 --end-date 2023-01-31 --platform github # Evaluate all commits in a GitHub repo") + print("Example: python run_codedog.py repo-eval owner/repo --start-date 2023-01-01 --end-date 2023-01-31 --platform gitlab # Evaluate all commits in a GitLab repo") if __name__ == "__main__": try: main() except Exception as e: - print(f"Error: {str(e)}") + error_msg = f"Error: {str(e)}" + print(error_msg) print("\nDetailed error information:") - traceback.print_exc() \ No newline at end of file + traceback.print_exc() + + # Log the error if logging is configured + try: + logger.error(error_msg, exc_info=True) + logger.error("Program terminated with error") + except: + # If logging is not yet configured, just print + pass \ No newline at end of file