Skip to content

Commit d1d06e8

Browse files
authored
Merge pull request #38 from data-exp-lab/fe-gexf
Optimize Gexf creation, Edge creation
2 parents daf1bc6 + 5319c1e commit d1d06e8

File tree

13 files changed

+1073
-1133
lines changed

13 files changed

+1073
-1133
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,4 +12,5 @@ dist
1212
*repo_metadata.json
1313
__pycache__
1414
*.duckdb
15-
*.gexf
15+
*.gexf
16+
*.gexf.gz

backend/app/main.py

Lines changed: 137 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,12 @@ def suggest_topics():
193193
# 2. Prioritizes exact matches and high-frequency topics
194194
# 3. Uses word boundary matching for better relevance
195195
sql_query = """
196-
WITH ranked_topics AS (
196+
WITH split_topics AS (
197+
SELECT
198+
unnest(string_split(topics, '|')) as topic
199+
FROM repo_topics
200+
),
201+
ranked_topics AS (
197202
SELECT
198203
topic,
199204
COUNT(*) as count,
@@ -202,7 +207,7 @@ def suggest_topics():
202207
WHEN LOWER(topic) LIKE ? THEN 2 -- Starts with query gets second priority
203208
ELSE 1 -- Contains query gets lowest priority
204209
END as match_priority
205-
FROM repo_topics
210+
FROM split_topics
206211
WHERE LOWER(topic) LIKE ?
207212
GROUP BY topic
208213
)
@@ -246,6 +251,13 @@ def finalized_node_gexf():
246251
topics = data.get("topics", [])
247252
gexf_path = gexf_node_service.generate_gexf_nodes_for_topics(topics)
248253
# print(topics)
254+
255+
if gexf_path is None:
256+
return jsonify({
257+
"success": False,
258+
"error": "No repositories found for the given topics"
259+
}), 404
260+
249261
# Read the GEXF file content
250262
with open(gexf_path, "r", encoding="utf-8") as f:
251263
gexf_content = f.read()
@@ -463,14 +475,16 @@ def get_unique_repos():
463475
placeholders = ",".join(["?"] * len(topics_lower))
464476

465477
# Query to get unique repositories that have ANY of the given topics
466-
query = f"""
478+
# Create a single search pattern that matches any of the topics
479+
search_pattern = '%' + '%'.join(topics_lower) + '%'
480+
query = """
467481
SELECT COUNT(DISTINCT r.nameWithOwner) as count
468482
FROM repos r
469483
JOIN repo_topics t ON r.nameWithOwner = t.repo
470-
WHERE LOWER(t.topic) IN ({placeholders})
484+
WHERE LOWER(t.topics) LIKE ?
471485
"""
472486

473-
result = topic_service.con.execute(query, topics_lower).fetchone()
487+
result = topic_service.con.execute(query, [search_pattern]).fetchone()
474488
count = result[0] if result else 0
475489

476490
return jsonify({
@@ -485,6 +499,124 @@ def get_unique_repos():
485499
}), 500
486500

487501

502+
@app.route("/api/create-edges-on-graph", methods=["POST"])
503+
def create_edges_on_graph():
504+
"""
505+
Create edges on an existing graph based on specified criteria.
506+
507+
Expected request body:
508+
{
509+
"gexfContent": "existing GEXF content",
510+
"criteria_config": {
511+
"topic_based_linking": true,
512+
"topic_threshold": 2,
513+
"contributor_overlap_enabled": true,
514+
"contributor_threshold": 1,
515+
"shared_organization_enabled": false,
516+
"common_stargazers_enabled": true,
517+
"stargazer_threshold": 5,
518+
"use_and_logic": false
519+
}
520+
}
521+
"""
522+
try:
523+
data = request.get_json()
524+
gexf_content = data.get("gexfContent", "")
525+
criteria_config = data.get("criteria_config", {})
526+
527+
if not gexf_content:
528+
return jsonify({
529+
"success": False,
530+
"error": "No GEXF content provided"
531+
}), 400
532+
533+
# Parse the existing GEXF content
534+
import tempfile
535+
import networkx as nx
536+
537+
# Create a temporary file to parse the GEXF
538+
with tempfile.NamedTemporaryFile(mode='w', suffix='.gexf', delete=False) as temp_file:
539+
temp_file.write(gexf_content)
540+
temp_file_path = temp_file.name
541+
542+
try:
543+
# Read the existing graph
544+
G = nx.read_gexf(temp_file_path)
545+
finally:
546+
# Clean up temporary file
547+
import os
548+
os.unlink(temp_file_path)
549+
550+
if not G.nodes():
551+
return jsonify({
552+
"success": False,
553+
"error": "No nodes found in the provided GEXF content"
554+
}), 404
555+
556+
# Validate that at least one criterion is enabled
557+
enabled_criteria = [
558+
criteria_config.get('topic_based_linking', False),
559+
criteria_config.get('contributor_overlap_enabled', False),
560+
criteria_config.get('shared_organization_enabled', False),
561+
criteria_config.get('common_stargazers_enabled', False)
562+
]
563+
564+
if not any(enabled_criteria):
565+
return jsonify({
566+
"success": False,
567+
"error": "At least one edge creation criterion must be enabled"
568+
}), 400
569+
570+
# Create a new service instance for edge creation on existing graphs
571+
from services.edge_generation_service import EdgeGenerationService
572+
edge_service = EdgeGenerationService()
573+
574+
# Create edges based on the criteria
575+
edges_created = edge_service.create_edges_on_existing_graph(G, criteria_config)
576+
577+
# Save the updated graph
578+
import hashlib
579+
from datetime import datetime
580+
581+
# Create hash from criteria
582+
criteria_str = json.dumps(criteria_config, sort_keys=True)
583+
hash_object = hashlib.md5(criteria_str.encode())
584+
hash_hex = hash_object.hexdigest()[:12]
585+
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
586+
filename = f"updated_graph_{hash_hex}_{timestamp}.gexf"
587+
588+
# Save to gexf directory
589+
gexf_dir = os.path.join(os.path.dirname(__file__), "gexf")
590+
os.makedirs(gexf_dir, exist_ok=True)
591+
gexf_path = os.path.join(gexf_dir, filename)
592+
593+
# Save updated graph
594+
edge_service.save_graph_with_edges(G, gexf_path)
595+
596+
# Read the updated GEXF file content
597+
with open(gexf_path, "r", encoding="utf-8") as f:
598+
updated_gexf_content = f.read()
599+
600+
# Get statistics
601+
graph_stats = edge_service.get_edge_statistics(G)
602+
603+
return jsonify({
604+
"success": True,
605+
"gexfContent": updated_gexf_content,
606+
"filename": filename,
607+
"edgesCreated": edges_created,
608+
"graph_statistics": graph_stats
609+
})
610+
611+
except Exception as e:
612+
print(f"Error creating edges on graph: {str(e)}")
613+
return jsonify({
614+
"success": False,
615+
"error": str(e),
616+
"message": "An error occurred while creating edges on the graph"
617+
}), 500
618+
619+
488620
@app.route("/")
489621
def home():
490622
return "Hello World!"

0 commit comments

Comments
 (0)