Skip to content

Commit

Permalink
Fix: issue #40 : Bug: Branch names with "/" in the branch name are not cloned correctly (#52)
Browse files Browse the repository at this point in the history

* Enhance URL parsing to better handle branch names and commit hashes
  • Loading branch information
harshit-wadhwani authored Dec 28, 2024
1 parent eb73a0c commit 16def8a
Showing 1 changed file with 25 additions and 7 deletions.
32 changes: 25 additions & 7 deletions src/gitingest/parse_query.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import os
import uuid
from typing import Any, Dict, List, Optional, Union

from urllib.parse import unquote
from typing import Any, Dict, List, Optional, Union
from gitingest.ignore_patterns import DEFAULT_IGNORE_PATTERNS

TMP_BASE_PATH = "../tmp"
Expand All @@ -22,6 +23,8 @@ def parse_url(url: str) -> Dict[str, Any]:
}

url = url.split(" ")[0]
url = unquote(url) # Decode URL-encoded characters

if not url.startswith('https://'):
url = 'https://' + url

Expand All @@ -36,19 +39,34 @@ def parse_url(url: str) -> Dict[str, Any]:
parsed["user_name"] = path_parts[0]
parsed["repo_name"] = path_parts[1]

# Keep original URL format
# Keep original URL format but with decoded components
parsed["url"] = f"https://{domain}/{parsed['user_name']}/{parsed['repo_name']}"
parsed['slug'] = f"{parsed['user_name']}-{parsed['repo_name']}"
parsed["id"] = str(uuid.uuid4())
parsed["local_path"] = f"{TMP_BASE_PATH}/{parsed['id']}/{parsed['slug']}"

if len(path_parts) > 3:
parsed["type"] = path_parts[2]
parsed["branch"] = path_parts[3]
if len(parsed['branch']) == 40 and all(c in '0123456789abcdefABCDEF' for c in parsed['branch']):
parsed["commit"] = parsed['branch']

parsed["subpath"] = "/" + "/".join(path_parts[4:])
parsed["type"] = path_parts[2] # Usually 'tree' or 'blob'

# Find the commit hash or reconstruct the branch name
remaining_parts = path_parts[3:]
if remaining_parts[0] and len(remaining_parts[0]) == 40 and all(c in '0123456789abcdefABCDEF' for c in remaining_parts[0]):
parsed["commit"] = remaining_parts[0]
parsed["subpath"] = "/" + "/".join(remaining_parts[1:]) if len(remaining_parts) > 1 else "/"
else:
# Handle branch names with slashes and special characters
for i, part in enumerate(remaining_parts):
if part in ('tree', 'blob'):
# Found another type indicator, everything before this was the branch name
parsed["branch"] = "/".join(remaining_parts[:i])
parsed["subpath"] = "/" + "/".join(remaining_parts[i+2:]) if len(remaining_parts) > i+2 else "/"
break
else:
# No additional type indicator found, assume everything is part of the branch name
parsed["branch"] = "/".join(remaining_parts)
parsed["subpath"] = "/"


return parsed

Expand Down

0 comments on commit 16def8a

Please sign in to comment.