-
Notifications
You must be signed in to change notification settings - Fork 362
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: make parser domain-agnostic to support multiple Git hosts
- added list of known domains/Git hosts in `query_parser.py`
- fixed bug from [#115](#115): corrected case handling for URL components — scheme, domain, username, and repository are case-insensitive, but paths beyond (e.g., file names, branches) are case-sensitive
- implemented `try_domains_for_user_and_repo` in `query_parser.py` to iteratively guess the correct domain until success or supported hosts are exhausted
- added helper functions `_get_user_and_repo_from_path`, `_validate_host`, and `_validate_scheme` in `query_parser.py`
- extended `_parse_repo_source` in `query_parser.py` to be Git host agnostic by using `try_domains_for_user_and_repo`
- added tests `test_parse_url_unsupported_host` and `test_parse_query_with_branch` in `test_query_parser.py`
- created new file `test_git_host_agnostic.py` to verify domain/Git host agnostic behavior
- Loading branch information
1 parent
a57f614
commit 9bdee8f
Showing 4 changed files with 251 additions and 52 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
""" Tests to verify that the query parser is Git host agnostic. """ | ||
|
||
import pytest | ||
|
||
from gitingest.query_parser import parse_query | ||
|
||
|
||
@pytest.mark.parametrize(
    "urls, expected_user, expected_repo, expected_url",
    [
        (
            [
                "https://github.com/tiangolo/fastapi",
                "github.com/tiangolo/fastapi",
                "tiangolo/fastapi",
            ],
            "tiangolo",
            "fastapi",
            "https://github.com/tiangolo/fastapi",
        ),
        (
            [
                "https://gitlab.com/gitlab-org/gitlab-runner",
                "gitlab.com/gitlab-org/gitlab-runner",
                "gitlab-org/gitlab-runner",
            ],
            "gitlab-org",
            "gitlab-runner",
            "https://gitlab.com/gitlab-org/gitlab-runner",
        ),
        (
            [
                "https://bitbucket.org/na-dna/llm-knowledge-share",
                "bitbucket.org/na-dna/llm-knowledge-share",
                "na-dna/llm-knowledge-share",
            ],
            "na-dna",
            "llm-knowledge-share",
            "https://bitbucket.org/na-dna/llm-knowledge-share",
        ),
        (
            [
                "https://gitea.com/xorm/xorm",
                "gitea.com/xorm/xorm",
                "xorm/xorm",
            ],
            "xorm",
            "xorm",
            "https://gitea.com/xorm/xorm",
        ),
    ],
)
@pytest.mark.asyncio
async def test_parse_query_without_host(
    urls: list[str],
    expected_user: str,
    expected_repo: str,
    expected_url: str,
) -> None:
    """Check that every spelling of a repository parses to the same result.

    Each parametrized case lists three spellings of one repository: the full
    URL, the host-prefixed form without a scheme, and the bare ``user/repo``
    slug (which carries no host at all). All of them are expected to yield the
    same user, repository and canonical URL, with no branch, commit or type.

    Every assertion carries the input being parsed as its message, so a
    failure inside the loop identifies which spelling broke.
    """
    for url in urls:
        result = await parse_query(url, max_file_size=50, from_web=True)
        # Shown by pytest on failure; without it the failing spelling is not
        # visible because all spellings share one parametrized test case.
        context = f"while parsing {url!r}"

        # Common assertions for all cases
        assert result["user_name"] == expected_user, context
        assert result["repo_name"] == expected_repo, context
        assert result["url"] == expected_url, context
        assert result["slug"] == f"{expected_user}-{expected_repo}", context
        assert result["id"] is not None, context
        assert result["subpath"] == "/", context
        assert result["branch"] is None, context
        assert result["commit"] is None, context
        assert result["type"] is None, context
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters