Skip to content

Commit 6a6dbff

Browse files
authored
Merge pull request #2930 from chaoss/dev
Release 0.76.3: Fixes GitLab URL nesting Issue
2 parents c8eba65 + 4a4d121 commit 6a6dbff

File tree

9 files changed

+63
-43
lines changed

9 files changed

+63
-43
lines changed

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Augur NEW Release v0.76.2
1+
# Augur NEW Release v0.76.3
22

33
Augur is primarily a data engineering tool that makes it possible for data scientists to gather open source software community data. Less data carpentry for everyone else!
44
The primary way of looking at Augur data is through [8Knot](https://github.com/oss-aspen/8knot). A public instance of 8Knot is available at https://metrix.chaoss.io, and it is tied to a public instance of Augur at https://ai.chaoss.io
@@ -10,7 +10,7 @@ The primary way of looking at Augur data is through [8Knot](https://github.com/o
1010
## NEW RELEASE ALERT!
1111
### [If you want to jump right in, updated docker build/compose and bare metal installation instructions are available here](docs/new-install.md)
1212

13-
Augur is now releasing a dramatically improved new version to the main branch. It is also available here: https://github.com/chaoss/augur/releases/tag/v0.76.2
13+
Augur is now releasing a dramatically improved new version to the main branch. It is also available here: https://github.com/chaoss/augur/releases/tag/v0.76.3
1414

1515
- The `main` branch is a stable version of our new architecture, which features:
1616
- Dramatic improvement in the speed of large-scale data collection (100,000+ repos). All data is obtained for 100k+ repos within 2 weeks.

augur/api/view/api.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ def av_add_user_repo():
106106
# matches https://gitlab.com/{org}/{repo}/ or http://gitlab.com/{org}/{repo}
107107
elif Repo.parse_gitlab_repo_url(url)[0]:
108108

109-
org_name, repo_name = Repo.parse_github_repo_url(url)
109+
org_name, repo_name = Repo.parse_gitlab_repo_url(url)
110110
repo_git = f"https://gitlab.com/{org_name}/{repo_name}"
111111

112112
# TODO: gitlab ensure the whole repo git is inserted so it can be found here

augur/application/db/models/augur_data.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import logging
2727
import re
2828
import json
29+
import urllib.parse
2930

3031

3132
from augur.application.db.models.base import Base
@@ -971,7 +972,7 @@ def is_valid_gitlab_repo(gl_session, url: str) -> bool:
971972
return False, {"status": "Invalid repo URL"}
972973

973974
# Encode namespace and project name for the API request
974-
project_identifier = f"{owner}%2F{repo}"
975+
project_identifier = urllib.parse.quote(f"{owner}/{repo}", safe='')
975976
url = REPO_ENDPOINT.format(project_identifier)
976977

977978
attempts = 0
@@ -1030,7 +1031,7 @@ def parse_gitlab_repo_url(url: str) -> tuple:
10301031
Tuple of owner and repo. Or a tuple of None and None if the url is invalid.
10311032
"""
10321033

1033-
result = re.search(r"https?:\/\/gitlab\.com\/([A-Za-z0-9 \- _]+)\/([A-Za-z0-9 \- _ \.]+)(.git)?\/?$", url)
1034+
result = re.search(r"https?:\/\/gitlab\.com\/([A-Za-z0-9\-_\/]+)\/([A-Za-z0-9\-_]+)(\.git)?\/?$", url)
10341035

10351036
if not result:
10361037
return None, None

augur/tasks/github/util/util.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""Utility functions that are useful for several Github tasks"""
22
from typing import Any, List, Tuple
33
import logging
4+
import urllib.parse
45
import json
56
import httpx
67
from augur.tasks.github.util.github_random_key_auth import GithubRandomKeyAuth
@@ -46,6 +47,10 @@ def get_owner_repo(git_url: str) -> Tuple[str, str]:
4647

4748
return owner, repo
4849

50+
def get_gitlab_repo_identifier(owner, repo):
51+
52+
return urllib.parse.quote(f"{owner}/{repo}", safe='')
53+
4954

5055
def parse_json_response(logger: logging.Logger, response: httpx.Response) -> dict:
5156
# try to get json from response

augur/tasks/gitlab/events_task.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@
77
from augur.tasks.init.celery_app import AugurCoreRepoCollectionTask
88
from augur.tasks.gitlab.gitlab_api_handler import GitlabApiHandler
99
from augur.application.db.data_parse import extract_gitlab_mr_event_data, extract_gitlab_issue_event_data
10-
from augur.tasks.github.util.util import get_owner_repo
11-
from augur.application.db.models import Issue, IssueEvent, PullRequest, PullRequestEvent
10+
from augur.tasks.github.util.util import get_gitlab_repo_identifier
11+
from augur.application.db.models import Issue, IssueEvent, PullRequest, PullRequestEvent, Repo
1212
from augur.application.db.lib import bulk_insert_dicts, get_repo_by_repo_git, get_session
1313
from augur.tasks.gitlab.gitlab_random_key_auth import GitlabRandomKeyAuth
1414

@@ -24,7 +24,7 @@ def collect_gitlab_issue_events(repo_git) -> int:
2424
repo_git: the repo url string
2525
"""
2626

27-
owner, repo = get_owner_repo(repo_git)
27+
owner, repo = Repo.parse_gitlab_repo_url(repo_git)
2828

2929
logger = logging.getLogger(collect_gitlab_issue_events.__name__)
3030

@@ -52,7 +52,7 @@ def collect_gitlab_merge_request_events(repo_git) -> int:
5252
repo_git: the repo url string
5353
"""
5454

55-
owner, repo = get_owner_repo(repo_git)
55+
owner, repo = Repo.parse_gitlab_repo_url(repo_git)
5656

5757
logger = logging.getLogger(collect_gitlab_issue_events.__name__)
5858

@@ -82,11 +82,13 @@ def retrieve_all_gitlab_event_data(gtype, repo_git, logger, key_auth) -> None:
8282
key_auth: key auth cache and rotator object
8383
"""
8484

85-
owner, repo = get_owner_repo(repo_git)
85+
owner, repo = Repo.parse_gitlab_repo_url(repo_git)
86+
87+
repo_identifier = get_gitlab_repo_identifier(owner, repo)
8688

8789
logger.info(f"Collecting gitlab issue events for {owner}/{repo}")
8890

89-
url = f"https://gitlab.com/api/v4/projects/{owner}%2f{repo}/events?target_type={gtype}"
91+
url = f"https://gitlab.com/api/v4/projects/{repo_identifier}/events?target_type={gtype}"
9092
events = GitlabApiHandler(key_auth, logger)
9193

9294
all_data = []

augur/tasks/gitlab/issues_task.py

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@
88
from augur.tasks.init.celery_app import AugurCoreRepoCollectionTask
99
from augur.tasks.gitlab.gitlab_api_handler import GitlabApiHandler
1010
from augur.application.db.data_parse import extract_needed_issue_data_from_gitlab_issue, extract_needed_gitlab_issue_label_data, extract_needed_gitlab_issue_assignee_data, extract_needed_gitlab_issue_message_ref_data, extract_needed_gitlab_message_data, extract_needed_gitlab_contributor_data
11-
from augur.tasks.github.util.util import get_owner_repo, add_key_value_pair_to_dicts
12-
from augur.application.db.models import Issue, IssueLabel, IssueAssignee, IssueMessageRef, Message, Contributor
11+
from augur.tasks.github.util.util import get_gitlab_repo_identifier, add_key_value_pair_to_dicts
12+
from augur.application.db.models import Issue, IssueLabel, IssueAssignee, IssueMessageRef, Message, Contributor, Repo
1313
from augur.tasks.util.worker_util import remove_duplicate_dicts
1414
from augur.application.db.lib import bulk_insert_dicts, get_repo_by_repo_git, get_session
1515
from augur.tasks.gitlab.gitlab_random_key_auth import GitlabRandomKeyAuth
@@ -32,7 +32,7 @@ def collect_gitlab_issues(repo_git : str) -> int:
3232
key_auth = GitlabRandomKeyAuth(logger)
3333

3434
try:
35-
owner, repo = get_owner_repo(repo_git)
35+
owner, repo = Repo.parse_gitlab_repo_url(repo_git)
3636

3737
issue_data = retrieve_all_gitlab_issue_data(repo_git, logger, key_auth)
3838

@@ -57,11 +57,13 @@ def retrieve_all_gitlab_issue_data(repo_git, logger, key_auth) -> None:
5757
key_auth: key auth cache and rotator object
5858
"""
5959

60-
owner, repo = get_owner_repo(repo_git)
60+
owner, repo = Repo.parse_gitlab_repo_url(repo_git)
61+
62+
repo_identifier = get_gitlab_repo_identifier(owner, repo)
6163

6264
logger.info(f"Collecting gitlab issues for {owner}/{repo}")
6365

64-
url = f"https://gitlab.com/api/v4/projects/{owner}%2f{repo}/issues?with_labels_details=True"
66+
url = f"https://gitlab.com/api/v4/projects/{repo_identifier}/issues?with_labels_details=True"
6567
issues = GitlabApiHandler(key_auth, logger)
6668

6769
all_data = []
@@ -207,7 +209,7 @@ def collect_gitlab_issue_comments(issue_ids, repo_git) -> int:
207209
repo_git: repo url
208210
"""
209211

210-
owner, repo = get_owner_repo(repo_git)
212+
owner, repo = Repo.parse_gitlab_repo_url(repo_git)
211213

212214
logger = logging.getLogger(collect_gitlab_issues.__name__)
213215

@@ -237,7 +239,9 @@ def retrieve_all_gitlab_issue_comments(key_auth, logger, issue_ids, repo_git):
237239
repo_git: repo url
238240
"""
239241

240-
owner, repo = get_owner_repo(repo_git)
242+
owner, repo = Repo.parse_gitlab_repo_url(repo_git)
243+
244+
repo_identifier = get_gitlab_repo_identifier(owner, repo)
241245

242246
all_comments = {}
243247
issue_count = len(issue_ids)
@@ -249,7 +253,7 @@ def retrieve_all_gitlab_issue_comments(key_auth, logger, issue_ids, repo_git):
249253

250254
logger.info(f"Collecting {owner}/{repo} gitlab issue comments for issue {index} of {issue_count}")
251255

252-
url = f"https://gitlab.com/api/v4/projects/{owner}%2f{repo}/issues/{id}/notes"
256+
url = f"https://gitlab.com/api/v4/projects/{repo_identifier}/issues/{id}/notes"
253257

254258
for page_data, _ in comments.iter_pages(url):
255259

augur/tasks/gitlab/merge_request_task.py

Lines changed: 26 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from augur.tasks.init.celery_app import AugurCoreRepoCollectionTask
55
from augur.tasks.gitlab.gitlab_api_handler import GitlabApiHandler
66
from augur.application.db.data_parse import extract_needed_pr_data_from_gitlab_merge_request, extract_needed_merge_request_assignee_data, extract_needed_mr_label_data, extract_needed_mr_reviewer_data, extract_needed_mr_commit_data, extract_needed_mr_file_data, extract_needed_mr_metadata, extract_needed_gitlab_mr_message_ref_data, extract_needed_gitlab_message_data, extract_needed_gitlab_contributor_data
7-
from augur.tasks.github.util.util import get_owner_repo, add_key_value_pair_to_dicts
7+
from augur.tasks.github.util.util import get_gitlab_repo_identifier, add_key_value_pair_to_dicts
88
from augur.application.db.models import PullRequest, PullRequestLabel, PullRequestMeta, PullRequestCommit, PullRequestFile, PullRequestMessageRef, Repo, Message, Contributor, PullRequestAssignee
99
from augur.tasks.gitlab.gitlab_random_key_auth import GitlabRandomKeyAuth
1010
from augur.tasks.util.worker_util import remove_duplicate_dicts
@@ -26,7 +26,7 @@ def collect_gitlab_merge_requests(repo_git: str) -> int:
2626

2727
repo_id = get_repo_by_repo_git(repo_git).repo_id
2828

29-
owner, repo = get_owner_repo(repo_git)
29+
owner, repo = Repo.parse_gitlab_repo_url(repo_git)
3030

3131
key_auth = GitlabRandomKeyAuth(logger)
3232

@@ -51,11 +51,13 @@ def retrieve_all_mr_data(repo_git: str, logger, key_auth) -> None:
5151
key_auth: key auth cache and rotator object
5252
"""
5353

54-
owner, repo = get_owner_repo(repo_git)
54+
owner, repo = Repo.parse_gitlab_repo_url(repo_git)
55+
56+
repo_identifier = get_gitlab_repo_identifier(owner, repo)
5557

5658
logger.info(f"Collecting pull requests for {owner}/{repo}")
5759

58-
url = f"https://gitlab.com/api/v4/projects/{owner}%2f{repo}/merge_requests?with_labels_details=True"
60+
url = f"https://gitlab.com/api/v4/projects/{repo_identifier}/merge_requests?with_labels_details=True"
5961
mrs = GitlabApiHandler(key_auth, logger)
6062

6163
all_data = []
@@ -171,15 +173,17 @@ def collect_merge_request_comments(mr_ids, repo_git) -> int:
171173
repo_git: the repo url string
172174
"""
173175

174-
owner, repo = get_owner_repo(repo_git)
176+
owner, repo = Repo.parse_gitlab_repo_url(repo_git)
175177

176178
logger = logging.getLogger(collect_merge_request_comments.__name__)
177179

180+
repo_identifier = get_gitlab_repo_identifier(owner, repo)
181+
178182
repo_id = get_repo_by_repo_git(repo_git).repo_id
179183

180184
key_auth = GitlabRandomKeyAuth(logger)
181185

182-
url = "https://gitlab.com/api/v4/projects/{owner}%2f{repo}/merge_requests/{id}/notes".format(owner=owner, repo=repo, id="{id}")
186+
url = "https://gitlab.com/api/v4/projects/{repo_identifier}/merge_requests/{id}/notes".format(repo_identifier=repo_identifier, id="{id}")
183187
comments = retrieve_merge_request_data(mr_ids, url, "comments", owner, repo, key_auth, logger, response_type="list")
184188

185189
with get_session() as session:
@@ -282,15 +286,17 @@ def collect_merge_request_metadata(mr_ids, repo_git) -> int:
282286
repo_git: the repo url string
283287
"""
284288

285-
owner, repo = get_owner_repo(repo_git)
289+
owner, repo = Repo.parse_gitlab_repo_url(repo_git)
290+
291+
repo_identifier = get_gitlab_repo_identifier(owner, repo)
286292

287293
logger = logging.getLogger(collect_merge_request_metadata.__name__)
288294

289295
repo_id = get_repo_by_repo_git(repo_git).repo_id
290296

291297
key_auth = GitlabRandomKeyAuth(logger)
292298

293-
url = "https://gitlab.com/api/v4/projects/{owner}%2f{repo}/merge_requests/{id}".format(owner=owner, repo=repo, id="{id}")
299+
url = "https://gitlab.com/api/v4/projects/{repo_identifier}/merge_requests/{id}".format(repo_identifier=repo_identifier, id="{id}")
294300
metadata_list = retrieve_merge_request_data(mr_ids, url, "metadata", owner, repo, key_auth, logger, response_type="dict")
295301

296302
with get_session() as session:
@@ -347,15 +353,17 @@ def collect_merge_request_reviewers(mr_ids, repo_git) -> int:
347353
repo_git: the repo url string
348354
"""
349355

350-
owner, repo = get_owner_repo(repo_git)
356+
owner, repo = Repo.parse_gitlab_repo_url(repo_git)
357+
358+
repo_identifier = get_gitlab_repo_identifier(owner, repo)
351359

352360
logger = logging.getLogger(collect_merge_request_reviewers.__name__)
353361

354362
repo_id = get_repo_by_repo_git(repo_git).repo_id
355363

356364
key_auth = GitlabRandomKeyAuth(logger)
357365

358-
url = "https://gitlab.com/api/v4/projects/{owner}%2f{repo}/merge_requests/{id}/approvals".format(owner=owner, repo=repo, id="{id}")
366+
url = "https://gitlab.com/api/v4/projects/{repo_identifier}/merge_requests/{id}/approvals".format(repo_identifier=repo_identifier, id="{id}")
359367
reviewers = retrieve_merge_request_data(mr_ids, url, "reviewers", owner, repo, key_auth, logger, response_type="dict")
360368

361369
with get_session() as session:
@@ -414,15 +422,17 @@ def collect_merge_request_commits(mr_ids, repo_git) -> int:
414422
repo_git: the repo url string
415423
"""
416424

417-
owner, repo = get_owner_repo(repo_git)
425+
owner, repo = Repo.parse_gitlab_repo_url(repo_git)
426+
427+
repo_identifier = get_gitlab_repo_identifier(owner, repo)
418428

419429
logger = logging.getLogger(collect_merge_request_commits.__name__)
420430

421431
repo_id = get_repo_by_repo_git(repo_git).repo_id
422432

423433
key_auth = GitlabRandomKeyAuth(logger)
424434

425-
url = "https://gitlab.com/api/v4/projects/{owner}%2f{repo}/merge_requests/{id}/commits".format(owner=owner, repo=repo, id="{id}")
435+
url = "https://gitlab.com/api/v4/projects/{repo_identifier}/merge_requests/{id}/commits".format(repo_identifier=repo_identifier, id="{id}")
426436
commits = retrieve_merge_request_data(mr_ids, url, "commits", owner, repo, key_auth, logger, response_type="list")
427437

428438
with get_session() as session:
@@ -484,13 +494,15 @@ def collect_merge_request_files(mr_ids, repo_git) -> int:
484494

485495
logger = logging.getLogger(collect_merge_request_files.__name__)
486496

487-
owner, repo = get_owner_repo(repo_git)
497+
owner, repo = Repo.parse_gitlab_repo_url(repo_git)
498+
499+
repo_identifier = get_gitlab_repo_identifier(owner, repo)
488500

489501
repo_id = get_repo_by_repo_git(repo_git).repo_id
490502

491503
key_auth = GitlabRandomKeyAuth(logger)
492504

493-
url = "https://gitlab.com/api/v4/projects/{owner}%2f{repo}/merge_requests/{id}/changes".format(owner=owner, repo=repo, id="{id}")
505+
url = "https://gitlab.com/api/v4/projects/{repo_identifier}/merge_requests/{id}/changes".format(repo_identifier=repo_identifier, id="{id}")
494506
files = retrieve_merge_request_data(mr_ids, url, "files", owner, repo, key_auth, logger, response_type="dict")
495507

496508
with get_session() as session:

augur/tasks/init/celery_app.py

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -217,9 +217,9 @@ def setup_periodic_tasks(sender, **kwargs):
217217
sender.add_periodic_task(collection_interval, augur_collection_monitor.s())
218218

219219
#Do longer tasks less often
220-
non_domain_collection_interval = collection_interval * 300
221-
logger.info(f"Scheduling non-repo-domain collection every {non_domain_collection_interval/60} minutes")
222-
sender.add_periodic_task(non_domain_collection_interval, non_repo_domain_tasks.s())
220+
logger.info(f"Scheduling data analysis every 30 days")
221+
thirty_days_in_seconds = 30*24*60*60
222+
sender.add_periodic_task(thirty_days_in_seconds, non_repo_domain_tasks.s())
223223

224224
mat_views_interval = int(config.get_value('Celery', 'refresh_materialized_views_interval_in_days'))
225225
logger.info(f"Scheduling refresh materialized view every night at 1am CDT")
@@ -231,10 +231,6 @@ def setup_periodic_tasks(sender, **kwargs):
231231
logger.info(f"Setting 404 repos to be marked for retry on midnight each day")
232232
sender.add_periodic_task(crontab(hour=0, minute=0),retry_errored_repos.s())
233233

234-
logger.info(f"Scheduling contributor breadth every 30 days")
235-
thirty_days_in_seconds = 30*24*60*60
236-
sender.add_periodic_task(thirty_days_in_seconds, contributor_breadth_model.s())
237-
238234
@after_setup_logger.connect
239235
def setup_loggers(*args,**kwargs):
240236
"""Override Celery loggers with our own."""

metadata.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55

66
__short_description__ = "Python 3 package for free/libre and open-source software community metrics, models & data collection"
77

8-
__version__ = "0.76.2"
9-
__release__ = "v0.76.2 (Pumpkin Space)"
8+
__version__ = "0.76.3"
9+
__release__ = "v0.76.3 (Pumpkin Laser)"
1010

1111
__license__ = "MIT"
12-
__copyright__ = "University of Missouri, University of Nebraska-Omaha, CHAOSS, Brian Warner & Augurlabs 2024"
12+
__copyright__ = "University of Missouri, University of Nebraska-Omaha, CHAOSS, Brian Warner & Augurlabs 2112"

0 commit comments

Comments
 (0)