Skip to content

Commit b6382bd

Browse files
authored
Merge branch 'dev' into main
Signed-off-by: Sean P. Goggins <[email protected]>
2 parents b3dcfe6 + 73c8fb3 commit b6382bd

File tree

11 files changed

+536
-144
lines changed

11 files changed

+536
-144
lines changed

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,10 @@ We follow the [First Timers Only](https://www.firsttimersonly.com/) philosophy o
1111
## NEW RELEASE ALERT!
1212
**If you want to jump right in, the updated docker, docker-compose and bare metal installation instructions are available [here](docs/new-install.md)**.
1313

14+
1415
Augur is now releasing a dramatically improved new version to the ```main``` branch. It is also available [here](https://github.com/chaoss/augur/releases/tag/v0.76.2).
1516

17+
1618
- The `main` branch is a stable version of our new architecture, which features:
1719
- Dramatic improvement in the speed of large scale data collection (100,000+ repos). All data is obtained for 100k+ repos within 2 weeks.
1820
- A new job management architecture that uses Celery and Redis to manage queues, and enables users to run a Flower job monitoring dashboard.

augur/api/view/api.py

Lines changed: 47 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import re
33
from flask_login import current_user, login_required
44
from augur.application.db.models import Repo, RepoGroup, UserGroup, UserRepo
5-
from augur.tasks.frontend import add_org_repo_list, parse_org_and_repo_name, parse_org_name
5+
from augur.tasks.frontend import add_github_orgs_and_repos, parse_org_and_repo_name, parse_org_name, add_gitlab_repos
66
from .utils import *
77
from ..server import app
88
from augur.application.db.session import DatabaseSession
@@ -14,18 +14,6 @@ def cache(file=None):
1414
return redirect(url_for('static', filename="cache"))
1515
return redirect(url_for('static', filename="cache/" + toCacheFilename(file, False)))
1616

17-
18-
def add_existing_repo_to_group(session, user_id, group_name, repo_id):
19-
20-
logger.info("Adding existing repo to group")
21-
22-
group_id = UserGroup.convert_group_name_to_id(session, user_id, group_name)
23-
if group_id is None:
24-
return False
25-
26-
result = UserRepo.insert(session, repo_id, group_id)
27-
if not result:
28-
return False
2917

3018
def add_existing_org_to_group(session, user_id, group_name, rg_id):
3119

@@ -48,6 +36,8 @@ def add_existing_org_to_group(session, user_id, group_name, rg_id):
4836
@login_required
4937
def av_add_user_repo():
5038

39+
print("Adding user repos")
40+
5141
urls = request.form.get('urls')
5242
group = request.form.get("group_name")
5343

@@ -68,58 +58,51 @@ def av_add_user_repo():
6858

6959
invalid_urls = []
7060

71-
with DatabaseSession(logger, current_app.engine) as session:
72-
for url in urls:
73-
74-
# matches https://github.com/{org}/ or htts://github.com/{org}
75-
if (org_name := Repo.parse_github_org_url(url)):
76-
rg_obj = RepoGroup.get_by_name(session, org_name)
77-
if rg_obj:
78-
# add the orgs repos to the group
79-
add_existing_org_to_group(session, current_user.user_id, group, rg_obj.repo_group_id)
80-
81-
# matches https://github.com/{org}/{repo}/ or htts://github.com/{org}/{repo}
82-
elif Repo.parse_github_repo_url(url)[0]:
83-
org_name, repo_name = Repo.parse_github_repo_url(url)
84-
repo_git = f"https://github.com/{org_name}/{repo_name}"
85-
repo_obj = Repo.get_by_repo_git(session, repo_git)
86-
if repo_obj:
87-
add_existing_repo_to_group(session, current_user.user_id, group, repo_obj.repo_id)
88-
89-
# matches /{org}/{repo}/ or /{org}/{repo} or {org}/{repo}/ or {org}/{repo}
90-
elif (match := parse_org_and_repo_name(url)):
91-
org, repo = match.groups()
92-
repo_git = f"https://github.com/{org}/{repo}"
93-
repo_obj = Repo.get_by_repo_git(session, repo_git)
94-
if repo_obj:
95-
add_existing_repo_to_group(session, current_user.user_id, group, repo_obj.repo_id)
61+
orgs = []
62+
repo_urls = []
63+
gitlab_repo_urls = []
64+
for url in urls:
65+
66+
# matches https://github.com/{org}/ or https://github.com/{org}
67+
if (org_name := Repo.parse_github_org_url(url)):
68+
orgs.append(org_name)
69+
70+
# matches https://github.com/{org}/{repo}/ or https://github.com/{org}/{repo}
71+
elif Repo.parse_github_repo_url(url)[0]:
72+
repo_urls.append(url)
73+
74+
# matches /{org}/{repo}/ or /{org}/{repo} or {org}/{repo}/ or {org}/{repo}
75+
elif (match := parse_org_and_repo_name(url)):
76+
org, repo = match.groups()
77+
repo_git = f"https://github.com/{org}/{repo}"
78+
repo_urls.append(repo_git)
79+
80+
# matches /{org}/ or /{org} or {org}/ or {org}
81+
elif (match := parse_org_name(url)):
82+
org_name = match.group(1)
83+
orgs.append(org_name)
84+
85+
# matches https://gitlab.com/{org}/{repo}/ or http://gitlab.com/{org}/{repo}
86+
elif Repo.parse_gitlab_repo_url(url)[0]:
87+
88+
org_name, repo_name = Repo.parse_gitlab_repo_url(url)
89+
repo_git = f"https://gitlab.com/{org_name}/{repo_name}"
9690

97-
# matches /{org}/ or /{org} or {org}/ or {org}
98-
elif (match := parse_org_name(url)):
99-
org_name = match.group(1)
100-
rg_obj = RepoGroup.get_by_name(session, org_name)
101-
logger.info(rg_obj)
102-
if rg_obj:
103-
# add the orgs repos to the group
104-
add_existing_org_to_group(session, current_user.user_id, group, rg_obj.repo_group_id)
105-
106-
# matches https://gitlab.com/{org}/{repo}/ or http://gitlab.com/{org}/{repo}
107-
elif Repo.parse_gitlab_repo_url(url)[0]:
108-
109-
org_name, repo_name = Repo.parse_gitlab_repo_url(url)
110-
repo_git = f"https://gitlab.com/{org_name}/{repo_name}"
111-
112-
# TODO: gitlab ensure the whole repo git is inserted so it can be found here
113-
repo_obj = Repo.get_by_repo_git(session, repo_git)
114-
if repo_obj:
115-
add_existing_repo_to_group(session, current_user.user_id, group, repo_obj.repo_id)
116-
117-
else:
118-
invalid_urls.append(url)
119-
120-
if urls:
121-
urls = [url.lower() for url in urls]
122-
add_org_repo_list.si(current_user.user_id, group, urls).apply_async()
91+
gitlab_repo_urls.append(repo_git)
92+
else:
93+
invalid_urls.append(url)
94+
95+
96+
97+
if orgs or repo_urls:
98+
repo_urls = [url.lower() for url in repo_urls]
99+
orgs = [url.lower() for url in orgs]
100+
flash(f"Adding repos: {repo_urls}")
101+
flash(f"Adding orgs: {orgs}")
102+
add_github_orgs_and_repos.si(current_user.user_id, group, orgs, repo_urls).apply_async()
103+
104+
if gitlab_repo_urls:
105+
add_gitlab_repos(current_user.user_id, group, gitlab_repo_urls)
123106

124107
flash("Adding repos and orgs in the background")
125108

augur/application/db/lib.py

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from psycopg2.errors import DeadlockDetected
1010
from typing import List, Any, Optional, Union
1111

12-
from augur.application.db.models import Config, Repo, Commit, WorkerOauth, Issue, PullRequest, PullRequestReview, ContributorsAlias,UnresolvedCommitEmail, Contributor, CollectionStatus
12+
from augur.application.db.models import Config, Repo, Commit, WorkerOauth, Issue, PullRequest, PullRequestReview, ContributorsAlias,UnresolvedCommitEmail, Contributor, CollectionStatus, UserGroup, RepoGroup
1313
from augur.tasks.util.collection_state import CollectionState
1414
from augur.application.db import get_session, get_engine
1515
from augur.application.db.util import execute_session_query
@@ -144,6 +144,25 @@ def get_repo_by_repo_id(repo_id):
144144

145145
return repo
146146

147+
def get_github_repo_by_src_id(src_id):
    """Look up a GitHub-hosted repo row by its source-platform id.

    Args:
        src_id: Value compared for equality against ``Repo.repo_src_id``.

    Returns:
        The result of ``execute_session_query(query, 'first')`` for the
        filtered query -- presumably the first matching ``Repo`` or ``None``
        (the helper is defined elsewhere; confirm its contract).
    """
    # Only rows whose clone URL contains https://github.com are considered;
    # ilike() makes the substring match case-insensitive.
    github_url_pattern = '%https://github.com%'

    with get_session() as session:
        github_repos = session.query(Repo).filter(
            Repo.repo_src_id == src_id,
            Repo.repo_git.ilike(github_url_pattern),
        )
        return execute_session_query(github_repos, 'first')
155+
156+
def get_gitlab_repo_by_src_id(src_id):
    """Look up a GitLab-hosted repo row by its source-platform id.

    Args:
        src_id: Value compared for equality against ``Repo.repo_src_id``.

    Returns:
        The result of ``execute_session_query(query, 'first')`` for the
        filtered query -- presumably the first matching ``Repo`` or ``None``
        (the helper is defined elsewhere; confirm its contract).
    """
    # Only rows whose clone URL contains https://gitlab.com are considered;
    # ilike() makes the substring match case-insensitive.
    gitlab_url_pattern = '%https://gitlab.com%'

    with get_session() as session:
        gitlab_repos = session.query(Repo).filter(
            Repo.repo_src_id == src_id,
            Repo.repo_git.ilike(gitlab_url_pattern),
        )
        return execute_session_query(gitlab_repos, 'first')
164+
165+
147166
def remove_working_commits_by_repo_id_and_hashes(repo_id, commit_hashes):
148167

149168
remove_working_commits = s.sql.text("""DELETE FROM working_commits
@@ -553,3 +572,24 @@ def get_updated_issues(repo_id, since):
553572
with get_session() as session:
554573
return session.query(Issue).filter(Issue.repo_id == repo_id, Issue.updated_at >= since).order_by(Issue.gh_issue_number).all()
555574

575+
576+
577+
def get_group_by_name(user_id, group_name):
    """Fetch the user group with the given name that belongs to the given user.

    Args:
        user_id: Matched for equality against ``UserGroup.user_id``.
        group_name: Matched for equality against ``UserGroup.name``.

    Returns:
        The single matching ``UserGroup``, or ``None`` when no row matches.

    Raises:
        sqlalchemy.orm.exc.MultipleResultsFound: If more than one row matches
            (not handled here, so it propagates to the caller).
    """
    with get_session() as session:
        matching_groups = session.query(UserGroup).filter(
            UserGroup.user_id == user_id,
            UserGroup.name == group_name,
        )
        # one_or_none() gives None for zero rows and still raises when several
        # rows match -- the same outcome as one() with NoResultFound mapped to None.
        return matching_groups.one_or_none()
588+
589+
def get_repo_group_by_name(name):
    """Return the first repo group whose ``rg_name`` equals ``name``.

    Args:
        name: Matched for equality against ``RepoGroup.rg_name``.

    Returns:
        The first matching ``RepoGroup``, or ``None`` when the query yields
        no rows.
    """
    with get_session() as session:
        named_groups = session.query(RepoGroup).filter(RepoGroup.rg_name == name)
        return named_groups.first()
595+

augur/application/db/models/augur_data.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -870,6 +870,7 @@ class Repo(Base):
870870
data_collection_date = Column(
871871
TIMESTAMP(precision=0), server_default=text("CURRENT_TIMESTAMP")
872872
)
873+
repo_src_id = Column(BigInteger)
873874

874875
repo_group = relationship("RepoGroup", back_populates="repo")
875876
user_repo = relationship("UserRepo", back_populates="repo")
@@ -1064,7 +1065,7 @@ def parse_github_org_url(url):
10641065
return result.groups()[0]
10651066

10661067
@staticmethod
1067-
def insert_gitlab_repo(session, url: str, repo_group_id: int, tool_source):
1068+
def insert_gitlab_repo(session, url: str, repo_group_id: int, tool_source, repo_src_id):
10681069
"""Add a repo to the repo table.
10691070
10701071
Args:
@@ -1098,7 +1099,8 @@ def insert_gitlab_repo(session, url: str, repo_group_id: int, tool_source):
10981099
"repo_type": None,
10991100
"tool_source": tool_source,
11001101
"tool_version": "1.0",
1101-
"data_source": "Git"
1102+
"data_source": "Git",
1103+
"repo_src_id": repo_src_id
11021104
}
11031105

11041106
repo_unique = ["repo_git"]
@@ -1111,7 +1113,7 @@ def insert_gitlab_repo(session, url: str, repo_group_id: int, tool_source):
11111113
return result[0]["repo_id"]
11121114

11131115
@staticmethod
1114-
def insert_github_repo(session, url: str, repo_group_id: int, tool_source, repo_type):
1116+
def insert_github_repo(session, url: str, repo_group_id: int, tool_source, repo_type, repo_src_id):
11151117
"""Add a repo to the repo table.
11161118
11171119
Args:
@@ -1146,7 +1148,8 @@ def insert_github_repo(session, url: str, repo_group_id: int, tool_source, repo_
11461148
"repo_type": repo_type,
11471149
"tool_source": tool_source,
11481150
"tool_version": "1.0",
1149-
"data_source": "Git"
1151+
"data_source": "Git",
1152+
"repo_src_id": repo_src_id
11501153
}
11511154

11521155
repo_unique = ["repo_git"]
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
"""Add repo src id
2+
3+
Revision ID: 30
4+
Revises: 29
5+
Create Date: 2024-08-30
6+
7+
"""
8+
from alembic import op
9+
import sqlalchemy as sa
10+
11+
# revision identifiers, used by Alembic.
12+
revision = '30'
13+
down_revision = '29'
14+
branch_labels = None
15+
depends_on = None
16+
17+
18+
def upgrade():
    """Add a nullable ``repo_src_id`` column to ``augur_data.repo`` and make it unique."""
    src_id_column = sa.Column('repo_src_id', sa.BigInteger(), nullable=True)
    op.add_column('repo', src_id_column, schema='augur_data')

    # NOTE(review): uniqueness is enforced on repo_src_id alone; confirm that
    # ids coming from different hosting platforms can never collide.
    op.create_unique_constraint(
        'repo_src_id_unique',
        'repo',
        ['repo_src_id'],
        schema='augur_data',
    )
21+
22+
23+
def downgrade():
    """Drop the ``repo_src_id_unique`` constraint, then the ``repo_src_id`` column."""
    # The constraint is dropped before the column it is defined on.
    op.drop_constraint(
        'repo_src_id_unique',
        'repo',
        schema='augur_data',
        type_='unique',
    )
    op.drop_column('repo', 'repo_src_id', schema='augur_data')

0 commit comments

Comments
 (0)