
Commit c8eba65

Merge pull request #2898 from chaoss/augur-release-0.76.2
Augur release 0.76.2
2 parents fe11b0e + ef39e84 commit c8eba65

55 files changed: +15473 −2391 lines changed. Large commits have some content hidden by default, so only a subset of the changed files is shown below.

.gitignore

Lines changed: 1 addition & 0 deletions
@@ -10,6 +10,7 @@ augur_export_env.sh
 config.yml
 reports.yml
 *.pid
+*.sock

 node_modules/
 .idea/

README.md

Lines changed: 2 additions & 2 deletions
@@ -1,4 +1,4 @@
-# Augur NEW Release v0.76.1
+# Augur NEW Release v0.76.2

 Augur is primarily a data engineering tool that makes it possible for data scientists to gather open source software community data. Less data carpentry for everyone else!
 The primary way of looking at Augur data is through [8Knot](https://github.com/oss-aspen/8knot) ... A public instance of 8Knot is available at https://metrix.chaoss.io ... That is tied to a public instance of Augur at https://ai.chaoss.io
@@ -10,7 +10,7 @@ The primary way of looking at Augur data is through [8Knot](https://github.com/o
 ## NEW RELEASE ALERT!
 ### [If you want to jump right in, updated docker build/compose and bare metal installation instructions are available here](docs/new-install.md)

-Augur is now releasing a dramatically improved new version to the main branch. It is also available here: https://github.com/chaoss/augur/releases/tag/v0.76.1
+Augur is now releasing a dramatically improved new version to the main branch. It is also available here: https://github.com/chaoss/augur/releases/tag/v0.76.2

 - The `main` branch is a stable version of our new architecture, which features:
   - Dramatic improvement in the speed of large scale data collection (100,000+ repos). All data is obtained for 100k+ repos within 2 weeks.

augur/api/metrics/deps.py

Lines changed: 158 additions & 0 deletions
@@ -77,4 +77,162 @@ def deps(repo_group_id, repo_id=None, period='day', begin_date=None, end_date=No
     return results


+@register_metric()
+def libyear(repo_group_id, repo_id=None, period='day', begin_date=None, end_date=None):
+    """
+    Returns a list of all the dependencies in a project/repo/repo_group.
+
+    :param repo_id: The repository's id
+    :param repo_group_id: The repository's group id
+    :param period: To set the periodicity to 'day', 'week', 'month' or 'year', defaults to 'day'
+    :param begin_date: Specifies the begin date, defaults to '1970-1-1 00:00:00'
+    :param end_date: Specifies the end date, defaults to datetime.now()
+    :return: DataFrame of persons/period
+    """
+
+    if not begin_date:
+        begin_date = '1970-1-1 00:00:01'
+    if not end_date:
+        end_date = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+
+    if repo_id:
+
+        libyearSQL = s.sql.text("""
+            SELECT
+                rg_name,
+                repo_group_id,
+                repo_name,
+                d.repo_id,
+                repo_git,
+                forked_from,
+                repo_archived,
+                c.name,
+                c.libyear,
+                MAX ( C.data_collection_date ) AS most_recent_collection
+            FROM
+                (
+                SELECT A.rg_name AS rg_name,
+                    A.repo_group_id AS repo_group_id,
+                    b.repo_name AS repo_name,
+                    b.repo_id AS repo_id,
+                    b.repo_git AS repo_git,
+                    b.forked_from AS forked_from,
+                    b.repo_archived AS repo_archived
+                FROM
+                    repo_groups A,
+                    repo b
+                WHERE
+                    A.repo_group_id = b.repo_group_id
+                ORDER BY
+                    rg_name,
+                    repo_name
+                ) d,
+                (
+                SELECT DISTINCT
+                    f.repo_id,
+                    f.NAME,
+                    f.libyear,
+                    f.data_collection_date
+                FROM
+                    ( SELECT repo_id, NAME, MAX ( data_collection_date ) AS data_collection_date FROM augur_data.repo_deps_libyear WHERE repo_id = :repo_id GROUP BY repo_id, NAME ORDER BY NAME ) e,
+                    augur_data.repo_deps_libyear f
+                WHERE
+                    e.data_collection_date = f.data_collection_date and
+                    e.repo_id = f.repo_id
+                ORDER BY
+                    NAME
+                ) C
+            WHERE
+                d.repo_id = C.repo_id
+                AND C.repo_id = :repo_id
+            GROUP BY
+                rg_name,
+                repo_git,
+                repo_group_id,
+                repo_name,
+                d.repo_id,
+                forked_from,
+                repo_archived,
+                c.name,
+                c.libyear
+            ORDER BY
+                repo_id;
+        """)
+
+        with current_app.engine.connect() as conn:
+            results = pd.read_sql(libyearSQL, conn, params={'repo_id': repo_id})
+
+    else:
+
+        libyearSQL = s.sql.text("""
+            Select w.* from
+            (
+            SELECT
+                rg_name,
+                repo_group_id,
+                repo_name,
+                d.repo_id,
+                repo_git,
+                forked_from,
+                repo_archived,
+                c.name,
+                c.libyear,
+                MAX ( C.data_collection_date ) AS most_recent_collection
+            FROM
+                (
+                SELECT A.rg_name AS rg_name,
+                    A.repo_group_id AS repo_group_id,
+                    b.repo_name AS repo_name,
+                    b.repo_id AS repo_id,
+                    b.repo_git AS repo_git,
+                    b.forked_from AS forked_from,
+                    b.repo_archived AS repo_archived
+                FROM
+                    repo_groups A,
+                    repo b
+                WHERE
+                    A.repo_group_id = b.repo_group_id
+                ORDER BY
+                    rg_name,
+                    repo_name
+                ) d,
+                (
+                SELECT DISTINCT
+                    f.repo_id,
+                    f.NAME,
+                    f.libyear,
+                    f.data_collection_date
+                FROM
+                    ( SELECT repo_id, NAME, MAX ( data_collection_date ) AS data_collection_date FROM augur_data.repo_deps_libyear GROUP BY repo_id, NAME ORDER BY NAME ) e,
+                    augur_data.repo_deps_libyear f
+                WHERE
+                    e.data_collection_date = f.data_collection_date and
+                    e.repo_id = f.repo_id
+                ORDER BY
+                    NAME
+                ) C
+            WHERE
+                d.repo_id = C.repo_id
+            GROUP BY
+                rg_name,
+                repo_git,
+                repo_group_id,
+                repo_name,
+                d.repo_id,
+                forked_from,
+                repo_archived,
+                c.name,
+                c.libyear
+            ORDER BY
+                repo_id) w,
+            repo_groups y,
+            repo z
+            where w.repo_id=z.repo_id and
+            y.repo_group_id=z.repo_group_id
+            and z.repo_group_id = :repo_group_id
+        """)
+
+        with current_app.engine.connect() as conn:
+            results = pd.read_sql(libyearSQL, conn, params={'repo_group_id': repo_group_id})
+    return results

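
The new libyear metric is registered with @register_metric(), so it surfaces through Augur's metrics API alongside the existing deps metric. A minimal client sketch follows; the host, IDs, and endpoint path are assumptions for illustration, not values taken from this commit.

# Hypothetical client sketch -- host, IDs, and route layout are assumptions.
import requests

AUGUR_HOST = "http://localhost:5000"   # assumed local Augur API instance
REPO_GROUP_ID = 1                      # assumed repo group id
REPO_ID = 25430                        # assumed repo id

# Metrics registered via @register_metric() are generally exposed under the
# unstable REST API; the exact libyear path below is an assumption.
url = f"{AUGUR_HOST}/api/unstable/repo-groups/{REPO_GROUP_ID}/repos/{REPO_ID}/libyear"

response = requests.get(url, timeout=30)
response.raise_for_status()

# Each row pairs a dependency name with its libyear value (how far, in years,
# the pinned version lags behind the newest release).
for row in response.json():
    print(row.get("repo_name"), row.get("name"), row.get("libyear"))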

augur/api/routes/pull_request_reports.py

Lines changed: 7 additions & 0 deletions
@@ -21,6 +21,12 @@
 from bokeh.models.glyphs import Rect
 from bokeh.transform import dodge, factor_cmap, transform

+# from selenium.webdriver import Firefox, FirefoxOptions
+# options = FirefoxOptions()
+# options.headless = True
+# webdriver = Firefox(options=options)
+#export_png(item, path, webdriver=webdriver)
+
 warnings.filterwarnings('ignore')

 from augur.api.routes import AUGUR_API_VERSION
@@ -604,6 +610,7 @@ def average_commits_per_PR():
     # opts = FirefoxOptions()
     # opts.add_argument("--headless")
     # driver = webdriver.Firefox(firefox_options=opts)
+    # filename = export_png(grid, timeout=180, webdriver=webdriver)
     filename = export_png(grid, timeout=180)

     return send_file(filename)
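
The commented-out lines describe an alternative where export_png reuses one explicitly managed headless Firefox instance rather than letting Bokeh spawn a browser per call. A minimal standalone sketch of that approach, assuming bokeh, selenium, and geckodriver are available (none of which this commit adds):

# Standalone sketch of the headless-webdriver approach described in the comments above.
from bokeh.io import export_png
from bokeh.plotting import figure
from selenium.webdriver import Firefox, FirefoxOptions

options = FirefoxOptions()
options.add_argument("--headless")          # run Firefox without a display
driver = Firefox(options=options)

plot = figure(title="example")              # placeholder figure standing in for `grid`
plot.line([1, 2, 3], [4, 6, 5])

try:
    # Passing webdriver= reuses the same browser across exports; omitting it
    # (as the live code does) lets Bokeh create and tear down its own driver.
    filename = export_png(plot, filename="example.png", timeout=180, webdriver=driver)
finally:
    driver.quit()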

augur/api/view/init.py

Lines changed: 1 addition & 1 deletion
@@ -91,4 +91,4 @@ def write_settings(current_settings):
 # Initialize logging
 def init_logging():
     global logger
-    logger = AugurLogger("augur_view", reset_logfiles=True).get_logger()
+    logger = AugurLogger("augur_view", reset_logfiles=False).get_logger()

augur/application/cli/__init__.py

Lines changed: 2 additions & 2 deletions
@@ -32,7 +32,7 @@ def new_func(ctx, *args, **kwargs):
                 You are not connected to the internet.\n \
                 Please connect to the internet to run Augur\n \
                 Consider setting http_proxy variables for limited access installations.")
-            sys.exit()
+            sys.exit(-1)

     return update_wrapper(new_func, function_internet_connection)

@@ -78,7 +78,7 @@ def new_func(ctx, *args, **kwargs):
            print(f"\n\n{usage} command setup failed\nERROR: connecting to database\nHINT: The {incorrect_values} may be incorrectly specified in {location}\n")

            engine.dispose()
-           sys.exit()
+           sys.exit(-2)

     return update_wrapper(new_func, function_db_connection)
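
Replacing the bare sys.exit() calls with sys.exit(-1) and sys.exit(-2) gives the two failure paths distinct non-zero exit statuses, so wrappers and init scripts can tell "no internet" apart from "cannot reach the database". A small sketch of how a caller might use that; the augur backend start invocation is illustrative, and on POSIX the negative values wrap to 255 and 254:

# Sketch of a wrapper that distinguishes the new exit codes. Assumes the
# standard `augur` CLI entry point is on PATH.
import subprocess

result = subprocess.run(["augur", "backend", "start"], capture_output=True, text=True)

if result.returncode == 255:        # sys.exit(-1): internet connectivity check failed
    print("Augur could not reach the internet")
elif result.returncode == 254:      # sys.exit(-2): database connection check failed
    print("Augur could not connect to the database")
elif result.returncode != 0:
    print(f"Augur exited with status {result.returncode}")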

augur/application/cli/_multicommand.py

Lines changed: 0 additions & 1 deletion
@@ -30,7 +30,6 @@ def get_command(self, ctx, name):

         # Check that the command exists before importing
         if not cmdfile.is_file():
-
             return

         # Prefer to raise exception instead of silcencing it

augur/application/cli/api.py

Lines changed: 5 additions & 4 deletions
@@ -14,15 +14,16 @@

 from augur.application.db.session import DatabaseSession
 from augur.application.logs import AugurLogger
-from augur.application.cli import test_connection, test_db_connection, with_database
+from augur.application.cli import test_connection, test_db_connection, with_database, DatabaseContext
 from augur.application.cli._cli_util import _broadcast_signal_to_processes, raise_open_file_limit, clear_redis_caches, clear_rabbitmq_messages
 from augur.application.db.lib import get_value

-logger = AugurLogger("augur", reset_logfiles=True).get_logger()
+logger = AugurLogger("augur", reset_logfiles=False).get_logger()

 @click.group('api', short_help='Commands for controlling the backend API server')
-def cli():
-    pass
+@click.pass_context
+def cli(ctx):
+    ctx.obj = DatabaseContext()

 @cli.command("start")
 @click.option("--development", is_flag=True, default=False, help="Enable development mode")

augur/application/cli/backend.py

Lines changed: 62 additions & 5 deletions
@@ -47,8 +47,8 @@ def cli(ctx):
 @click.pass_context
 def start(ctx, disable_collection, development, pidfile, port):
     """Start Augur's backend server."""
-    with open(pidfile, "w") as pidfile:
-        pidfile.write(str(os.getpid()))
+    with open(pidfile, "w") as pidfile_io:
+        pidfile_io.write(str(os.getpid()))

     try:
         if os.environ.get('AUGUR_DOCKER_DEPLOY') != "1":
@@ -63,6 +63,8 @@ def start(ctx, disable_collection, development, pidfile, port):
     if development:
         os.environ["AUGUR_DEV"] = "1"
         logger.info("Starting in development mode")
+
+    os.environ["AUGUR_PIDFILE"] = pidfile

     try:
         gunicorn_location = os.getcwd() + "/augur/api/gunicorn_conf.py"
@@ -74,6 +76,11 @@ def start(ctx, disable_collection, development, pidfile, port):
     if not port:
         port = get_value("Server", "port")

+    os.environ["AUGUR_PORT"] = str(port)
+
+    if disable_collection:
+        os.environ["AUGUR_DISABLE_COLLECTION"] = "1"
+
     worker_vmem_cap = get_value("Celery", 'worker_process_vmem_cap')

     gunicorn_command = f"gunicorn -c {gunicorn_location} -b {host}:{port} augur.api.server:app --log-file gunicorn.log"
@@ -128,7 +135,7 @@ def start(ctx, disable_collection, development, pidfile, port):
         augur_collection_monitor.si().apply_async()

     else:
-        logger.info("Collection disabled")
+        logger.info("Collection disabled")

     try:
         server.wait()
@@ -153,6 +160,8 @@ def start(ctx, disable_collection, development, pidfile, port):
            cleanup_after_collection_halt(logger, ctx.obj.engine)
        except RedisConnectionError:
            pass
+
+    os.unlink(pidfile)

 def start_celery_worker_processes(vmem_cap_ratio, disable_collection=False):

@@ -185,7 +194,7 @@ def determine_worker_processes(ratio,maximum):
         sleep_time += 6

     #60% of estimate, Maximum value of 45 : Reduced because it can be lower
-    core_num_processes = determine_worker_processes(.40, 50)
+    core_num_processes = determine_worker_processes(.40, 90)
     logger.info(f"Starting core worker processes with concurrency={core_num_processes}")
     core_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={core_num_processes} -n core:{uuid.uuid4().hex}@%h"
     process_list.append(subprocess.Popen(core_worker.split(" ")))
@@ -224,6 +233,54 @@ def stop(ctx):

     augur_stop(signal.SIGTERM, logger, ctx.obj.engine)

+@cli.command('stop-collection-blocking')
+@test_connection
+@test_db_connection
+@with_database
+@click.pass_context
+def stop_collection(ctx):
+    """
+    Stop collection tasks if they are running, block until complete
+    """
+    processes = get_augur_processes()
+
+    stopped = []
+
+    p: psutil.Process
+    for p in processes:
+        if p.name() == "celery":
+            stopped.append(p)
+            p.terminate()
+
+    if not len(stopped):
+        logger.info("No collection processes found")
+        return
+
+    _, alive = psutil.wait_procs(stopped, 5,
+                                 lambda p: logger.info(f"STOPPED: {p.pid}"))
+
+    killed = []
+    while True:
+        for i in range(len(alive)):
+            if alive[i].status() == psutil.STATUS_ZOMBIE:
+                logger.info(f"KILLING ZOMBIE: {alive[i].pid}")
+                alive[i].kill()
+                killed.append(i)
+            elif not alive[i].is_running():
+                logger.info(f"STOPPED: {p.pid}")
+                killed.append(i)
+
+        for i in reversed(killed):
+            alive.pop(i)
+
+        if not len(alive):
+            break
+
+        logger.info(f"Waiting on [{', '.join(str(p.pid for p in alive))}]")
+        time.sleep(0.5)
+
+    cleanup_after_collection_halt(logger, ctx.obj.engine)
+
 @cli.command('kill')
 @test_connection
 @test_db_connection
@@ -388,7 +445,7 @@ def processes():
     Outputs the name/PID of all Augur server & worker processes"""
     augur_processes = get_augur_processes()
     for process in augur_processes:
-        logger.info(f"Found process {process.pid}")
+        logger.info(f"Found process {process.pid} [{process.name()}] -> Parent: {process.parent().pid}")

 def get_augur_processes():
     augur_processes = []
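
The new stop-collection-blocking command follows a common psutil pattern: terminate the matching processes, wait briefly for them to exit, then escalate on whatever is still alive. A compact, Augur-independent sketch of that pattern (process name and grace period are illustrative):

# Terminate-then-escalate sketch using psutil; not Augur's exact loop.
import psutil

def stop_processes_by_name(name: str, grace_seconds: float = 5.0) -> None:
    targets = [p for p in psutil.process_iter(["name"]) if p.info["name"] == name]

    for proc in targets:
        proc.terminate()                        # polite stop first (SIGTERM)

    gone, alive = psutil.wait_procs(targets, timeout=grace_seconds)
    for proc in gone:
        print(f"stopped {proc.pid}")

    for proc in alive:
        proc.kill()                             # force-kill stragglers (SIGKILL)
        print(f"killed {proc.pid}")

if __name__ == "__main__":
    stop_processes_by_name("celery")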
