From 33275b460bcf2adac422fe63dba3b6162404a016 Mon Sep 17 00:00:00 2001 From: StanGirard Date: Mon, 20 Jul 2020 13:51:04 +0200 Subject: [PATCH 01/13] [CELERY] Added celery --- config.py | 4 ++++ toolkit/__init__.py | 11 ++++++---- toolkit/celery/__init__.py | 14 +++++++++++++ toolkit/celery/tasks.py | 14 +++++++++++++ toolkit/routes/audit/api.py | 41 +++++++++++++++++++++++++++++++++++++ wsgi.py | 3 ++- 6 files changed, 82 insertions(+), 5 deletions(-) create mode 100644 toolkit/celery/__init__.py create mode 100644 toolkit/celery/tasks.py diff --git a/config.py b/config.py index 2f248c1..e47bb8d 100644 --- a/config.py +++ b/config.py @@ -15,6 +15,10 @@ class Config: FLASK_APP = environ.get('FLASK_APP', "SEOToolkit") FLASK_ENV = environ.get('FLASK_ENV', 'development') GOOGLE_API_KEY = environ.get('GOOGLE_API_KEY', "None") + + # Celery + CELERY_BROKER_URL = environ.get('CELERY_BROKER_URL','redis://localhost:6379/0') + CELERY_RESULT_BACKEND = environ.get('CELERY_RESULT_BACKEND','redis://localhost:6379/0') # Database SQLALCHEMY_DATABASE_URI = environ.get("SQLALCHEMY_DATABASE_URI", "sqlite:///database.db") diff --git a/toolkit/__init__.py b/toolkit/__init__.py index bbd01bd..c7f4c2c 100644 --- a/toolkit/__init__.py +++ b/toolkit/__init__.py @@ -1,15 +1,18 @@ from flask import Flask from flask_sqlalchemy import SQLAlchemy +from toolkit.celery import make_celery +from config import Config +from celery import Celery dbAlchemy = SQLAlchemy() - - +celery = Celery(__name__, broker=Config.CELERY_BROKER_URL,backend=Config.CELERY_RESULT_BACKEND, include=["toolkit.celery.tasks"]) def create_app(): """Construct the core application.""" app = Flask(__name__) app.config.from_object('config.Config') dbAlchemy.init_app(app) - + celery = make_celery(app) with app.app_context(): import toolkit.routes # Import routes dbAlchemy.create_all() # Create sql tables for our data models - return app \ No newline at end of file + + return app, celery \ No newline at end of file diff --git 
a/toolkit/celery/__init__.py b/toolkit/celery/__init__.py new file mode 100644 index 0000000..0d3fa0a --- /dev/null +++ b/toolkit/celery/__init__.py @@ -0,0 +1,14 @@ +from celery import Celery + +def make_celery(app): + celery = Celery(app.import_name, backend=app.config['CELERY_RESULT_BACKEND'], + broker=app.config['CELERY_BROKER_URL']) + celery.conf.update(app.config) + TaskBase = celery.Task + class ContextTask(TaskBase): + abstract = True + def __call__(self, *args, **kwargs): + with app.app_context(): + return TaskBase.__call__(self, *args, **kwargs) + celery.Task = ContextTask + return celery \ No newline at end of file diff --git a/toolkit/celery/tasks.py b/toolkit/celery/tasks.py new file mode 100644 index 0000000..4f6546d --- /dev/null +++ b/toolkit/celery/tasks.py @@ -0,0 +1,14 @@ +from toolkit import celery +import time + +@celery.task(bind=True, name="toolkit.routes.audit.api.my_background_task") +def my_background_task(self,url): + print(url) + for i in range(100): + self.update_state(state='PROGRESS', + meta={'current': i, 'total': 100, + 'status': "Hello " + str(i)}) + time.sleep(1) + return {'current': 100, 'total': 100, 'status': 'Task completed!', + 'result': 42} + diff --git a/toolkit/routes/audit/api.py b/toolkit/routes/audit/api.py index 6ad544a..edbf13d 100644 --- a/toolkit/routes/audit/api.py +++ b/toolkit/routes/audit/api.py @@ -11,6 +11,47 @@ from toolkit.models import Audit, LighthouseScore from toolkit.lib.api_tools import generate_answer +from toolkit import celery +from toolkit.celery.tasks import my_background_task + + +import time + + +@app.route('/status/') +def taskstatus(task_id): + task = celery.AsyncResult(task_id) + if task.state == 'PENDING': + # job did not start yet + response = { + 'state': task.state, + 'current': 0, + 'total': 1, + 'status': 'Pending...' 
+ } + elif task.state != 'FAILURE': + response = { + 'state': task.state, + 'current': task.info.get('current', 0), + 'total': task.info.get('total', 1), + 'status': task.info.get('status', '') + } + if 'result' in task.info: + response['result'] = task.info['result'] + else: + # something went wrong in the background job + response = { + 'state': task.state, + 'current': 1, + 'total': 1, + 'status': str(task.info), # this is the exception raised + } + return json.dumps(response) + +@app.route('/api/audit/lighthouse/score/test', methods=["GET"]) +def testi_test(): + task = my_background_task.delay("https://test.com") + return {"id":task.id} @app.route('/api/audit/lighthouse/score', methods=["POST"]) def post_audit_lighthouse_score(): diff --git a/wsgi.py b/wsgi.py index 6e11243..03deb2e 100644 --- a/wsgi.py +++ b/wsgi.py @@ -1,7 +1,8 @@ """App entry point.""" from toolkit import create_app -app = create_app() +app,celery = create_app() if __name__ == "__main__": + app.celery = celery app.run(host='0.0.0.0', port=5000) \ No newline at end of file From e78224eb769688144d80f3d4fce5963026825216 Mon Sep 17 00:00:00 2001 From: StanGirard Date: Wed, 22 Jul 2020 19:38:17 +0200 Subject: [PATCH 02/13] [FACTORY] --- toolkit/__init__.py | 36 ++++++++++++++---- toolkit/celery/tasks.py | 14 ------- toolkit/{celery => celeryapp}/__init__.py | 0 toolkit/celeryapp/tasks.py | 35 +++++++++++++++++ toolkit/extension.py | 38 +++++++++++++++++++ toolkit/models/score.py | 7 ++++ toolkit/routes/audit/api.py | 20 ++-------- .../audit/lighthouse/lighthouse_all.jinja2 | 8 ++++ wsgi.py | 8 ---- 9 files changed, 121 insertions(+), 45 deletions(-) delete mode 100644 toolkit/celery/tasks.py rename toolkit/{celery => celeryapp}/__init__.py (100%) create mode 100644 toolkit/celeryapp/tasks.py create mode 100644 toolkit/extension.py delete mode 100644 wsgi.py diff --git a/toolkit/__init__.py b/toolkit/__init__.py index c7f4c2c..8ae95d8 100644 --- a/toolkit/__init__.py +++ b/toolkit/__init__.py @@ 
-1,18 +1,40 @@ from flask import Flask from flask_sqlalchemy import SQLAlchemy -from toolkit.celery import make_celery +from toolkit.celeryapp import make_celery from config import Config from celery import Celery +# from toolkit.extension import celery, dbAlchemy dbAlchemy = SQLAlchemy() -celery = Celery(__name__, broker=Config.CELERY_BROKER_URL,backend=Config.CELERY_RESULT_BACKEND, include=["toolkit.celery.tasks"]) +# celery = Celery(__name__, broker=Config.CELERY_BROKER_URL,backend=Config.CELERY_RESULT_BACKEND, include=["toolkit.celery.tasks"]) + + + +def make_celery(app): + celery = Celery( + app.import_name, + backend=app.config['CELERY_RESULT_BACKEND'], + broker=app.config['CELERY_BROKER_URL'] + ) + celery.conf.update(app.config) + + class ContextTask(celery.Task): + def __call__(self, *args, **kwargs): + with app.app_context(): + return self.run(*args, **kwargs) + + celery.Task = ContextTask + return celery + + def create_app(): """Construct the core application.""" app = Flask(__name__) - app.config.from_object('config.Config') - dbAlchemy.init_app(app) + app.config.from_object(Config) celery = make_celery(app) + dbAlchemy.init_app(app) + celery.init_app(app) with app.app_context(): - import toolkit.routes # Import routes + # Import routes + import toolkit.routes dbAlchemy.create_all() # Create sql tables for our data models - - return app, celery \ No newline at end of file + return app \ No newline at end of file diff --git a/toolkit/celery/tasks.py b/toolkit/celery/tasks.py deleted file mode 100644 index 4f6546d..0000000 --- a/toolkit/celery/tasks.py +++ /dev/null @@ -1,14 +0,0 @@ -from toolkit import celery -import time - -@celery.task(bind=True, name="toolkit.routes.audit.api.my_background_task") -def my_background_task(self,url): - print(url) - for i in range(100): - self.update_state(state='PROGRESS', - meta={'current': i, 'total': 100, - 'status': "Hello " + str(i)}) - time.sleep(1) - return {'current': 100, 'total': 100, 'status': 'Task 
completed!', - 'result': 42} - diff --git a/toolkit/celery/__init__.py b/toolkit/celeryapp/__init__.py similarity index 100% rename from toolkit/celery/__init__.py rename to toolkit/celeryapp/__init__.py diff --git a/toolkit/celeryapp/tasks.py b/toolkit/celeryapp/tasks.py new file mode 100644 index 0000000..2fedf84 --- /dev/null +++ b/toolkit/celeryapp/tasks.py @@ -0,0 +1,35 @@ +import time +from datetime import datetime +import math + +from flask import current_app as app +from toolkit import celery +from toolkit import dbAlchemy as db +from toolkit.controller.seo.lighthouse import audit_google_lighthouse_full +from toolkit.models import LighthouseScore +from celery.signals import worker_process_init, task_prerun + + +@task_prerun.connect +def celery_prerun(*args, **kwargs): + #print g + print("HHHHHHHHHHHHHHHH") + +@celery.task(bind=True,name="Lighthouse") +def LighthouseAudit(self,url): + new_score = LighthouseScore( + url = url,status_job="RUNNING", accessibility=0,pwa=0,seo=0, best_practices=0,performance=0, begin_date=datetime.now() + ) + db.session.add(new_score) + db.session.commit() + value = audit_google_lighthouse_full(url) + accessibility = int(math.floor(value["lighthouseResult"]["categories"]["accessibility"]["score"] * 100)) + seo = int(math.floor(value["lighthouseResult"]["categories"]["seo"]["score"] * 100)) + pwa = int(math.floor(value["lighthouseResult"]["categories"]["pwa"]["score"] * 100)) + best_practices = int(math.floor(value["lighthouseResult"]["categories"]["best-practices"]["score"] * 100)) + performance = int(math.floor(value["lighthouseResult"]["categories"]["performance"]["score"] * 100)) + conn = db.engine.connect() + smt = update(LighthouseScore).where(LighthouseScore.url == url).values(accessibility=accessibility,pwa=pwa,seo=seo, best_practices=best_practices,performance=performance, status_job="FINISHED") + conn.execute(smt) + return {'current': 100, 'total': 100, 'status': 'Task completed!', + 'result': 42} diff --git 
a/toolkit/extension.py b/toolkit/extension.py new file mode 100644 index 0000000..3f8ff9e --- /dev/null +++ b/toolkit/extension.py @@ -0,0 +1,38 @@ +import flask +from flask_sqlalchemy import SQLAlchemy +from celery import Celery +from config import Config + +class FlaskCelery(Celery): + + def __init__(self, *args, **kwargs): + + super(FlaskCelery, self).__init__(*args, **kwargs) + self.patch_task() + + if 'app' in kwargs: + self.init_app(kwargs['app']) + + def patch_task(self): + TaskBase = self.Task + _celery = self + + class ContextTask(TaskBase): + abstract = True + + def __call__(self, *args, **kwargs): + if flask.has_app_context(): + return TaskBase.__call__(self, *args, **kwargs) + else: + with _celery.app.app_context(): + return TaskBase.__call__(self, *args, **kwargs) + + self.Task = ContextTask + + def init_app(self, app): + self.app = app + self.config_from_object(app.config) + print(app.config) + +celery = FlaskCelery(__name__, broker=Config.BROKER_URL,backend=Config.RESULT_BACKEND, include=["toolkit.celeryapp.tasks"]) +dbAlchemy = SQLAlchemy() \ No newline at end of file diff --git a/toolkit/models/score.py b/toolkit/models/score.py index 288a9b5..d47fcb0 100644 --- a/toolkit/models/score.py +++ b/toolkit/models/score.py @@ -45,6 +45,13 @@ class LighthouseScore(dbAlchemy.Model): unique=False, nullable=False ) + status_job = dbAlchemy.Column( + dbAlchemy.String(20), + index=False, + unique=False, + nullable=True, + default="FINISHED" + ) begin_date = dbAlchemy.Column( dbAlchemy.DateTime, index=False, diff --git a/toolkit/routes/audit/api.py b/toolkit/routes/audit/api.py index edbf13d..98895d2 100644 --- a/toolkit/routes/audit/api.py +++ b/toolkit/routes/audit/api.py @@ -12,8 +12,7 @@ from toolkit.lib.api_tools import generate_answer from toolkit import celery -from toolkit.celery.tasks import my_background_task - +from toolkit.celeryapp.tasks import LighthouseAudit import time @@ -50,7 +49,7 @@ def taskstatus(task_id): 
@app.route('/api/audit/lighthouse/score/test', methods=["GET"]) def testi_test(): - task = my_background_task.delay("https://test.com") + task = LighthouseAudit.delay("https://test.com") return {"id":task.id} @app.route('/api/audit/lighthouse/score', methods=["POST"]) @@ -58,19 +57,8 @@ def post_audit_lighthouse_score(): try: url = request.form['url'] if url: - value = audit_google_lighthouse_full(url) - accessibility = int(math.floor(value["lighthouseResult"]["categories"]["accessibility"]["score"] * 100)) - seo = int(math.floor(value["lighthouseResult"]["categories"]["seo"]["score"] * 100)) - pwa = int(math.floor(value["lighthouseResult"]["categories"]["pwa"]["score"] * 100)) - best_practices = int(math.floor(value["lighthouseResult"]["categories"]["best-practices"]["score"] * 100)) - performance = int(math.floor(value["lighthouseResult"]["categories"]["performance"]["score"] * 100)) - new_score = LighthouseScore( - url = url, accessibility=accessibility,pwa=pwa,seo=seo, best_practices=best_practices,performance=performance, begin_date=datetime.now() - ) - db.session.add(new_score) - db.session.commit() - print("Good") - return generate_answer() + task = LighthouseAudit.delay(url) + return generate_answer(data={"id":task.id}) else: return generate_answer(success=False) except Exception as e: diff --git a/toolkit/templates/audit/lighthouse/lighthouse_all.jinja2 b/toolkit/templates/audit/lighthouse/lighthouse_all.jinja2 index 66bca43..69cfa51 100644 --- a/toolkit/templates/audit/lighthouse/lighthouse_all.jinja2 +++ b/toolkit/templates/audit/lighthouse/lighthouse_all.jinja2 @@ -43,6 +43,7 @@ + {%if item.status_job == "FINISHED" %}
{{item.accessibility}}
@@ -95,6 +96,13 @@
+ {% else %} + +
+ {{item.status_job}} + + + {% endif %}} {{item.begin_date|formatdatetime}} diff --git a/wsgi.py b/wsgi.py deleted file mode 100644 index 03deb2e..0000000 --- a/wsgi.py +++ /dev/null @@ -1,8 +0,0 @@ -"""App entry point.""" -from toolkit import create_app - -app,celery = create_app() - -if __name__ == "__main__": - app.celery = celery - app.run(host='0.0.0.0', port=5000) \ No newline at end of file From fba2406763a98271d6358ceee65e427bf4ab14d6 Mon Sep 17 00:00:00 2001 From: StanGirard Date: Wed, 22 Jul 2020 19:57:26 +0200 Subject: [PATCH 03/13] [CELERY] Working on audits --- celery_worker.py | 5 +++ run.py | 6 +++ toolkit/__init__.py | 42 +++---------------- toolkit/celery_utils.py | 8 ++++ toolkit/celeryapp/tasks.py | 1 + toolkit/factory.py | 21 ++++++++++ toolkit/routes/audit/api.py | 20 ++++----- .../audit/lighthouse/lighthouse_all.jinja2 | 8 ++-- 8 files changed, 61 insertions(+), 50 deletions(-) create mode 100644 celery_worker.py create mode 100644 run.py create mode 100644 toolkit/celery_utils.py create mode 100644 toolkit/factory.py diff --git a/celery_worker.py b/celery_worker.py new file mode 100644 index 0000000..09e6d98 --- /dev/null +++ b/celery_worker.py @@ -0,0 +1,5 @@ +from toolkit import celery +from toolkit.factory import create_app +from toolkit.celery_utils import init_celery +app = create_app() +init_celery(celery, app) \ No newline at end of file diff --git a/run.py b/run.py new file mode 100644 index 0000000..a263bfc --- /dev/null +++ b/run.py @@ -0,0 +1,6 @@ +from toolkit import factory +import toolkit + +if __name__ == "__main__": + app = factory.create_app(celery=toolkit.celery) + app.run() \ No newline at end of file diff --git a/toolkit/__init__.py b/toolkit/__init__.py index 8ae95d8..281dcde 100644 --- a/toolkit/__init__.py +++ b/toolkit/__init__.py @@ -1,40 +1,10 @@ -from flask import Flask -from flask_sqlalchemy import SQLAlchemy -from toolkit.celeryapp import make_celery -from config import Config from celery import Celery -# from 
toolkit.extension import celery, dbAlchemy +from flask_sqlalchemy import SQLAlchemy dbAlchemy = SQLAlchemy() -# celery = Celery(__name__, broker=Config.CELERY_BROKER_URL,backend=Config.CELERY_RESULT_BACKEND, include=["toolkit.celery.tasks"]) - - - -def make_celery(app): - celery = Celery( - app.import_name, - backend=app.config['CELERY_RESULT_BACKEND'], - broker=app.config['CELERY_BROKER_URL'] - ) - celery.conf.update(app.config) - - class ContextTask(celery.Task): - def __call__(self, *args, **kwargs): - with app.app_context(): - return self.run(*args, **kwargs) - - celery.Task = ContextTask - return celery +def make_celery(app_name=__name__): + backend = "redis://localhost:6379/0" + broker = backend.replace("0", "1") + return Celery(app_name, backend=backend, broker=broker) -def create_app(): - """Construct the core application.""" - app = Flask(__name__) - app.config.from_object(Config) - celery = make_celery(app) - dbAlchemy.init_app(app) - celery.init_app(app) - with app.app_context(): - # Import routes - import toolkit.routes - dbAlchemy.create_all() # Create sql tables for our data models - return app \ No newline at end of file +celery = make_celery() \ No newline at end of file diff --git a/toolkit/celery_utils.py b/toolkit/celery_utils.py new file mode 100644 index 0000000..5cfb50a --- /dev/null +++ b/toolkit/celery_utils.py @@ -0,0 +1,8 @@ +def init_celery(celery, app): + celery.conf.update(app.config) + TaskBase = celery.Task + class ContextTask(TaskBase): + def __call__(self, *args, **kwargs): + with app.app_context(): + return TaskBase.__call__(self, *args, **kwargs) + celery.Task = ContextTask \ No newline at end of file diff --git a/toolkit/celeryapp/tasks.py b/toolkit/celeryapp/tasks.py index 2fedf84..4568ede 100644 --- a/toolkit/celeryapp/tasks.py +++ b/toolkit/celeryapp/tasks.py @@ -8,6 +8,7 @@ from toolkit.controller.seo.lighthouse import audit_google_lighthouse_full from toolkit.models import LighthouseScore from celery.signals import 
worker_process_init, task_prerun +from sqlalchemy import update @task_prerun.connect diff --git a/toolkit/factory.py b/toolkit/factory.py new file mode 100644 index 0000000..f507689 --- /dev/null +++ b/toolkit/factory.py @@ -0,0 +1,21 @@ +from flask import Flask + +from config import Config +from celery import Celery +from .celery_utils import init_celery +from toolkit import dbAlchemy + + + +def create_app(**kwargs): + """Construct the core application.""" + app = Flask(__name__) + app.config.from_object(Config) + dbAlchemy.init_app(app) + if kwargs.get("celery"): + init_celery(kwargs.get("celery"), app) + with app.app_context(): + # Import routes + import toolkit.routes + dbAlchemy.create_all() # Create sql tables for our data models + return app \ No newline at end of file diff --git a/toolkit/routes/audit/api.py b/toolkit/routes/audit/api.py index 98895d2..681c239 100644 --- a/toolkit/routes/audit/api.py +++ b/toolkit/routes/audit/api.py @@ -1,20 +1,18 @@ import json import math +import time from datetime import datetime from flask import current_app as app -from flask import redirect, request, url_for, render_template -from sqlalchemy import func +from flask import redirect, render_template, request, url_for +from sqlalchemy import func, update +from toolkit import celery from toolkit import dbAlchemy as db +from toolkit.celeryapp.tasks import LighthouseAudit from toolkit.controller.seo.lighthouse import audit_google_lighthouse_full -from toolkit.models import Audit, LighthouseScore - from toolkit.lib.api_tools import generate_answer -from toolkit import celery -from toolkit.celeryapp.tasks import LighthouseAudit - -import time +from toolkit.models import Audit, LighthouseScore @app.route('/status/') @@ -69,13 +67,13 @@ def post_audit_lighthouse_score(): def get_all_audit_lighthouse_score(): try: LS = LighthouseScore - quer = db.session.query(LS.id,LS.url, LS.accessibility, LS.pwa, LS.seo, LS.best_practices, LS.performance, 
func.max(LS.begin_date).label('begin_date')).group_by(LS.url) + quer = db.session.query(LS.id,LS.url, LS.accessibility, LS.pwa, LS.seo, LS.best_practices, LS.performance,LS.status_job, func.max(LS.begin_date).label('begin_date')).group_by(LS.url) results = quer.all() result_arr={"results": [], "google_error":False} if app.config['GOOGLE_API_KEY'] == "None": result_arr["google_error"] = True for i in results: - result_arr["results"].append({"id": i.id, "url": i.url, "accessibility": i.accessibility, "pwa": i.pwa, "seo": i.seo, "best_practices": i.best_practices, "performance": i.performance, "begin_date": i.begin_date}) + result_arr["results"].append({"id": i.id, "url": i.url, "accessibility": i.accessibility, "pwa": i.pwa, "seo": i.seo, "best_practices": i.best_practices, "performance": i.performance,"status_job": i.status_job, "begin_date": i.begin_date}) return generate_answer(data=result_arr) except Exception as e: print(e) @@ -121,4 +119,4 @@ def get_audit_lighthouse_score_all(): return generate_answer(data=result_arr) except Exception as e: print(e) - return generate_answer(success=False) \ No newline at end of file + return generate_answer(success=False) diff --git a/toolkit/templates/audit/lighthouse/lighthouse_all.jinja2 b/toolkit/templates/audit/lighthouse/lighthouse_all.jinja2 index 69cfa51..d0d60d7 100644 --- a/toolkit/templates/audit/lighthouse/lighthouse_all.jinja2 +++ b/toolkit/templates/audit/lighthouse/lighthouse_all.jinja2 @@ -42,8 +42,10 @@ {{item.url}} - + {%if item.status_job == "FINISHED" %} + +
{{item.accessibility}}
@@ -97,12 +99,12 @@ {% else %} - +
{{item.status_job}} - {% endif %}} + {% endif %} {{item.begin_date|formatdatetime}} From 7d07538bcca6daf09d8a71ef2e409b98c065755f Mon Sep 17 00:00:00 2001 From: StanGirard Date: Wed, 22 Jul 2020 20:05:36 +0200 Subject: [PATCH 04/13] [REQUIREMENTS] Added Celery --- requirements.txt | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/requirements.txt b/requirements.txt index 68c28e6..0ff9b3b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,18 +1,19 @@ -SQLAlchemy==1.3.13 -matplotlib==3.1.2 -inscriptis==0.0.4.1.1 -seaborn==0.10.1 -numpy==1.18.1 +beautifulsoup4==4.9.1 bokeh==2.0.2 -requests==2.20.0 -nltk==3.4.5 -Flask_SQLAlchemy==2.4.1 -pandas==1.0.1 +celery==4.4.6 +Flask_SQLAlchemy==2.4.3 Flask==1.1.1 -networkx==2.4 -scipy==1.4.0 -google==2.0.3 gensim==3.8.1 -beautifulsoup4==4.9.1 -python-dotenv==0.13.0 +google==2.0.3 +inscriptis==0.0.4.1.1 +matplotlib==3.1.2 +networkx==2.4 +nltk==3.4.5 +numpy==1.18.1 +pandas==1.0.1 +python-dotenv==0.14.0 +requests==2.20.0 scikit_learn==0.23.1 +scipy==1.4.0 +seaborn==0.10.1 +SQLAlchemy==1.3.18 From 1e19808a8eeeca14469542c5e68d0886cffbaf2b Mon Sep 17 00:00:00 2001 From: StanGirard Date: Wed, 22 Jul 2020 20:05:49 +0200 Subject: [PATCH 05/13] [RUN] Added script to run everything --- run.sh | 5 +++++ 1 file changed, 5 insertions(+) create mode 100755 run.sh diff --git a/run.sh b/run.sh new file mode 100755 index 0000000..f489d10 --- /dev/null +++ b/run.sh @@ -0,0 +1,5 @@ +redis-server & +python3 run.py & +celery worker -A celery_worker.celery --loglevel=info --pool=solo + + From b2a9166baf8c2b6df6c96d62f9d57348e179d38c Mon Sep 17 00:00:00 2001 From: StanGirard Date: Wed, 22 Jul 2020 20:06:13 +0200 Subject: [PATCH 06/13] [GITIGNORE] Added redis dump --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 0f8b6b4..33f7fea 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ visited.db test.py .idea/* .env +dump.rdb From 
68ade6c075a49f5bbc60536b5d7ceff67f1ee221 Mon Sep 17 00:00:00 2001 From: StanGirard Date: Thu, 23 Jul 2020 02:01:05 +0200 Subject: [PATCH 07/13] [CELERY][AUDIT] added ajax to html --- toolkit/celeryapp/tasks.py | 13 +- toolkit/models/score.py | 6 + toolkit/routes/audit/api.py | 30 ++- toolkit/routes/audit/dashboard.py | 9 +- .../audit/lighthouse/lighthouse_all.jinja2 | 185 ++++++++++-------- toolkit/templates/base.jinja2 | 8 +- 6 files changed, 158 insertions(+), 93 deletions(-) diff --git a/toolkit/celeryapp/tasks.py b/toolkit/celeryapp/tasks.py index 4568ede..6f74f74 100644 --- a/toolkit/celeryapp/tasks.py +++ b/toolkit/celeryapp/tasks.py @@ -11,15 +11,15 @@ from sqlalchemy import update -@task_prerun.connect -def celery_prerun(*args, **kwargs): - #print g - print("HHHHHHHHHHHHHHHH") +# @task_prerun.connect +# def celery_prerun(*args, **kwargs): +# #print g +# print("Launching Celery App") @celery.task(bind=True,name="Lighthouse") def LighthouseAudit(self,url): new_score = LighthouseScore( - url = url,status_job="RUNNING", accessibility=0,pwa=0,seo=0, best_practices=0,performance=0, begin_date=datetime.now() + url = url,status_job="RUNNING",task_id=str(self.request.id), accessibility=0,pwa=0,seo=0, best_practices=0,performance=0, begin_date=datetime.now() ) db.session.add(new_score) db.session.commit() @@ -32,5 +32,4 @@ def LighthouseAudit(self,url): conn = db.engine.connect() smt = update(LighthouseScore).where(LighthouseScore.url == url).values(accessibility=accessibility,pwa=pwa,seo=seo, best_practices=best_practices,performance=performance, status_job="FINISHED") conn.execute(smt) - return {'current': 100, 'total': 100, 'status': 'Task completed!', - 'result': 42} + return {'url': url, 'status': 'Task completed!'} diff --git a/toolkit/models/score.py b/toolkit/models/score.py index d47fcb0..ee98b8a 100644 --- a/toolkit/models/score.py +++ b/toolkit/models/score.py @@ -52,6 +52,12 @@ class LighthouseScore(dbAlchemy.Model): nullable=True, default="FINISHED" ) + 
task_id = dbAlchemy.Column( + dbAlchemy.String(40), + index=False, + unique=False, + nullable=True, + ) begin_date = dbAlchemy.Column( dbAlchemy.DateTime, index=False, diff --git a/toolkit/routes/audit/api.py b/toolkit/routes/audit/api.py index 681c239..ae0e818 100644 --- a/toolkit/routes/audit/api.py +++ b/toolkit/routes/audit/api.py @@ -63,21 +63,47 @@ def post_audit_lighthouse_score(): print(e) return generate_answer(success=False) +@app.route('/api/audit/lighthouse/score/status', methods=["POST"]) +def get_audit_status_by_task(): + try: + task_id = request.form['task'] + result = LighthouseScore.query.filter(LighthouseScore.task_id == task_id).first() + if result and result.status_job == "FINISHED": + return generate_answer(success=True) + else: + return generate_answer(success=False) + except Exception as e: + print(e) + return generate_answer(success=False) + @app.route('/api/audit/lighthouse/score') def get_all_audit_lighthouse_score(): try: LS = LighthouseScore - quer = db.session.query(LS.id,LS.url, LS.accessibility, LS.pwa, LS.seo, LS.best_practices, LS.performance,LS.status_job, func.max(LS.begin_date).label('begin_date')).group_by(LS.url) + quer = db.session.query(LS.id,LS.url, LS.accessibility, LS.pwa, LS.seo, LS.best_practices, LS.performance,LS.status_job, LS.task_id, func.max(LS.begin_date).label('begin_date')).group_by(LS.url) results = quer.all() result_arr={"results": [], "google_error":False} if app.config['GOOGLE_API_KEY'] == "None": result_arr["google_error"] = True for i in results: - result_arr["results"].append({"id": i.id, "url": i.url, "accessibility": i.accessibility, "pwa": i.pwa, "seo": i.seo, "best_practices": i.best_practices, "performance": i.performance,"status_job": i.status_job, "begin_date": i.begin_date}) + result_arr["results"].append({"id": i.id, "url": i.url, "accessibility": i.accessibility, "pwa": i.pwa, "seo": i.seo, "best_practices": i.best_practices, "performance": i.performance,"status_job": i.status_job,"task_id": 
i.task_id, "begin_date": i.begin_date}) return generate_answer(data=result_arr) except Exception as e: print(e) return generate_answer(success=False) + +@app.route('/api/audit/lighthouse/score/delete', methods=["POST"]) +def post_delete_lighthouse_score(): + try: + id = request.form['id'] + + result = LighthouseScore().query.filter(LighthouseScore.id == id).first() + LighthouseScore().query.filter(LighthouseScore.url == result.url).delete() + db.session.commit() + return generate_answer(success=True) + except Exception as e: + print(e) + return generate_answer(success=False) @app.route('/api/audit/lighthouse/score/', methods=["GET"]) def get_audit_lighthouse_score_by_id(id): diff --git a/toolkit/routes/audit/dashboard.py b/toolkit/routes/audit/dashboard.py index b0e0de0..c7cf520 100644 --- a/toolkit/routes/audit/dashboard.py +++ b/toolkit/routes/audit/dashboard.py @@ -1,5 +1,6 @@ import json import math +import time from datetime import datetime from flask import current_app as app @@ -21,6 +22,7 @@ def audit_home(): @app.route('/audit/lighthouse/score', methods=["POST"]) def add_audit_lighthouse_score(): result = post_request_api('/api/audit/lighthouse/score', request.form) + time.sleep(.300) return redirect(url_for('dashboard_audit_lighthouse_score')) @app.route('/audit/lighthouse/score') @@ -36,7 +38,12 @@ def dashboard_audit_lighthouse_score_get_id(id): return render_template("audit/lighthouse/lighthouse.jinja2", url=result["url"], id=result["id"], result=result["results"], seo_list=result["table"]["seo_list"], accessibility_list=result["table"]["accessibility_list"],pwa_list=result["table"]["pwa_list"], best_list=result["table"]["best_list"], performance_list=result["table"]["performance_list"], labels=result["table"]["labels"]) - +@app.route('/audit/lighthouse/score/delete', methods=["GET"]) +def delete_audit_lighthouse(): + id = request.args.get('id') + print("HHHHHHHHHHHHHHHHHHHHH") + result = post_request_api('/api/audit/lighthouse/score/delete', {"id": 
id}) + return redirect(url_for('dashboard_audit_lighthouse_score')) @app.route('/audit/lighthouse/score/all') def dashboard_audit_lighthouse_score_all(): diff --git a/toolkit/templates/audit/lighthouse/lighthouse_all.jinja2 b/toolkit/templates/audit/lighthouse/lighthouse_all.jinja2 index d0d60d7..51954fa 100644 --- a/toolkit/templates/audit/lighthouse/lighthouse_all.jinja2 +++ b/toolkit/templates/audit/lighthouse/lighthouse_all.jinja2 @@ -19,108 +19,131 @@ {% endif %}
- - - - - - - - - - - - - - - {% for item in result %} +
URLAccessibilityBest PracticesPerformancesSEOPWAUpdate Time Update
+ + + + + + + + + + + + + + {% for item in result %} - + - - - {%if item.status_job == "FINISHED" %} - - + - + - + - + - {% else %} - - {% endif %} - - + {% else %} + - - {% endfor %} - + + + {% endif %} + + + +{% endfor %} +
URLAccessibilityBest PracticesPerformancesSEOPWAUpdate Time Update
- + + {{item.url}} - - -
+ + + {%if item.status_job == "FINISHED" %} +
+ +
{{item.accessibility}}
-
-
-
+
+
+
-
-
-
+
+
+
{{item.best_practices}}
-
-
-
+
+
+
-
-
-
+
+
+
{{item.performance}}
-
-
-
+
+
+
-
- -
-
+
+ +
+
{{item.seo}}
-
-
-
+
+
+
-
- -
-
+
+ +
+
{{item.pwa}}
-
-
-
+
+
+
-
- -
-
- {{item.status_job}} - -
+ - {{item.begin_date|formatdatetime}} - -
- - - - +
+
+ Loading...
- -
+ + {{item.begin_date|formatdatetime}} + +
+ + + + + +
+
+ + + + + +
+
{% endblock %} \ No newline at end of file diff --git a/toolkit/templates/base.jinja2 b/toolkit/templates/base.jinja2 index 3d1590d..2cec85b 100644 --- a/toolkit/templates/base.jinja2 +++ b/toolkit/templates/base.jinja2 @@ -18,6 +18,8 @@ + + @@ -121,8 +123,9 @@
- - + + + @@ -130,6 +133,7 @@ + {% block script_js%} {% endblock%} From 8366300c21800f1a8c82677d9dab0adaf2dc4cc4 Mon Sep 17 00:00:00 2001 From: StanGirard Date: Thu, 23 Jul 2020 13:01:22 +0200 Subject: [PATCH 08/13] [REQUIREMENTS] Redis --- requirements.txt | 1 + run.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 0ff9b3b..7c03f24 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,6 +13,7 @@ numpy==1.18.1 pandas==1.0.1 python-dotenv==0.14.0 requests==2.20.0 +redis==3.5.3 scikit_learn==0.23.1 scipy==1.4.0 seaborn==0.10.1 diff --git a/run.py b/run.py index a263bfc..824907f 100644 --- a/run.py +++ b/run.py @@ -3,4 +3,4 @@ if __name__ == "__main__": app = factory.create_app(celery=toolkit.celery) - app.run() \ No newline at end of file + app.run(host='0.0.0.0') \ No newline at end of file From 7e489ebfdee124e80a0725c909e17eabcd17e96f Mon Sep 17 00:00:00 2001 From: StanGirard Date: Thu, 23 Jul 2020 14:00:09 +0200 Subject: [PATCH 09/13] [CELERY] Graphs Internal Links --- toolkit/celeryapp/tasks.py | 6 ++ toolkit/controller/graphs/core.py | 80 ++++++++++++++-------- toolkit/models/graphs.py | 6 ++ toolkit/routes/graphs/api.py | 68 +++++------------- toolkit/routes/graphs/dashboard.py | 1 - toolkit/templates/graphs/graphs_all.jinja2 | 20 +++++- 6 files changed, 100 insertions(+), 81 deletions(-) diff --git a/toolkit/celeryapp/tasks.py b/toolkit/celeryapp/tasks.py index 6f74f74..0ce2762 100644 --- a/toolkit/celeryapp/tasks.py +++ b/toolkit/celeryapp/tasks.py @@ -6,6 +6,7 @@ from toolkit import celery from toolkit import dbAlchemy as db from toolkit.controller.seo.lighthouse import audit_google_lighthouse_full +from toolkit.controller.graphs.core import generate_interactive_graph from toolkit.models import LighthouseScore from celery.signals import worker_process_init, task_prerun from sqlalchemy import update @@ -33,3 +34,8 @@ def LighthouseAudit(self,url): smt = 
update(LighthouseScore).where(LighthouseScore.url == url).values(accessibility=accessibility,pwa=pwa,seo=seo, best_practices=best_practices,performance=performance, status_job="FINISHED") conn.execute(smt) return {'url': url, 'status': 'Task completed!'} + +@celery.task(bind=True,name="Graphs") +def GraphsGenerate(self,domain): + result = generate_interactive_graph(domain,str(self.request.id), False, 500) + return {'url': domain, 'status': 'Task completed!'} diff --git a/toolkit/controller/graphs/core.py b/toolkit/controller/graphs/core.py index 3db62da..163a32b 100644 --- a/toolkit/controller/graphs/core.py +++ b/toolkit/controller/graphs/core.py @@ -1,22 +1,26 @@ +import logging import math +import urllib +import urllib.parse from datetime import datetime, timedelta -from bokeh.embed import components -import urllib.parse import networkx as nx +import seaborn as sns +from bokeh.embed import components +from bokeh.layouts import row +from bokeh.models import (BoxZoomTool, Circle, ColorBar, ColumnDataSource, + DataTable, HoverTool, MultiLine, Range1d, ResetTool, + TableColumn) +from bokeh.models.graphs import NodesAndLinkedEdges +from bokeh.palettes import Spectral4, Spectral6, Spectral8 from bokeh.plotting import figure, from_networkx -from bokeh.models import (BoxZoomTool, Circle, HoverTool, - MultiLine, Range1d, ResetTool, ColorBar, - ColumnDataSource, DataTable, TableColumn) from bokeh.transform import linear_cmap -from bokeh.palettes import Spectral4, Spectral8, Spectral6 -from bokeh.models.graphs import NodesAndLinkedEdges -from bokeh.layouts import row -from flask import render_template - +from flask import render_template, request +from sqlalchemy import update +from toolkit import dbAlchemy as db +from toolkit.lib.api_tools import generate_answer from toolkit.lib.http_tools import request_parse, request_status_code -import seaborn as sns -import logging +from toolkit.models import Graphs palette = sns.color_palette("hls", 99) pal_hex_lst = palette.as_hex() 
def update_or_insert_graph_in_db(urls, maximum, updating=False):
    """Crawl ``urls``, render the Bokeh graph and persist it on its row.

    The ``Graphs`` row for ``urls`` must already exist; this function only
    issues an UPDATE that stores the rendered components and flips the job
    status to ``"FINISHED"``.

    Args:
        urls: Start URL of the crawl; also the key of the ``Graphs`` row.
        maximum: Maximum number of URLs to crawl.
        updating: Unused; kept so existing callers keep working.

    Returns:
        The rendered ``graphs/bokeh.jinja2`` template.
    """
    plot, domain = generate_graph_internal_link_interactive(urls, maximum)
    script, div = components(plot)
    smt = update(Graphs).where(Graphs.urls == urls).values(
        script=script,
        div=div,
        begin_date=datetime.now(),
        status_job="FINISHED",
    )
    # The context manager returns the connection to the pool; the previous
    # code opened a connection per call and never closed it.
    with db.engine.connect() as conn:
        conn.execute(smt)
    return render_template(
        "graphs/bokeh.jinja2",
        script=script,
        div=div,
        domain=domain,
        template="Flask",
        time=datetime.now(),
    )


def generate_interactive_graph(urls, relaunch, task, maxi_urls):
    """Return the link graph for ``urls``, generating it when needed.

    Behaviour by state of the ``Graphs`` row for ``urls``:

    * no row: insert one in ``"RUNNING"`` state (tagged with ``task``) and
      generate the graph;
    * row not ``"FINISHED"``: return an ``{"error": ...}`` dict;
    * row finished less than 24 hours ago and no relaunch requested: render
      the stored graph;
    * otherwise: mark the row ``"RUNNING"`` and regenerate.

    Args:
        urls: Start URL of the crawl, or ``None``.
        relaunch: ``True`` or the string ``"True"`` forces regeneration.
        task: Celery task id stored on a newly created row.
        maxi_urls: Crawl limit; ``None`` means the default of 500.

    Returns:
        A rendered template, an ``{"error": ...}`` dict, or an error string
        when ``urls`` is ``None``.
    """
    if urls is None:
        return "Empty Url paramaters"

    maximum_urls = 500 if maxi_urls is None else int(maxi_urls)
    # Callers pass either a real boolean or the query-string value "True".
    force = relaunch is True or relaunch == "True"

    # One lookup replaces the former count() + two first() calls.  The old
    # filter used Python's ``and`` between two SQL expressions, which does
    # not build an SQL AND, so the status was (and still is) checked in
    # Python on the fetched row.
    existing = Graphs.query.filter(Graphs.urls == urls).first()

    if existing is None:
        new_graph = Graphs(
            urls=urls,
            script="",
            div="",
            status_job="RUNNING",
            task_id=task,
            begin_date=datetime.now(),
        )
        db.session.add(new_graph)
        db.session.commit()
        return update_or_insert_graph_in_db(urls, maximum_urls)

    if existing.status_job != "FINISHED":
        return {"error": "You graph is being generated. Please wait"}

    still_fresh = existing.begin_date + timedelta(hours=24) > datetime.now()
    if still_fresh and not force:
        return render_template(
            "graphs/bokeh.jinja2",
            script=existing.script,
            div=existing.div,
            domain=urllib.parse.urlparse(existing.urls).netloc,
            template="Flask",
            time=existing.begin_date,
        )

    # Stale (including the exact 24h boundary, which previously fell through
    # and returned None) or explicitly relaunched: regenerate.
    smt = update(Graphs).where(Graphs.urls == urls).values(status_job="RUNNING")
    with db.engine.connect() as conn:
        conn.execute(smt)
    return update_or_insert_graph_in_db(urls, maximum_urls, True)
urls and Graphs.status_job == "RUNNING").first() - if stopped.status_job == "FINISHED": - query_result = Graphs.query.filter(Graphs.urls == urls and Graphs.status_job == "RUNNING").first() - # ALREADY VISITED IN THE LAST 24 HOURS - - if query_result.begin_date + timedelta(hours=24) > datetime.now() and relaunch != "True": - return render_template("graphs/bokeh.jinja2", script=query_result.script, div=query_result.div, domain=urllib.parse.urlparse(query_result.urls).netloc, template="Flask", time=query_result.begin_date) - - # More than 24 hours or parameter redo is True - if query_result.begin_date + timedelta(hours=24) < datetime.now() or relaunch == "True": - conn = db.engine.connect() - smt = update(Graphs).where(Graphs.urls == urls).values(status_job="RUNNING") - conn.execute(smt) - return update_or_insert_graph_in_db(urls, maximum_urls, True) - - else: - return {"error": "You graph is being generated. Please wait"} - - else: - new_graph = Graphs( - urls = urls, script="", div="", status_job = "RUNNING", begin_date=datetime.now() - ) - db.session.add(new_graph) - db.session.commit() - return update_or_insert_graph_in_db(urls, maximum_urls) - - @app.route('/api/graphs', methods=["POST", "GET"]) def get_post_graphs(): try: error = None if request.method == "POST": domain = request.form["domain"] - result = generate_interactive_graph(domain, False, 500) - if "error" in result: - error = result + result = GraphsGenerate.delay(domain) + time.sleep(0.3) results = Graphs.query.all() result_arr= {"results":[]} + print(result_arr) for i in results: - result_arr["results"].append({"id": i.id, "urls": i.urls, "status_job": i.status_job, "begin_date": i.begin_date}) + result_arr["results"].append({"id": i.id, "urls": i.urls, "status_job": i.status_job,"task_id": i.task_id, "begin_date": i.begin_date}) return generate_answer(data=result_arr) except Exception as e: print(e) @@ -96,3 +53,16 @@ def post_delete_graph(): except Exception as e: print(e) return 
@app.route('/api/graphs/status', methods=["POST"])
def get_graphs_status_by_task():
    """Report whether the graph job identified by form field ``task`` is done.

    Returns:
        ``generate_answer(success=True)`` when a ``Graphs`` row with that
        task id exists and its ``status_job`` is ``"FINISHED"``; otherwise
        ``generate_answer(success=False)`` (unknown id, still running,
        missing field, or any error while querying).
    """
    try:
        task_id = request.form.get('task')
        if not task_id:
            # Missing/empty field: answer directly instead of relying on the
            # exception path.
            return generate_answer(success=False)
        row = Graphs.query.filter(Graphs.task_id == task_id).first()
        finished = row is not None and row.status_job == "FINISHED"
        return generate_answer(success=finished)
    except Exception:
        # Route boundary: keep answering with a failure payload, but record
        # the traceback instead of printing the bare message.
        app.logger.exception("Unable to read graph job status")
        return generate_answer(success=False)
def query_domain_serp(query, domain, lang, tld, task):
    """Look up the search-engine position of ``domain`` for ``query``.

    Results are cached in the ``Serp`` table for 24 hours.  New lookups are
    limited to five per hour: when the fifth most recent row is younger than
    one hour, a ``{"limit": ..., "waiting_time": ...}`` dict is returned
    instead of querying.

    Args:
        query: Search query text.
        domain: Domain or URL; reduced to ``netloc + path``.
        lang: Language passed to ``rank``.
        tld: Search-engine TLD passed to ``rank``.
        task: Celery task id stored on the row while the job runs.

    Returns:
        The dict produced by ``rank``, a cached ``{"pos", "url", "query"}``
        dict, a rate-limit dict, or ``None`` when ``query`` or the
        normalised domain is empty.
    """
    parsed = urlparse(domain)
    domain = parsed.netloc + parsed.path
    if not (query and domain):
        return None

    # Criteria are passed as separate arguments so SQLAlchemy ANDs them.
    # The former ``a and b`` is evaluated by Python and silently drops one of
    # the two conditions.
    existing = Serp.query.filter(
        Serp.query_text == query, Serp.domain == domain
    ).first()

    if existing is not None:
        if existing.begin_date + timedelta(hours=24) < datetime.now():
            # Flag the row as running first.  The old statement read
            # ``result`` here, before ``rank`` had been called, which raised
            # UnboundLocalError on every refresh.
            running = (
                update(Serp)
                .where(Serp.query_text == query)
                .where(Serp.domain == domain)
                .values(begin_date=datetime.now(), status_job="RUNNING",
                        task_id=task)
            )
            with db.engine.connect() as conn:
                conn.execute(running)

            result = rank(domain, query, lang=lang, tld=tld)

            finished = (
                update(Serp)
                .where(Serp.query_text == query)
                .where(Serp.domain == domain)
                .values(begin_date=datetime.now(), url=result["url"],
                        status_job="FINISHED", pos=result["pos"])
            )
            with db.engine.connect() as conn:
                conn.execute(finished)
            return result
        return {"pos": existing.pos, "url": existing.url,
                "query": existing.query_text}

    # Rate limit: only the five newest rows matter, so do not load the table.
    newest = Serp.query.order_by(Serp.begin_date.desc()).limit(5).all()
    if len(newest) >= 5:
        fifth = newest[4]
        if fifth.begin_date + timedelta(hours=1) > datetime.now():
            waiting = datetime.now() - fifth.begin_date
            secs = 3600 - int(waiting.total_seconds())
            minutes = int(secs / 60) % 60
            return {"limit": "Imposing a limit of 5 query per hour to avoid Google Ban", "waiting_time": str(minutes) + "m " + str(int(secs % 60)) + "s" }

    # pos=-20 is the placeholder the original code stores until the lookup
    # has finished.
    new_result = Serp(query_text=query, pos=-20, domain=domain, url=None,
                      begin_date=datetime.now(), task_id=task,
                      status_job="RUNNING")
    db.session.add(new_result)
    db.session.commit()

    result = rank(domain, query, lang=lang, tld=tld)

    finished = (
        update(Serp)
        .where(Serp.query_text == query)
        .where(Serp.domain == domain)
        .values(begin_date=datetime.now(), url=result["url"],
                status_job="FINISHED", pos=result["pos"])
    )
    with db.engine.connect() as conn:
        conn.execute(finished)
    return result
unique=False, + nullable=True + ) + task_id = dbAlchemy.Column( + dbAlchemy.String(40), + index=False, + unique=False, + nullable=True, + ) begin_date = dbAlchemy.Column( dbAlchemy.DateTime, index=False, diff --git a/toolkit/routes/serp/api.py b/toolkit/routes/serp/api.py index c81bbbe..5187d04 100644 --- a/toolkit/routes/serp/api.py +++ b/toolkit/routes/serp/api.py @@ -1,5 +1,6 @@ from datetime import datetime, timedelta from urllib.parse import urlparse +import time from flask import current_app as app from flask import request @@ -9,41 +10,7 @@ from toolkit.controller.seo.rank import rank from toolkit.lib.api_tools import generate_answer from toolkit.models import Serp - - -def query_domain_serp( query, domain, lang, tld): - domain = urlparse(domain).netloc + urlparse(domain).path - if query and domain: - existing_serp_count= Serp.query.filter( - Serp.query_text == query and Serp.domain == domain - ).count() - - if existing_serp_count > 0: - existing_serp = Serp.query.filter( - Serp.query_text == query and Serp.domain == domain - ).all() - if existing_serp[0].begin_date + timedelta(hours=24) < datetime.now(): - result = rank(domain, query, lang=lang, tld=tld) - update(Serp).where(Serp.query_text==query and Serp.domain==domain).values(begin_date=datetime.now(),url=result["url"], pos=result["pos"]) - return result - else: - return {"pos": existing_serp[0].pos, "url": existing_serp[0].url, "query": existing_serp[0].query_text} - - all_results_count = Serp.query.order_by(Serp.begin_date.desc()).count() - if all_results_count >= 5: - all_results = Serp.query.order_by(Serp.begin_date.desc()).all() - if all_results[4].begin_date+ timedelta(hours=1) > datetime.now(): - waiting = datetime.now() - all_results[4].begin_date - secs = 3600 - int(waiting.total_seconds()) - minutes = int(secs / 60) % 60 - return {"limit": "Imposing a limit of 5 query per hour to avoid Google Ban", "waiting_time": str(minutes) + "m " + str(int(secs % 60)) + "s" } - - result = rank(domain, 
@app.route('/api/rank/status', methods=["POST"])
def get_rank_status_by_task():
    """Report whether the SERP job identified by form field ``task`` is done.

    Returns:
        ``generate_answer(success=True)`` when a ``Serp`` row with that task
        id exists and its ``status_job`` is ``"FINISHED"``; otherwise
        ``generate_answer(success=False)`` (unknown id, still running,
        missing field, or any error while querying).
    """
    try:
        task_id = request.form.get('task')
        if not task_id:
            # Missing/empty field: answer directly instead of relying on the
            # exception path.
            return generate_answer(success=False)
        row = Serp.query.filter(Serp.task_id == task_id).first()
        finished = row is not None and row.status_job == "FINISHED"
        return generate_answer(success=finished)
    except Exception:
        # Route boundary: keep answering with a failure payload, but record
        # the traceback instead of printing the bare message.
        app.logger.exception("Unable to read SERP job status")
        return generate_answer(success=False)
toolkit import dbAlchemy from toolkit.models import Serp -from toolkit.routes.serp.api import query_domain_serp from toolkit.lib.api_tools import post_request_api, get_request_api diff --git a/toolkit/templates/serp/rank.jinja2 b/toolkit/templates/serp/rank.jinja2 index fe4fdfa..aa4b007 100644 --- a/toolkit/templates/serp/rank.jinja2 +++ b/toolkit/templates/serp/rank.jinja2 @@ -39,7 +39,29 @@ {{item.query}} - {{item.pos}} + {% if item.status_job == "FINISHED" %} + {{item.pos}} + {% else %} +
+ Loading... +
+ + {% endif%}
@celery.task(bind=True, name="Keywords")
def KeywordsGet(self, query):
    """Celery task that computes keyword results for ``query``.

    The Celery request id is passed as the ``task`` argument of
    ``get_query_results`` so the ``Keywords`` row records which job produced
    it.  The computed results are stored by ``get_query_results``; they are
    not part of this task's return value.

    Args:
        query: Keyword query text.

    Returns:
        dict: ``{'url': query, 'status': 'Task completed!'}``.
    """
    get_query_results(query, str(self.request.id))
    return {'url': query, 'status': 'Task completed!'}
def get_query_results(query, task, redo=False):
    """Return keyword results for ``query``, computing them on first request.

    Args:
        query: Keyword query text; key of the ``Keywords`` row.
        task: Celery task id stored on a newly created row.
        redo: Unused; kept so existing callers keep working.

    Returns:
        * the generated results when the query is new;
        * the stored results (decoded from JSON) when a finished row exists;
        * ``{"error": ...}`` when the query is still running;
        * the string ``"error"`` when the row has any other status.
    """
    # A single lookup replaces the former count() followed by first().
    existing = Keywords.query.filter(Keywords.query_text == query).first()

    if existing is None:
        new_keywords = Keywords(query_text=query, results="",
                                status_job="RUNNING", task_id=task,
                                begin_date=datetime.now())
        db.session.add(new_keywords)
        db.session.commit()

        results = generate_results(query, 20)

        smt = update(Keywords).where(Keywords.query_text == query).values(
            results=json.dumps(results), status_job="FINISHED")
        # Close the connection once the UPDATE has run; it used to be leaked.
        with db.engine.connect() as conn:
            conn.execute(smt)
        return results

    if existing.status_job == "RUNNING":
        return {"error": "query is already running, please wait and then refresh"}
    if existing.status_job == "FINISHED":
        return json.loads(existing.results)
    return "error"
GraphsGenerate.delay(domain) + else: + result = GraphsGenerate.delay("https://" + domain) time.sleep(0.3) results = Graphs.query.all() result_arr= {"results":[]} diff --git a/toolkit/routes/keywords/api.py b/toolkit/routes/keywords/api.py index 8a9bd12..daa7a9d 100644 --- a/toolkit/routes/keywords/api.py +++ b/toolkit/routes/keywords/api.py @@ -1,5 +1,6 @@ import json from datetime import datetime +import time from flask import current_app as app from flask import request @@ -9,29 +10,7 @@ from toolkit.controller.analysis.keywords import generate_results from toolkit.models import Keywords from toolkit.lib.api_tools import generate_answer - - -def get_query_results(query, redo=False): - check_exist = Keywords.query.filter(Keywords.query_text==query).count() - if check_exist > 0: - result = Keywords.query.filter(Keywords.query_text==query).first() - if result.status_job == "RUNNING": - return {"error": "query is already running, please wait and then refresh"} - elif result.status_job == "FINISHED": - return json.loads(result.results) - else: - new_keywords = Keywords(query_text=query, results="", - status_job="RUNNING",begin_date=datetime.now()) - db.session.add(new_keywords) - db.session.commit() - results = generate_results(query, 20) - conn = db.engine.connect() - smt = update(Keywords).where(Keywords.query_text==query).values(results=json.dumps(results), status_job="FINISHED") - conn.execute(smt) - - #Serp.update().where(query_text==query and domain==domain).values(begin_date=datetime.now(),url=result["url"], pos=result["pos"]) - return results - return "error" +from toolkit.celeryapp.tasks import KeywordsGet @app.route('/api/keywords', methods=["POST", "GET"]) @@ -39,11 +18,13 @@ def get_post_keywords(): try: if request.method == "POST": query = request.form["query"] - get_query_results(query) + result = KeywordsGet.delay(query) + time.sleep(.300) + keyw = Keywords.query.all() results = {"results":[]} for keyword in keyw: - 
@app.route('/api/keywords/status', methods=["POST"])
def get_keywords_status_by_task():
    """Report whether the keywords job identified by form field ``task`` is done.

    Returns:
        ``generate_answer(success=True)`` when a ``Keywords`` row with that
        task id exists and its ``status_job`` is ``"FINISHED"``; otherwise
        ``generate_answer(success=False)`` (unknown id, still running,
        missing field, or any error while querying).
    """
    try:
        task_id = request.form.get('task')
        if not task_id:
            # Missing/empty field: answer directly instead of relying on the
            # exception path.
            return generate_answer(success=False)
        row = Keywords.query.filter(Keywords.task_id == task_id).first()
        finished = row is not None and row.status_job == "FINISHED"
        return generate_answer(success=finished)
    except Exception:
        # Route boundary: keep answering with a failure payload, but record
        # the traceback instead of printing the bare message.
        app.logger.exception("Unable to read keywords job status")
        return generate_answer(success=False)
# Maps the ``extract_type`` values used by the extract API to the function
# that performs the extraction for a URL.
_EXTRACTORS = {
    "Headers": find_all_headers_url,
    "Links": find_all_links,
    "Links_Website": get_all_links_website,
    "Images": find_all_images,
}


@celery.task(bind=True, name="Extract")
def Extractor(self, extract_type, url):
    """Celery task that runs one extraction on ``url`` and stores the result.

    An ``Audit`` row is inserted in ``"RUNNING"`` state, tagged with the
    Celery request id.  Once the extraction returns, the row(s) matching
    ``url`` and ``extract_type`` receive the JSON-encoded result and are
    marked ``"FINISHED"``.

    Args:
        extract_type: One of ``"Headers"``, ``"Links"``, ``"Links_Website"``
            or ``"Images"``.
        url: Page (or site) to analyse.

    Returns:
        dict: ``{'url': url, 'Extract': extract_type,
        'status': 'Task completed!'}``.
    """
    new_audit = Audit(
        url=url,
        result=None,
        type_audit=extract_type,
        status_job="RUNNING",
        task_id=str(self.request.id),
        begin_date=datetime.now(),
    )
    db.session.add(new_audit)
    db.session.commit()

    extract = _EXTRACTORS.get(extract_type)
    # As before, an unrecognised type runs nothing and leaves the row in
    # "RUNNING" state; the API only submits the four types listed above.
    if extract is not None:
        value = extract(url)
        smt = (
            update(Audit)
            .where(Audit.url == url)
            .where(Audit.type_audit == extract_type)
            .values(result=json.dumps(value), status_job="FINISHED")
        )
        # Close the connection once the UPDATE has run; each branch used to
        # open one and never release it.
        with db.engine.connect() as conn:
            conn.execute(smt)

    return {'url': url, "Extract": extract_type, 'status': 'Task completed!'}
--- a/toolkit/routes/extract/api.py +++ b/toolkit/routes/extract/api.py @@ -1,5 +1,6 @@ import json from datetime import datetime +import time from flask import current_app as app from flask import redirect, render_template, request, url_for @@ -10,6 +11,7 @@ from toolkit.controller.seo.images import find_all_images from toolkit.controller.seo.links import find_all_links from toolkit.lib.api_tools import generate_answer +from toolkit.celeryapp.tasks import Extractor from toolkit.models import Audit @@ -20,7 +22,7 @@ def get_extract_headers_all(): result_arr = {"results": []} for i in results: result_arr["results"].append( - {"id": i.id, "url": i.url, "result": i.result, "begin_date": i.begin_date}) + {"id": i.id, "url": i.url, "result": i.result, "begin_date": i.begin_date, "task_id": i.task_id, "status_job": i.status_job}) return generate_answer(data=result_arr) except Exception as e: print(e) @@ -43,7 +45,7 @@ def get_extract_links_status_all(): result_arr = {"results": []} for i in results: result_arr["results"].append( - {"id": i.id, "url": i.url, "result": i.result, "begin_date": i.begin_date}) + {"id": i.id, "url": i.url, "result": i.result, "begin_date": i.begin_date, "task_id": i.task_id, "status_job": i.status_job}) return generate_answer(data=result_arr) except Exception as e: print(e) @@ -68,12 +70,8 @@ def post_extract_headers(): count = Audit.query.filter(Audit.url == url).filter( Audit.type_audit == "Headers").count() if url and count == 0: - value = find_all_headers_url(url) - new_audit = Audit( - url=url, result=json.dumps(value), type_audit="Headers", begin_date=datetime.now() - ) - db.session.add(new_audit) - db.session.commit() + Extractor.delay("Headers",url) + time.sleep(.300) return generate_answer(success=True) except Exception as e: print(e) @@ -97,12 +95,8 @@ def post_extract_add_links(): count = Audit.query.filter(Audit.url == url).filter( Audit.type_audit == "Links").count() if url and count == 0: - value = find_all_links(url) - new_audit 
= Audit( - url=url, result=json.dumps(value), type_audit="Links", begin_date=datetime.now() - ) - db.session.add(new_audit) - db.session.commit() + Extractor.delay("Links",url) + time.sleep(.300) return generate_answer(success=True) except Exception as e: print(e) @@ -126,7 +120,7 @@ def get_extract_all_links_website(): result_arr = {"results": []} for i in results: result_arr["results"].append( - {"id": i.id, "url": i.url, "result": i.result, "begin_date": i.begin_date}) + {"id": i.id, "url": i.url, "result": i.result, "begin_date": i.begin_date, "task_id": i.task_id, "status_job": i.status_job}) return generate_answer(data=result_arr) except Exception as e: print(e) @@ -139,12 +133,8 @@ def post_extract_add_links_website(): count = Audit.query.filter(Audit.url == url).filter( Audit.type_audit == "Links_Website").count() if url and count == 0: - value = get_all_links_website(url) - new_audit = Audit( - url=url, result=json.dumps(value), type_audit="Links_Website", begin_date=datetime.now() - ) - db.session.add(new_audit) - db.session.commit() + Extractor.delay("Links_Website",url) + time.sleep(.300) return generate_answer(success=True) except Exception as e: print(e) @@ -179,7 +169,7 @@ def get_extract_images_all(): result_arr = {"results":[]} for i in results: result_arr["results"].append( - {"id": i.id, "url": i.url, "result": i.result, "begin_date": i.begin_date}) + {"id": i.id, "url": i.url, "result": i.result, "begin_date": i.begin_date, "task_id": i.task_id, "status_job": i.status_job}) return generate_answer(data=result_arr) except Exception as e: print(e) @@ -192,12 +182,9 @@ def post_extract_add_images(): count = Audit.query.filter(Audit.url == url).filter( Audit.type_audit == "Images").count() if url and count == 0: - value = find_all_images(url) - new_audit = Audit( - url=url, result=json.dumps(value), type_audit="Images", begin_date=datetime.now() - ) - db.session.add(new_audit) - db.session.commit() + Extractor.delay("Images",url) + time.sleep(.300) + 
return generate_answer(success=True) except Exception as e: print(e) @@ -225,3 +212,15 @@ def post_delete_extract_image(): print(e) return generate_answer(success=False) +@app.route('/api/extract/status', methods=["POST"]) +def get_extract_status_by_task(): + try: + task_id = request.form['task'] + result = Audit.query.filter(Audit.task_id == task_id).first() + if result and result.status_job == "FINISHED": + return generate_answer(success=True) + else: + return generate_answer(success=False) + except Exception as e: + print(e) + return generate_answer(success=False) \ No newline at end of file diff --git a/toolkit/templates/extract/headers/extract_headers_all.jinja2 b/toolkit/templates/extract/headers/extract_headers_all.jinja2 index 09d5e1a..1f3ba12 100644 --- a/toolkit/templates/extract/headers/extract_headers_all.jinja2 +++ b/toolkit/templates/extract/headers/extract_headers_all.jinja2 @@ -17,6 +17,7 @@ URL + Status Update Time Update @@ -31,7 +32,27 @@ {{item.url}} - + + {% if item.status_job == "RUNNING" %} + {{item["status_job"]}} + + {% else %} + {{item["status_job"]}} {% endif %} + {{item.begin_date|formatdatetime}} diff --git a/toolkit/templates/extract/images/images_all.jinja2 b/toolkit/templates/extract/images/images_all.jinja2 index e719a30..2a3ba6f 100644 --- a/toolkit/templates/extract/images/images_all.jinja2 +++ b/toolkit/templates/extract/images/images_all.jinja2 @@ -17,6 +17,7 @@ URL + Status Update Time Update @@ -31,6 +32,27 @@ {{item.url}} + + {% if item.status_job == "RUNNING" %} + {{item["status_job"]}} + + {% else %} + {{item["status_job"]}} {% endif %} + diff --git a/toolkit/templates/extract/links/links_all.jinja2 b/toolkit/templates/extract/links/links_all.jinja2 index 19fb90d..0e19896 100644 --- a/toolkit/templates/extract/links/links_all.jinja2 +++ b/toolkit/templates/extract/links/links_all.jinja2 @@ -17,6 +17,7 @@ URL + Status Update Time Update @@ -31,6 +32,27 @@ {{item.url}} + + {% if item.status_job == "RUNNING" %} + 
{{item["status_job"]}} + + {% else %} + {{item["status_job"]}} {% endif %} + diff --git a/toolkit/templates/extract/links_website/links_all_website.jinja2 b/toolkit/templates/extract/links_website/links_all_website.jinja2 index c1443a8..e9cab44 100644 --- a/toolkit/templates/extract/links_website/links_all_website.jinja2 +++ b/toolkit/templates/extract/links_website/links_all_website.jinja2 @@ -18,6 +18,7 @@ URL + Status Update Time Update @@ -32,7 +33,27 @@ {{item.url}} - + + {% if item.status_job == "RUNNING" %} + {{item["status_job"]}} + + {% else %} + {{item["status_job"]}} {% endif %} + {{item.begin_date|formatdatetime}} From 99f15b1c1323691c61ec564240ba330a26d0453c Mon Sep 17 00:00:00 2001 From: StanGirard Date: Fri, 24 Jul 2020 00:06:39 +0200 Subject: [PATCH 13/13] [README] Celery --- README.md | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 403ee00..8221fbe 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,10 @@ I've grown tired of SEO agencies making us pay hundreds of euros for simple tool ## Installation -You need **Python3** +You need: +- **Python3** +- **[Redis Server](https://redis.io/topics/quickstart)** + ```Bash git clone https://github.com/StanGirard/SEOToolkit @@ -42,24 +45,25 @@ Then install dependencies pip install -r requirements.txt ``` -or you can use Docker +## Running +### Flask ```Bash -docker build -t seo-toolkit:latest . +python3 run.py ``` -## Running - +### Redis Server ```Bash -flask run +redis-server ``` -or with docker - +### Celery Worker ```Bash -docker run -d -p 5000:5000 seo-toolkit:latest +celery -A celery_worker.celery worker --loglevel=info ``` + + ## Dashboard You can access the dashboard by going to [localhost:5000](http://localhost:5000)