Enable usage statistics (invenio-stats) in the portal.

* cernopendata/config.py: declare the stats events, aggregations and queries,
  plus the Celery beat entries that process and aggregate events.
* cernopendata/modules/records/utils.py: send the file_downloaded and
  record_viewed signals from the download and detail views.
* docker-compose-dev.yml: remove the "profiles: donotstart" entry.
* setup.py: add invenio-stats, remove the flask-celeryext pin.

diff --git a/cernopendata/config.py b/cernopendata/config.py
index c6dc05a..fcfaa0f 100644
--- a/cernopendata/config.py
+++ b/cernopendata/config.py
@@ -27,12 +27,17 @@
 import os
 import warnings
 
+from celery.schedules import timedelta
 from flask import request
 from invenio_records_files.api import _Record
 from invenio_records_rest.config import RECORDS_REST_ENDPOINTS
 from invenio_records_rest.facets import nested_filter, range_filter, terms_filter
 from invenio_records_rest.utils import allow_all
 from invenio_search.engine import dsl
+from invenio_stats.aggregations import StatAggregator
+from invenio_stats.contrib.config import EVENTS_CONFIG
+from invenio_stats.queries import TermsQuery
+from invenio_stats.tasks import StatsAggregationTask, StatsEventTask
 from urllib3.exceptions import InsecureRequestWarning
 
 from cernopendata.modules.pages.config import *
@@ -133,6 +138,116 @@
 # Celery
 CELERY_ACCEPT_CONTENT = ["json", "msgpack", "yaml"]
 
+# Statistics: reuse the event definitions from invenio_stats.contrib.config.
+STATS_EVENTS = EVENTS_CONFIG
+
+# Roll events up per day ("interval") into one index per year ("index_interval").
+STATS_AGGREGATIONS = {
+    "file-download-agg": {
+        "templates": "invenio_stats.contrib.aggregations.aggr_file_download",
+        "cls": StatAggregator,
+        "params": {
+            # "event"/"field"/"interval" mirror the "record-view-agg" entry below.
+            "event": "file-download",
+            "field": "unique_id",
+            "interval": "day",
+            "index_interval": "year",
+            "copy_fields": {
+                "file_key": "file_key",
+                "bucket_id": "bucket_id",
+                "file_id": "file_id",
+            },
+            "metric_fields": {
+                "unique_count": (
+                    "cardinality",
+                    "unique_session_id",
+                    {"precision_threshold": 1000},
+                ),
+                "volume": ("sum", "size", {}),
+            },
+        },
+    },
+    "record-view-agg": {
+        "templates": "invenio_stats.contrib.aggregations.aggr_record_view",
+        "cls": StatAggregator,
+        "params": {
+            "event": "record-view",
+            "field": "unique_id",
+            "interval": "day",
+            "index_interval": "year",
+            "copy_fields": {
+                "record_id": "record_id",
+                "pid_type": "pid_type",
+                "pid_value": "pid_value",
+            },
+            "metric_fields": {
+                "unique_count": (
+                    "cardinality",
+                    "unique_session_id",
+                    {"precision_threshold": 1000},
+                ),
+            },
+        },
+    },
+}
+
+# NOTE(review): both queries filter on "recid", a field that neither aggregation
+# above lists in its "copy_fields" -- confirm the aggregated documents carry it.
+STATS_QUERIES = {
+    "record-view": {
+        "cls": TermsQuery,
+        "permission_factory": None,
+        "params": {
+            "index": "stats-record-view",
+            "doc_type": "record-view-day-aggregation",
+            "copy_fields": {
+                "recid": "recid",
+            },
+            "query_modifiers": [],
+            "required_filters": {
+                "recid": "recid",
+            },
+            "metric_fields": {
+                "views": ("sum", "count", {}),
+                "unique_views": ("sum", "unique_count", {}),
+            },
+        },
+    },
+    "record-download": {
+        "cls": TermsQuery,
+        "permission_factory": None,
+        "params": {
+            "index": "stats-file-download",
+            "doc_type": "file-download-day-aggregation",
+            "copy_fields": {
+                "recid": "recid",
+            },
+            "query_modifiers": [],
+            "required_filters": {
+                "recid": "recid",
+            },
+            "metric_fields": {
+                "downloads": ("sum", "count", {}),
+                "unique_downloads": ("sum", "unique_count", {}),
+                "data_volume": ("sum", "volume", {}),
+            },
+        },
+    },
+}
+
+
+CELERY_BEAT_SCHEDULE = {
+    # indexing of statistics events & aggregations
+    "stats-process-events": {
+        **StatsEventTask,
+        "schedule": timedelta(minutes=5),  # Every five minutes
+    },
+    "stats-aggregate-events": {
+        **StatsAggregationTask,
+        "schedule": timedelta(minutes=30),  # Every thirty minutes
+    },
+}
+
 # JSONSchemas
 JSONSCHEMAS_ENDPOINT = "/schema"
 JSONSCHEMAS_HOST = "opendata.cern.ch"
diff --git a/cernopendata/modules/records/utils.py b/cernopendata/modules/records/utils.py
index 6a57d1b..c82dde9 100644
--- a/cernopendata/modules/records/utils.py
+++ b/cernopendata/modules/records/utils.py
@@ -30,9 +30,11 @@
 import six
 from flask import abort, current_app, jsonify, render_template, request
 from invenio_files_rest.models import FileInstance
+from invenio_files_rest.signals import file_downloaded
 from invenio_files_rest.views import ObjectResource
 from invenio_records.api import Record
 from invenio_records_files.utils import record_file_factory
+from invenio_records_ui.signals import record_viewed
 
 # from invenio_files_rest.models import FileInstance, ObjectVersion
 # from invenio_records.errors import MissingModelError
@@ -108,7 +110,7 @@ def file_download_ui(pid, record, _record_file_factory=None, **kwargs):
     obj = fileobj.obj
     # Check permissions
     ObjectResource.check_object_permission(obj)
-
+    file_downloaded.send(current_app._get_current_object(), obj=obj)
     return ObjectResource.send_object(
         obj.bucket,
         obj,
@@ -202,6 +204,11 @@ def record_metadata_view(pid, record, template=None):
         record["dataset_semantics_header"] = (
             ["variable", "type"] + sorted(optional) + ["description"]
         )
+    record_viewed.send(
+        current_app._get_current_object(),
+        pid=pid,
+        record=record,
+    )
 
     return render_template(
         [
@@ -226,6 +233,11 @@ def term_metadata_view(pid, record, template=None):
 
 def doc_metadata_view(pid, record, template=None):
     """Doc detail view."""
+    record_viewed.send(
+        current_app._get_current_object(),
+        pid=pid,
+        record=record,
+    )
     return render_template(
         ["cernopendata_records_ui/docs/detail.html"],
         pid=pid,
diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml
index f503470..5a671a3 100644
--- a/docker-compose-dev.yml
+++ b/docker-compose-dev.yml
@@ -63,6 +63,4 @@ services:
     volumes:
       - ./cernopendata:/code/cernopendata
       - ./scripts:/code/scripts
-      - ./tests:/code/tests
-    profiles:
-      - donotstart
\ No newline at end of file
+      - ./tests:/code/tests
\ No newline at end of file
diff --git a/setup.py b/setup.py
index b468f7c..d597123 100644
--- a/setup.py
+++ b/setup.py
@@ -94,6 +94,7 @@
     # Custom Invenio `files` bundle
     "invenio-previewer>=2.0.1,<3.0.0",
     "invenio-records-files>=1.2.1,<3.0.0",
+    "invenio-stats>=4.0.1,<5.0.0",
     "jupyter-client==7.1.0",
     "pluggy==0.13.1",
     # Custom Invenio `postgresql` bundle
@@ -127,7 +128,6 @@
     # Pin Flask/gevent/greenlet/raven to make master work again
     "Flask==2.2.5",
     "Flask-Alembic==2.0.1",
-    "flask-celeryext==0.4.0",
     "Werkzeug~=2.2.0",
     "gevent==23.9.1",
     "greenlet==3.0.3",