Skip to content

Commit

Permalink
stats: collect stats on record views and file downloads.
Browse files Browse the repository at this point in the history
  • Loading branch information
psaiz committed Jan 16, 2025
1 parent ca34a20 commit a70e406
Show file tree
Hide file tree
Showing 4 changed files with 121 additions and 5 deletions.
106 changes: 106 additions & 0 deletions cernopendata/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,17 @@
import os
import warnings

from celery.schedules import timedelta
from flask import request
from invenio_records_files.api import _Record
from invenio_records_rest.config import RECORDS_REST_ENDPOINTS
from invenio_records_rest.facets import nested_filter, range_filter, terms_filter
from invenio_records_rest.utils import allow_all
from invenio_search.engine import dsl
from invenio_stats.aggregations import StatAggregator
from invenio_stats.contrib.config import EVENTS_CONFIG
from invenio_stats.queries import TermsQuery
from invenio_stats.tasks import StatsAggregationTask, StatsEventTask
from urllib3.exceptions import InsecureRequestWarning

from cernopendata.modules.pages.config import *
Expand Down Expand Up @@ -133,6 +138,107 @@
# Celery
CELERY_ACCEPT_CONTENT = ["json", "msgpack", "yaml"]

# Statistics event definitions: use the EVENTS_CONFIG imported from
# invenio_stats.contrib.config as-is (no local overrides).
STATS_EVENTS = EVENTS_CONFIG

# Aggregation definitions for invenio-stats. Each entry names the index
# templates to register, the aggregator class, and the keyword arguments
# ("params") passed to that class. Both aggregations use a daily interval and
# a yearly index interval.
STATS_AGGREGATIONS = {
    "file-download-agg": {
        "templates": "invenio_stats.contrib.aggregations.aggr_file_download",
        "cls": StatAggregator,
        "params": {
            # The aggregator must be told which event it consumes and which
            # field identifies the aggregated object; these mirror the
            # "record-view-agg" entry below so both are configured alike.
            "event": "file-download",
            "field": "unique_id",
            "interval": "day",
            "index_interval": "year",
            # Event fields copied verbatim into every aggregation document.
            "copy_fields": {
                "file_key": "file_key",
                "bucket_id": "bucket_id",
                "file_id": "file_id",
            },
            # Metrics as (aggregation type, source field, extra options).
            "metric_fields": {
                "unique_count": (
                    "cardinality",
                    "unique_session_id",
                    {"precision_threshold": 1000},
                ),
                "volume": ("sum", "size", {}),
            },
        },
    },
    "record-view-agg": {
        "templates": "invenio_stats.contrib.aggregations.aggr_record_view",
        "cls": StatAggregator,
        "params": {
            "event": "record-view",
            "field": "unique_id",
            "interval": "day",
            "index_interval": "year",
            # Event fields copied verbatim into every aggregation document.
            "copy_fields": {
                "record_id": "record_id",
                "pid_type": "pid_type",
                "pid_value": "pid_value",
            },
            # Metrics as (aggregation type, source field, extra options).
            "metric_fields": {
                "unique_count": (
                    "cardinality",
                    "unique_session_id",
                    {"precision_threshold": 1000},
                ),
            },
        },
    },
}

# Query definitions for invenio-stats. Both queries have the same shape: a
# TermsQuery with no permission factory, a required "recid" filter and "recid"
# copied into the result. They differ only in name, index, doc_type and
# metrics, so they are generated from the table at the bottom.
STATS_QUERIES = {
    query_name: {
        "cls": TermsQuery,
        "permission_factory": None,
        "params": {
            "index": index_name,
            "doc_type": doc_type,
            "copy_fields": {"recid": "recid"},
            "query_modifiers": [],
            "required_filters": {"recid": "recid"},
            "metric_fields": metric_fields,
        },
    }
    # (query name, index, doc_type, metrics as (aggregation, field, options))
    for query_name, index_name, doc_type, metric_fields in (
        (
            "record-view",
            "stats-record-view",
            "record-view-day-aggregation",
            {
                "views": ("sum", "count", {}),
                "unique_views": ("sum", "unique_count", {}),
            },
        ),
        (
            "record-download",
            "stats-file-download",
            "file-download-day-aggregation",
            {
                "downloads": ("sum", "count", {}),
                "unique_downloads": ("sum", "unique_count", {}),
                "data_volume": ("sum", "volume", {}),
            },
        ),
    )
}


# Periodic Celery beat entries for statistics. Each entry is a copy of the task
# description imported from invenio_stats.tasks with a "schedule" added.
CELERY_BEAT_SCHEDULE = {
    # Indexing of statistics events: every five minutes.
    "stats-process-events": dict(
        StatsEventTask,
        schedule=timedelta(minutes=5),
    ),
    # Aggregation of the indexed events: every thirty minutes.
    "stats-aggregate-events": dict(
        StatsAggregationTask,
        schedule=timedelta(minutes=30),
    ),
}
# JSONSchemas
JSONSCHEMAS_ENDPOINT = "/schema"
JSONSCHEMAS_HOST = "opendata.cern.ch"
Expand Down
14 changes: 13 additions & 1 deletion cernopendata/modules/records/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,11 @@
import six
from flask import abort, current_app, jsonify, render_template, request
from invenio_files_rest.models import FileInstance
from invenio_files_rest.signals import file_downloaded
from invenio_files_rest.views import ObjectResource
from invenio_records.api import Record
from invenio_records_files.utils import record_file_factory
from invenio_records_ui.signals import record_viewed

# from invenio_files_rest.models import FileInstance, ObjectVersion
# from invenio_records.errors import MissingModelError
Expand Down Expand Up @@ -108,7 +110,7 @@ def file_download_ui(pid, record, _record_file_factory=None, **kwargs):
obj = fileobj.obj
# Check permissions
ObjectResource.check_object_permission(obj)

file_downloaded.send(current_app._get_current_object(), obj=obj)
return ObjectResource.send_object(
obj.bucket,
obj,
Expand Down Expand Up @@ -202,6 +204,11 @@ def record_metadata_view(pid, record, template=None):
record["dataset_semantics_header"] = (
["variable", "type"] + sorted(optional) + ["description"]
)
record_viewed.send(
current_app._get_current_object(),
pid=pid,
record=record,
)

return render_template(
[
Expand All @@ -226,6 +233,11 @@ def term_metadata_view(pid, record, template=None):

def doc_metadata_view(pid, record, template=None):
"""Doc detail view."""
record_viewed.send(
current_app._get_current_object(),
pid=pid,
record=record,
)
return render_template(
["cernopendata_records_ui/docs/detail.html"],
pid=pid,
Expand Down
4 changes: 1 addition & 3 deletions docker-compose-dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,4 @@ services:
volumes:
- ./cernopendata:/code/cernopendata
- ./scripts:/code/scripts
- ./tests:/code/tests
profiles:
- donotstart
- ./tests:/code/tests
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@
# Custom Invenio `files` bundle
"invenio-previewer>=2.0.1,<3.0.0",
"invenio-records-files>=1.2.1,<3.0.0",
"invenio-stats>=4.0.1,<5.0.0",
"jupyter-client==7.1.0",
"pluggy==0.13.1",
# Custom Invenio `postgresql` bundle
Expand Down Expand Up @@ -127,7 +128,6 @@
# Pin Flask/gevent/greenlet/raven to make master work again
"Flask==2.2.5",
"Flask-Alembic==2.0.1",
"flask-celeryext==0.4.0",
"Werkzeug~=2.2.0",
"gevent==23.9.1",
"greenlet==3.0.3",
Expand Down

0 comments on commit a70e406

Please sign in to comment.