diff --git a/api/metrics/utils.py b/api/metrics/utils.py index 31427067093..54af7531200 100644 --- a/api/metrics/utils.py +++ b/api/metrics/utils.py @@ -1,4 +1,5 @@ import re +from urllib.parse import urlsplit import pytz @@ -6,6 +7,9 @@ from django.utils import timezone from rest_framework.exceptions import ValidationError +from osf.models import AbstractNode, Guid +from osf.metrics.counted_usage import _get_immediate_wrapper + DATETIME_FORMAT = '%Y-%m-%dT%H:%M' DATE_FORMAT = '%Y-%m-%d' @@ -114,3 +118,47 @@ def parse_date_range(query_params, is_monthly=False): start_date, end_date = parse_dates(query_params, is_monthly=is_monthly) report_date_range = {'gte': str(start_date), 'lte': str(end_date)} return report_date_range + + +def _user_has_read_on_resolved_node(user, guid_referent): + """True if ``user`` has READ on the node this referent belongs to.""" + current = guid_referent + while current is not None and not isinstance(current, AbstractNode): + current = _get_immediate_wrapper(current) + if current is None or not isinstance(current, AbstractNode): + return False + return current.contributors_and_group_members.filter(guids___id=user._id).exists() + + +def should_skip_counted_usage(user, *, item_guid=None, pageview_info=None): + """Return True when this usage should not be recorded.""" + if not getattr(user, 'is_authenticated', False): + return False + + referents = [] + seen_ids = set() + + def _add_referent(ref): + if ref is None: + return + key = (ref.__class__.__name__, ref.pk) + if key in seen_ids: + return + seen_ids.add(key) + referents.append(ref) + + if item_guid: + guid_obj = Guid.load(item_guid) + if guid_obj and guid_obj.referent: + _add_referent(guid_obj.referent) + + page_url = (pageview_info or {}).get('page_url') + if page_url: + for segment in urlsplit(page_url).path.split('/'): + if not segment or len(segment) < 5: + continue + guid_obj = Guid.load(segment) + if guid_obj and guid_obj.referent: + _add_referent(guid_obj.referent) + + return any(_user_has_read_on_resolved_node(user, ref) for ref in referents) diff --git a/api/metrics/views.py b/api/metrics/views.py index daaa684d13a..99ecf3fe347 100644 --- a/api/metrics/views.py +++ b/api/metrics/views.py @@ -43,6 +43,7 @@ from api.metrics.utils import ( parse_datetimes, parse_date_range, + should_skip_counted_usage, ) from api.nodes.permissions import MustBePublic @@ -388,6 +389,12 @@ class CountedAuthUsageView(JSONAPIBaseView): def post(self, request, *args, **kwargs): serializer = self.serializer_class(data=request.data) serializer.is_valid(raise_exception=True) + if should_skip_counted_usage( + request.user, + item_guid=serializer.validated_data.get('item_guid'), + pageview_info=serializer.validated_data.get('pageview_info'), + ): + return HttpResponse(status=204) session_id, user_is_authenticated = self._get_session_id( request, client_session_id=serializer.validated_data.get('client_session_id'), diff --git a/api_tests/metrics/test_counted_usage.py b/api_tests/metrics/test_counted_usage.py index 568d663be9e..be1d986ff6d 100644 --- a/api_tests/metrics/test_counted_usage.py +++ b/api_tests/metrics/test_counted_usage.py @@ -3,13 +3,18 @@ import pytest from unittest import mock +from framework.auth.core import Auth + from osf_tests.factories import ( AuthUserFactory, - PreprintFactory, NodeFactory, + PreprintFactory, + PrivateLinkFactory, + ProjectFactory, RegistrationFactory, # UserFactory, ) +from osf.utils.permissions import ADMIN, READ, WRITE from api_tests.utils import create_test_file @@ -351,3 +356,125 @@ def test_child_registration_file(self, app, mock_save, child_reg_file_guid, chil 'surrounding_guids': None, }, ) + + +@pytest.mark.django_db +class TestContributorExclusion: + + def test_creator_pageview_not_recorded(self, app, mock_save): + user = AuthUserFactory() + project = ProjectFactory(creator=user) + payload = counted_usage_payload( + item_guid=project._id, + action_labels=['view', 'web'], + pageview_info={'page_url': f'https://osf.io/{project._id}/'}, + ) + resp = app.post_json_api(COUNTED_USAGE_URL, payload, auth=user.auth) + assert resp.status_code == 204 + assert mock_save.call_count == 0 + + @pytest.mark.parametrize( + 'permissions', + [READ, WRITE, ADMIN], + ids=['read', 'write', 'admin'], + ) + def test_contributor_pageview_not_recorded(self, app, mock_save, permissions): + creator = AuthUserFactory() + contributor = AuthUserFactory() + project = ProjectFactory(creator=creator) + project.add_contributor(contributor, permissions=permissions, auth=Auth(creator)) + payload = counted_usage_payload( + item_guid=project._id, + action_labels=['view', 'web'], + pageview_info={'page_url': f'https://osf.io/{project._id}/analytics/'}, + ) + resp = app.post_json_api(COUNTED_USAGE_URL, payload, auth=contributor.auth) + assert resp.status_code == 204 + assert mock_save.call_count == 0 + + def test_non_contributor_pageview_recorded(self, app, mock_save): + creator = AuthUserFactory() + visitor = AuthUserFactory() + project = ProjectFactory(creator=creator, is_public=True) + payload = counted_usage_payload( + item_guid=project._id, + action_labels=['view', 'web'], + pageview_info={'page_url': f'https://osf.io/{project._id}/'}, + ) + resp = app.post_json_api(COUNTED_USAGE_URL, payload, auth=visitor.auth) + assert resp.status_code == 201 + assert mock_save.call_count == 1 + + def test_parent_contributor_not_on_child_component_pageview_recorded(self, app, mock_save): + creator = AuthUserFactory() + child_owner = AuthUserFactory() + parent_reader = AuthUserFactory() + parent = ProjectFactory(creator=creator, is_public=True) + child = NodeFactory(parent=parent, creator=child_owner, is_public=True) + parent.add_contributor(parent_reader, permissions=ADMIN, auth=Auth(creator)) + assert not child.contributors_and_group_members.filter(guids___id=parent_reader._id).exists() + payload = counted_usage_payload( + item_guid=child._id, + action_labels=['view', 'web'], + pageview_info={'page_url': f'https://osf.io/{child._id}/'}, + ) + resp = app.post_json_api(COUNTED_USAGE_URL, payload, auth=parent_reader.auth) + assert resp.status_code == 201 + assert mock_save.call_count == 1 + + def test_anonymous_view_only_link_visitor_pageview_recorded(self, app, mock_save): + creator = AuthUserFactory() + project = ProjectFactory(creator=creator, is_public=False) + link = PrivateLinkFactory(anonymous=True, creator=creator) + link.nodes.add(project) + payload = counted_usage_payload( + item_guid=project._id, + action_labels=['view', 'web'], + client_session_id='vol-client-session', + pageview_info={ + 'page_url': f'https://osf.io/{project._id}/?view_only={link.key}', + }, + ) + resp = app.post_json_api(COUNTED_USAGE_URL, payload) + assert resp.status_code == 201 + assert mock_save.call_count == 1 + + def test_logged_in_non_contributor_view_only_link_pageview_recorded(self, app, mock_save): + creator = AuthUserFactory() + visitor = AuthUserFactory() + project = ProjectFactory(creator=creator, is_public=False) + link = PrivateLinkFactory(anonymous=False, creator=creator) + link.nodes.add(project) + payload = counted_usage_payload( + item_guid=project._id, + action_labels=['view', 'web'], + pageview_info={ + 'page_url': f'https://osf.io/{project._id}/files/?view_only={link.key}', + }, + ) + resp = app.post_json_api(COUNTED_USAGE_URL, payload, auth=visitor.auth) + assert resp.status_code == 201 + assert mock_save.call_count == 1 + + @pytest.mark.parametrize( + 'permissions', + [READ, WRITE, ADMIN], + ids=['read', 'write', 'admin'], + ) + def test_logged_in_contributor_view_only_link_pageview_not_recorded(self, app, mock_save, permissions): + creator = AuthUserFactory() + contributor = AuthUserFactory() + project = ProjectFactory(creator=creator, is_public=False) + project.add_contributor(contributor, permissions=permissions, auth=Auth(creator)) + link = PrivateLinkFactory(anonymous=False, creator=creator) + link.nodes.add(project) + payload = counted_usage_payload( + item_guid=project._id, + action_labels=['view', 'web'], + pageview_info={ + 'page_url': f'https://osf.io/{project._id}/?view_only={link.key}', + }, + ) + resp = app.post_json_api(COUNTED_USAGE_URL, payload, auth=contributor.auth) + assert resp.status_code == 204 + assert mock_save.call_count == 0