Skip to content

Commit

Permalink
Merge pull request #488 from internetstandards/42
Browse files Browse the repository at this point in the history
42
  • Loading branch information
stitch authored Nov 6, 2023
2 parents 36c99ed + 09719be commit a368872
Show file tree
Hide file tree
Showing 12 changed files with 78 additions and 56 deletions.
2 changes: 1 addition & 1 deletion dashboard/internet_nl_dashboard/logic/domains.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,7 @@ def update_list_settings(account: Account, user_input: Dict) -> Dict[str, Any]:
last_report_prefetch = Prefetch(
'urllistreport_set',
# filter(pk=UrlListReport.objects.latest('id').pk).
queryset=UrlListReport.objects.order_by('-id').only('id', 'at_when'),
queryset=UrlListReport.objects.order_by('-id').only('id', 'at_when', 'urllist__id'),
to_attr='last_report'
)

Expand Down
14 changes: 13 additions & 1 deletion dashboard/internet_nl_dashboard/logic/mail.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from dashboard.internet_nl_dashboard.logic.report_comparison import (compare_report_in_detail, filter_comparison_report,
key_calculation, render_comparison_view)
from dashboard.internet_nl_dashboard.models import AccountInternetNLScan, DashboardUser, UrlListReport
from dashboard.settings import LANGUAGES

log = logging.getLogger(__package__)

Expand Down Expand Up @@ -92,9 +93,11 @@ def send_scan_finished_mails(scan: AccountInternetNLScan) -> int:
report = UrlListReport.objects.all().filter(id=scan.report.id).order_by("-id").defer('calculation').first()

for user in users:
log.debug("Sending finished mail to user %s", user.id)

# set unsubscribe code if it's not set yet. This allows the user to instantly unsubscribe from this feed.
if user.dashboarduser.mail_after_mail_unsubscribe_code == "":
log.debug("For some reason user %s has no unsubscribe code, generating one now", user.id)
user.dashboarduser.mail_after_mail_unsubscribe_code = generate_unsubscribe_code()
user.dashboarduser.save()

Expand Down Expand Up @@ -132,7 +135,7 @@ def send_scan_finished_mails(scan: AccountInternetNLScan) -> int:
previous = convert_to_email_safe_values(previous, user.dashboarduser.mail_preferred_language.code.lower())

placeholders = {**placeholders, **previous}

log.debug("Sending actual finished mail to user %s", user.id)
mail.send(
sender=config.EMAIL_NOTIFICATION_SENDER,
recipients=user.dashboarduser.mail_preferred_mail_address, # List of email addresses also accepted
Expand Down Expand Up @@ -213,6 +216,15 @@ def values_from_previous_report(report_id: int, previous_report: UrlListReport)


def convert_to_email_safe_values(values: dict, mail_language: str = "en") -> dict:

# In some cases this is not set, or it defaults to 'af' (Afghanistan), which is the first ISO code in this list:
# https://en.wikipedia.org/wiki/List_of_ISO_3166_country_codes.
# Even though we do error handling, we're nice here and try to use a code that we know in case this happens.
# See issue INTERNET-NL-DASHBOARD-68.
if mail_language not in [language_code for language_code, name in LANGUAGES]:
mail_language = 'en'
log.debug("Mail language: %s", mail_language)

return {
"previous_report_available": str(values["previous_report_available"]),
"previous_report_average_internet_nl_score": values["previous_report_average_internet_nl_score"],
Expand Down
43 changes: 32 additions & 11 deletions dashboard/internet_nl_dashboard/logic/spreadsheet.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,22 @@ def is_valid_extension(file: str) -> bool:
return False


def get_sheet(file: str) -> List:
    """
    Load the spreadsheet stored at `file`, naming the columns by the first row (row 0).

    Any failure to read the file results in an empty list, so callers can simply test the result for truthiness.

    :param file: path of the spreadsheet file to open
    :return: the sheet as returned by p.get_sheet, or an empty list when the file could not be read
    """
    try:
        return p.get_sheet(file_name=file, name_columns_by_row=0)
    except (XLRDError, zipfile.BadZipFile):
        # XLRDError example:
        #   xlrd.biffh.XLRDError: Unsupported format, or corrupt file: Expected BOF record; found b'thisfile'
        # BadZipFile is raised for the corrupted file in the unit tests.
        return []
    except Exception as exc:  # pylint: disable=broad-except
        # Anything unexpected is logged with its traceback and treated like an unreadable file.
        log.exception(exc)
        return []


def get_data(file: str) -> Dict[str, Dict[str, Dict[str, list]]]:
"""
Will return a simple set of data, without too much validation. Deduplicates data per unique category.
Expand All @@ -133,16 +149,8 @@ def get_data(file: str) -> Dict[str, Dict[str, Dict[str, list]]]:

data: Dict[str, Any] = {}

try:
sheet = p.get_sheet(file_name=file, name_columns_by_row=0)
except XLRDError:
# xlrd.biffh.XLRDError: Unsupported format, or corrupt file: Expected BOF record; found b'thisfile'
return data
except zipfile.BadZipFile:
# the corrupted file in the unit tests
return data
except Exception as exc: # pylint: disable=broad-except
log.exception(exc)
sheet = get_sheet(file=file)
if not sheet:
return data

# Skips the first entry
Expand All @@ -154,7 +162,10 @@ def get_data(file: str) -> Dict[str, Dict[str, Dict[str, list]]]:
# Data is parsed to python-like datatype. In this case we only expect strings and cast them as such.
found_categories = str(row[0]).lower().strip().split(',')
found_urls = str(row[1]).lower().strip().split(',')
found_tags = str(row[2]).lower().strip().split(',')
found_tags = []
# the tag column is optional: only read tags when the row has a third column
if len(row) > 2:
found_tags = str(row[2]).lower().strip().split(',')

for found_category in found_categories:
found_category = found_category.strip()
Expand Down Expand Up @@ -344,11 +355,21 @@ def upload_domain_spreadsheet_to_list(account: Account, user: DashboardUser, url
if not urllist:
return {'error': True, 'success': False, 'message': 'list_does_not_exist', 'details': '', 'status': 'error'}

# The spreadsheet content is leading: anything in the current list, including tags, will be removed.
# There is no smart merging strategy here. That might be added in the future, where we look at what is
# already in the list and only add changes.
# This will also remove the tags on this list automatically, without touching other lists that have the same
# url and different tags.
urllist.urls.clear()

# we don't care about the list name, we'll just add anything that is given as input...
result = {'incorrect_urls': [],
'added_to_list': 0,
'already_in_list': 0}
for _, domain_data in domain_lists.items():
log.debug(domain_data)
# todo: when a tag has a domain, it might be added as a domain, which is wrong. Only use the first
# column of uploaded data.
extracted_urls, _ = retrieve_possible_urls_from_unfiltered_input(", ".join(domain_data))
cleaned_urls = clean_urls(extracted_urls)

Expand Down
2 changes: 0 additions & 2 deletions dashboard/internet_nl_dashboard/scanners/subdomains.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,9 @@ def scan_status(account: Account, urllist_id: int):
@app.task(queue="storage")
def progress_subdomain_discovery_scans():
scans = SubdomainDiscoveryScan.objects.all().filter(state="requested")
log.error("yolo")

tasks = []
for scan in scans:
log.error("swag")
update_state(scan.id, "scanning")
tasks.append(group(perform_subdomain_scan.si(scan.id) | update_state.si(scan.id, "finished")))

Expand Down
4 changes: 2 additions & 2 deletions dashboard/internet_nl_dashboard/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

from dashboard.celery import app
from dashboard.internet_nl_dashboard.models import UrlList, UrlListReport
from dashboard.internet_nl_dashboard.scanners import scan_internet_nl_per_account
from dashboard.internet_nl_dashboard.scanners import scan_internet_nl_per_account, subdomains
from dashboard.internet_nl_dashboard.scanners.scan_internet_nl_per_account import initialize_scan

log = logging.getLogger(__name__)
Expand Down Expand Up @@ -48,7 +48,7 @@ def start_scans_for_lists_who_are_up_for_scanning() -> Task:

# explicitly declare the imported modules as this module's 'content', prevents pyflakes issues
# Todo: List item 0 has incompatible type Module; expected Module
__all__: List[Module] = [scan_internet_nl_per_account] # type: ignore
__all__: List[Module] = [scan_internet_nl_per_account, subdomains] # type: ignore


@app.task(queue='storage')
Expand Down
3 changes: 2 additions & 1 deletion dashboard/internet_nl_dashboard/tests/test_mail.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ def setup_test():
account = Account(**{'name': 'test'})
account.save()

dashboarduser = DashboardUser(**{'mail_preferred_mail_address': '[email protected]', 'mail_preferred_language': 'nl',
# use a language that is not supported, so the system will fall back to English...
dashboarduser = DashboardUser(**{'mail_preferred_mail_address': '[email protected]', 'mail_preferred_language': 'af',
'mail_send_mail_after_scan_finished': True, 'account': account, 'user': user})
dashboarduser.save()

Expand Down
2 changes: 1 addition & 1 deletion requirements-deploy.in
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# make sure there are no version conflicts with requirements.txt
-c requirements.txt

websecmap[deploy] @ git+https://gitlab.com/internet-cleanup-foundation/web-security-map@6fcf0110e5809fceb0ddf93b8553804a7cc52151
websecmap[deploy] @ git+https://gitlab.com/internet-cleanup-foundation/web-security-map@d60d90d787d94de122dfe139df4790b82eee7554

# include (security) version constraints for non primary dependencies
-c security-constraints.in
Expand Down
14 changes: 7 additions & 7 deletions requirements-deploy.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#
# pip-compile --output-file=requirements-deploy.txt requirements-deploy.in
#
aiohttp==3.8.4
aiohttp==3.8.6
# via
# -c requirements.txt
# geoip2
Expand Down Expand Up @@ -97,7 +97,7 @@ colorlog==6.7.0
# via
# -c requirements.txt
# websecmap
cryptography==41.0.3
cryptography==41.0.5
# via
# -c requirements.txt
# -c security-constraints.in
Expand All @@ -123,7 +123,7 @@ diff-match-patch==20230430
# -c requirements.txt
# django-import-export
# websecmap
django==4.2.3
django==4.2.7
# via
# -c requirements.txt
# -c security-constraints.in
Expand Down Expand Up @@ -472,7 +472,7 @@ rdp==0.8
# via
# -c requirements.txt
# websecmap
redis==4.6.0
redis==5.0.0
# via
# -c requirements.txt
# celery
Expand Down Expand Up @@ -569,7 +569,7 @@ tldextract==3.4.4
# via
# -c requirements.txt
# websecmap
tornado==6.3.2
tornado==6.3.3
# via
# -c requirements.txt
# flower
Expand All @@ -585,7 +585,7 @@ typing-extensions==4.6.3
# asgiref
# django-countries
# websecmap
urllib3==1.26.6
urllib3==1.26.18
# via
# -c requirements.txt
# requests
Expand All @@ -607,7 +607,7 @@ vine==1.3.0
# celery
# flower
# websecmap
websecmap[deploy] @ git+https://gitlab.com/internet-cleanup-foundation/web-security-map@6fcf0110e5809fceb0ddf93b8553804a7cc52151
websecmap[deploy] @ git+https://gitlab.com/internet-cleanup-foundation/web-security-map@d60d90d787d94de122dfe139df4790b82eee7554
# via
# -c requirements.txt
# -r requirements-deploy.in
Expand Down
2 changes: 1 addition & 1 deletion requirements-dev.in
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ django-extensions
django-debug-toolbar
django-debug-toolbar-request-history
pytest-mock
websecmap @ git+https://gitlab.com/internet-cleanup-foundation/web-security-map@6fcf0110e5809fceb0ddf93b8553804a7cc52151
websecmap @ git+https://gitlab.com/internet-cleanup-foundation/web-security-map@d60d90d787d94de122dfe139df4790b82eee7554
# use the version with relaxed attr requirements
pytest-docker @ git+https://github.com/avast/pytest-docker@519b155009b6f3570c01f1f56e7c4e6ce3c5c760

Expand Down
27 changes: 9 additions & 18 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
# This file is autogenerated by pip-compile with Python 3.10
# by the following command:
#
# pip-compile --output-file=requirements-dev.txt requirements-dev.in
# pip-compile --output-file=requirements-dev.txt --resolver=backtracking requirements-dev.in
#
aiohttp==3.8.4
aiohttp==3.8.6
# via
# -c requirements.txt
# geoip2
Expand Down Expand Up @@ -87,7 +87,6 @@ celery-statsd==1.0.0
# websecmap
certifi==2023.7.22
# via
# -c requirements.txt
# -c security-constraints.in
# requests
# sentry-sdk
Expand Down Expand Up @@ -122,9 +121,8 @@ coverage[toml]==7.2.7
# via
# -r requirements-dev.in
# pytest-cov
cryptography==41.0.3
cryptography==41.0.5
# via
# -c requirements.txt
# -c security-constraints.in
# pyopenssl
# types-pyopenssl
Expand Down Expand Up @@ -152,9 +150,8 @@ diff-match-patch==20230430
# websecmap
dill==0.3.6
# via pylint
django==4.2.3
django==4.2.7
# via
# -c requirements.txt
# -c security-constraints.in
# django-celery-beat
# django-colorful
Expand Down Expand Up @@ -376,7 +373,6 @@ lml==0.1.0
# websecmap
lxml==4.9.2
# via
# -c requirements.txt
# -c security-constraints.in
# dnsrecon
# websecmap
Expand Down Expand Up @@ -472,7 +468,6 @@ phonenumberslite==8.13.15
# via -r requirements-dev.in
pillow==9.5.0
# via
# -c requirements.txt
# -c security-constraints.in
# python-resize-image
# reportlab
Expand Down Expand Up @@ -637,7 +632,7 @@ rdp==0.8
# websecmap
recommonmark==0.7.1
# via -r requirements-dev.in
redis==4.6.0
redis==5.0.0
# via
# -c requirements.txt
# celery
Expand Down Expand Up @@ -698,7 +693,6 @@ six==1.16.0
# via
# -c requirements.txt
# celery-statsd
# django-jet-reboot
# livereload
# python-dateutil
# python-monkey-business
Expand Down Expand Up @@ -741,7 +735,6 @@ sphinxcontrib-serializinghtml==1.1.5
# via sphinx
sqlparse==0.4.4
# via
# -c requirements.txt
# -c security-constraints.in
# django
# django-debug-toolbar
Expand Down Expand Up @@ -795,7 +788,7 @@ tomli==2.0.1
# pytest
tomlkit==0.11.8
# via pylint
tornado==6.3.2
tornado==6.3.3
# via
# -c requirements.txt
# flower
Expand Down Expand Up @@ -838,7 +831,7 @@ typing-extensions==4.6.3
# django-stubs-ext
# mypy
# websecmap
urllib3==1.26.6
urllib3==1.26.18
# via
# -c requirements.txt
# requests
Expand All @@ -858,10 +851,8 @@ vine==1.3.0
# websecmap
vulture==2.7
# via -r requirements-dev.in
websecmap @ git+https://gitlab.com/internet-cleanup-foundation/web-security-map@6fcf0110e5809fceb0ddf93b8553804a7cc52151
# via
# -c requirements.txt
# -r requirements-dev.in
websecmap @ git+https://gitlab.com/internet-cleanup-foundation/web-security-map@d60d90d787d94de122dfe139df4790b82eee7554
# via -r requirements-dev.in
wikidata==0.7.0
# via
# -c requirements.txt
Expand Down
4 changes: 2 additions & 2 deletions requirements.in
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
# - no version pinning, unless it is required and explained

# to update websecmap, update the SHA hash and run: make update_requirement_websecmap
websecmap @ git+https://gitlab.com/internet-cleanup-foundation/web-security-map@6fcf0110e5809fceb0ddf93b8553804a7cc52151
websecmap @ git+https://gitlab.com/internet-cleanup-foundation/web-security-map@d60d90d787d94de122dfe139df4790b82eee7554

django-otp
django-two-factor-auth>1.15
Expand Down Expand Up @@ -49,4 +49,4 @@ phonenumbers
# https://github.com/pyexcel/pyexcel-xlsx/issues/52
pyexcel==0.6.7
pyexcel-xlsx==0.6.0
openpyxl==3.0.10
openpyxl==3.0.10
Loading

0 comments on commit a368872

Please sign in to comment.