Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor(API): Imports: nouvelle class CSVImportApiView générique (qui check le format, la taille max, et si déjà uploadé) #4936

Open
wants to merge 4 commits into
base: raphodn/backend-api-filters-utils
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 6 additions & 11 deletions api/views/diagnostic.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
from rest_framework.exceptions import NotFound, PermissionDenied
from rest_framework.generics import CreateAPIView, ListAPIView, UpdateAPIView
from rest_framework.pagination import LimitOffsetPagination
from rest_framework.views import APIView

from api.exceptions import DuplicateException
from api.permissions import (
Expand All @@ -23,7 +22,7 @@
IsCanteenManager,
)
from api.serializers import DiagnosticAndCanteenSerializer, ManagerDiagnosticSerializer
from api.views.utils import update_change_reason_with_auth
from api.views.utils import CSVImportApiView, update_change_reason_with_auth
from common.utils import send_mail
from data.models import Canteen, Teledeclaration
from data.models.diagnostic import Diagnostic
Expand Down Expand Up @@ -93,13 +92,14 @@ def perform_update(self, serializer):
update_change_reason_with_auth(self, diagnostic)


class EmailDiagnosticImportFileView(APIView):
class EmailDiagnosticImportFileView(CSVImportApiView):
permission_classes = [IsAuthenticated]

def post(self, request):
try:
file = request.data["file"]
self._verify_file_size(file)
self.file = request.data["file"]
super()._verify_file_size()
super()._verify_file_format()
email = request.data.get("email", request.user.email).strip()
context = {
"from": email,
Expand All @@ -111,7 +111,7 @@ def post(self, request):
to=[settings.CONTACT_EMAIL],
reply_to=[email],
template="unusual_diagnostic_import_file",
attachments=[(file.name, file.read(), file.content_type)],
attachments=[(self.file.name, self.file.read(), self.file.content_type)],
context=context,
)
except ValidationError as e:
Expand All @@ -127,11 +127,6 @@ def post(self, request):

return HttpResponse()

@staticmethod
def _verify_file_size(file):
if file.size > settings.CSV_IMPORT_MAX_SIZE:
raise ValidationError("Ce fichier est trop grand, merci d'utiliser un fichier de moins de 10Mo")


class DiagnosticsToTeledeclarePagination(LimitOffsetPagination):
default_limit = 100
Expand Down
21 changes: 4 additions & 17 deletions api/views/diagnosticimport.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from decimal import Decimal, InvalidOperation

import requests
from django.conf import settings
from django.contrib.auth import get_user_model
from django.core.exceptions import ValidationError
from django.core.validators import validate_email
Expand All @@ -16,11 +15,11 @@
from django.http import JsonResponse
from rest_framework import status
from rest_framework.exceptions import PermissionDenied
from rest_framework.views import APIView
from simple_history.utils import update_change_reason

from api.permissions import IsAuthenticated
from api.serializers import FullCanteenSerializer
from api.views.utils import CSVImportApiView
from data.models import Canteen, ImportFailure, ImportType, Sector
from data.models.diagnostic import Diagnostic
from data.models.teledeclaration import Teledeclaration
Expand All @@ -31,7 +30,7 @@
logger = logging.getLogger(__name__)


class ImportDiagnosticsView(ABC, APIView):
class ImportDiagnosticsView(ABC, CSVImportApiView):
permission_classes = [IsAuthenticated]
value_error_regex = re.compile(r"Field '(.+)' expected .+? got '(.+)'.")
annotated_sectors = Sector.objects.annotate(name_lower=Lower("name"))
Expand Down Expand Up @@ -80,8 +79,8 @@ def post(self, request):
try:
with transaction.atomic():
self.file = request.data["file"]
ImportDiagnosticsView._verify_file_format(self.file)
ImportDiagnosticsView._verify_file_size(self.file)
super()._verify_file_size()
super()._verify_file_format()
self._process_file(self.file)

if self.errors:
Expand Down Expand Up @@ -113,18 +112,6 @@ def _log_error(self, message, level="warning"):
import_type=self.import_type,
)

@staticmethod
def _verify_file_format(file):
if file.content_type != "text/csv" and file.content_type != "text/tab-separated-values":
raise ValidationError(
f"Ce fichier est au format {file.content_type}, merci d'exporter votre fichier au format CSV et réessayer."
)

@staticmethod
def _verify_file_size(file):
if file.size > settings.CSV_IMPORT_MAX_SIZE:
raise ValidationError("Ce fichier est trop grand, merci d'utiliser un fichier de moins de 10Mo")

def check_admin_values(self, header):
is_admin_import = any("admin_" in column for column in header)
if is_admin_import and not self.request.user.is_staff:
Expand Down
27 changes: 8 additions & 19 deletions api/views/purchaseimport.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import csv
import hashlib
import io
import json
import logging
Expand All @@ -13,19 +12,18 @@
from django.http import JsonResponse
from rest_framework import status
from rest_framework.exceptions import PermissionDenied
from rest_framework.views import APIView

from api.permissions import IsAuthenticated
from api.serializers import PurchaseSerializer
from api.views.utils import CSVImportApiView
from data.models import Canteen, ImportFailure, ImportType, Purchase

from .diagnosticimport import ImportDiagnosticsView
from .utils import camelize, decode_bytes, normalise_siret

logger = logging.getLogger(__name__)


class ImportPurchasesView(APIView):
class ImportPurchasesView(CSVImportApiView):
permission_classes = [IsAuthenticated]
max_error_items = 30

Expand All @@ -50,8 +48,12 @@ def post(self, request):
logger.info("Purchase bulk import started")
try:
self.file = request.data["file"]
self._verify_file_size()
ImportDiagnosticsView._verify_file_format(self.file)
super()._verify_file_size()
super()._verify_file_format()

self.file_digest = super()._get_file_digest()
self._check_duplication()

with transaction.atomic():
self._process_file()

Expand All @@ -60,10 +62,6 @@ def post(self, request):
if self.errors:
raise IntegrityError()

# The duplication check is called after the processing. The cost of eventually processing
# the file for nothing appears to be smaller than read the file twice.
self._check_duplication()

# Update all purchases's import source with file digest
Purchase.objects.filter(import_source=self.tmp_id).update(import_source=self.file_digest)

Expand Down Expand Up @@ -100,13 +98,10 @@ def _log_error(self, message, level="warning"):
)

def _process_file(self):
file_hash = hashlib.md5()
chunk = []
read_header = True
row_count = 1
for row in self.file:
file_hash.update(row)

# Sniffing 1st line
if read_header:
# decode header, discarding encoding result that might not be accurate without more data
Expand All @@ -133,12 +128,6 @@ def _process_file(self):
if len(chunk) > 0:
self._process_chunk(chunk)

self.file_digest = file_hash.hexdigest()

def _verify_file_size(self):
if self.file.size > settings.CSV_IMPORT_MAX_SIZE:
raise ValidationError("Ce fichier est trop grand, merci d'utiliser un fichier de moins de 10Mo")

def _decode_chunk(self, chunk_list):
if self.encoding_detected is None:
chunk = b"".join(chunk_list)
Expand Down
24 changes: 24 additions & 0 deletions api/views/utils.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,37 @@
import hashlib
import json
import logging

import chardet
from django.conf import settings
from django.core.exceptions import ValidationError
from djangorestframework_camel_case.render import CamelCaseJSONRenderer
from rest_framework.views import APIView
from simple_history.utils import update_change_reason

logger = logging.getLogger(__name__)


class CSVImportApiView(APIView):
    """Shared base view for CSV import endpoints.

    Subclasses assign the uploaded file to ``self.file`` (from
    ``request.data["file"]``) before calling the helpers below, which
    validate the upload's size and MIME type and can compute a digest
    used to detect an identical re-upload.
    """

    def _verify_file_size(self):
        """Raise ValidationError if the upload exceeds the configured max size."""
        if self.file.size > settings.CSV_IMPORT_MAX_SIZE:
            raise ValidationError(
                f"Ce fichier est trop grand, merci d'utiliser un fichier de moins de {settings.CSV_IMPORT_MAX_SIZE_PRETTY}"
            )

    def _verify_file_format(self):
        """Raise ValidationError unless the upload is CSV or TSV."""
        # content_type comes from the client and is not authoritative,
        # but it matches the pre-refactor behaviour of the import views.
        if self.file.content_type not in ["text/csv", "text/tab-separated-values"]:
            raise ValidationError(
                f"Ce fichier est au format {self.file.content_type}, merci d'exporter votre fichier au format CSV et réessayer."
            )

    def _get_file_digest(self):
        """Return the hex MD5 digest of the whole uploaded file.

        The digest identifies byte-identical re-uploads (duplicate-import
        check). The file is rewound afterwards so callers can still read it.
        """
        file_hash = hashlib.md5()
        for row in self.file:
            file_hash.update(row)
        # Hashing consumed the file: without a rewind, a subsequent pass
        # (e.g. ImportPurchasesView._process_file, which is called after
        # this digest is computed) would start at EOF and read nothing.
        self.file.seek(0)
        return file_hash.hexdigest()


def camelize(data):
    """Serialize *data* with camelCased keys and return it as parsed JSON."""
    rendered = CamelCaseJSONRenderer().render(data)
    return json.loads(rendered.decode("utf-8"))
Expand Down
1 change: 1 addition & 0 deletions macantine/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,6 +420,7 @@

# Maximum CSV import file size: 10Mo
CSV_IMPORT_MAX_SIZE = 10485760
CSV_IMPORT_MAX_SIZE_PRETTY = "10Mo"

# Size of each chunk when processing files
CSV_PURCHASE_CHUNK_LINES = 10000
Expand Down
Loading