Skip to content

Commit

Permalink
Add admin action and task to unzip FoiAttachment
Browse files Browse the repository at this point in the history
  • Loading branch information
stefanw committed Nov 16, 2023
1 parent 4287ef3 commit 365c8d8
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 1 deletion.
8 changes: 8 additions & 0 deletions froide/foirequest/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -756,6 +756,7 @@ class FoiAttachmentAdmin(admin.ModelAdmin):
"convert",
"ocr_attachment",
"make_document",
"unpack_zipfile",
]

def get_queryset(self, request):
Expand Down Expand Up @@ -824,6 +825,13 @@ def ocr_attachment(self, request, queryset):
for att in queryset:
ocr_pdf_attachment(att)

@admin.action(description=_("Unpack ZIP file"))
def unpack_zipfile(self, request, queryset):
    """Admin action: schedule ZIP unpacking for every selected attachment.

    One Celery task is queued per attachment. The task receives the
    attachment's primary key (it looks the row up itself), so the work runs
    in a worker instead of blocking the admin request.
    """
    # Imported locally, as in the original, to avoid importing tasks at
    # module import time.
    from .tasks import unpack_zipfile_attachment_task

    for att in queryset:
        # The task signature is (instance_id); pass the pk, not the model
        # instance, and enqueue it rather than calling it synchronously.
        unpack_zipfile_attachment_task.delay(att.id)


@admin.register(FoiEvent)
class FoiEventAdmin(admin.ModelAdmin):
Expand Down
14 changes: 14 additions & 0 deletions froide/foirequest/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -386,3 +386,17 @@ def move_upload_to_attachment(att_id, upload_id):

if att.can_convert_to_pdf():
convert_attachment_task.delay(att.id)


@celery_app.task(
    name="froide.foirequest.tasks.unpack_zipfile_attachment_task", time_limit=360
)
def unpack_zipfile_attachment_task(instance_id):
    """Unpack the ZIP attachment identified by ``instance_id``.

    Looks up the ``FoiAttachment`` by primary key and hands it to
    ``unpack_zipfile_attachment``. If no attachment with that key exists,
    the task ends without doing anything.
    """
    from .utils import unpack_zipfile_attachment

    try:
        attachment = FoiAttachment.objects.get(pk=instance_id)
    except FoiAttachment.DoesNotExist:
        # Nothing to unpack if the row is gone.
        return None
    else:
        unpack_zipfile_attachment(attachment)
    return None
56 changes: 55 additions & 1 deletion froide/foirequest/utils.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
import json
import re
import zipfile
from dataclasses import dataclass
from datetime import timedelta
from io import BytesIO
from pathlib import PurePath
from typing import Iterator, List, Optional, Tuple, Union

from django import forms
from django.conf import settings
from django.core.exceptions import ValidationError
from django.core.files import File
from django.core.mail import mail_managers
from django.core.validators import validate_email
from django.template.loader import render_to_string
Expand All @@ -20,6 +24,7 @@
from froide.helper.content_urls import get_content_url
from froide.helper.date_utils import format_seconds
from froide.helper.email_utils import delete_mails_by_recipient
from froide.helper.storage import make_unique_filename
from froide.helper.tasks import search_instance_save
from froide.helper.text_utils import (
apply_text_replacements,
Expand All @@ -31,7 +36,7 @@
from froide.proof.models import ProofAttachment
from froide.publicbody.models import FoiLaw, PublicBody

from .models import FoiRequest
from .models import FoiAttachment, FoiRequest

MAX_ATTACHMENT_SIZE = settings.FROIDE_CONFIG["max_attachment_size"]
RECIPIENT_BLOCKLIST = settings.FROIDE_CONFIG.get("recipient_blocklist_regex", None)
Expand Down Expand Up @@ -880,3 +885,52 @@ def select_foirequest_template(foirequest, base_template: str):

templates.append(base_template)
return templates


# Top-level archive entries that are never unpacked into attachments.
ZIP_BLOCK_LIST = {"__MACOSX"}
# Separator used to flatten nested archive paths into one file name.
PATH_REPLACEMENT = "_" * 3  # 3 underscores


def unpack_zipfile_attachment(attachment: FoiAttachment):
    """Create one new ``FoiAttachment`` per file contained in a ZIP attachment.

    Does nothing if the attachment's file is not a ZIP archive or if the
    archive has no eligible file entries. Directory entries and entries
    whose first path component is in ``ZIP_BLOCK_LIST`` are skipped.

    For each remaining entry the path (after ``remove_common_root_path``)
    is flattened into a single name by joining its parts with
    ``PATH_REPLACEMENT``; the name is made unique among the names already
    used on the same message via ``make_unique_filename``. The content type
    is sniffed from the first 1024 bytes with ``magic``.

    Args:
        attachment: The attachment whose file should be unpacked. New
            attachments are attached to ``attachment.belongs_to``.

    NOTE(review): every member is read fully into memory and no size limit
    is enforced here; archives come from untrusted senders, so consider
    checking ``ZipInfo.file_size`` against a limit before reading.
    """
    import magic
    from filingcabinet.services import remove_common_root_path

    # Open explicitly as a context manager so the storage handle is closed
    # once unpacking is finished (or aborted).
    with attachment.file.open("rb") as archive_file:
        if not zipfile.is_zipfile(archive_file):
            return

        with zipfile.ZipFile(archive_file, "r") as zf:
            # Keep the ZipInfo next to its parsed path: PurePath normalises
            # names (drops "./", collapses "//"), so str(path) is not
            # guaranteed to equal the member name stored in the archive.
            members = []
            for zip_info in zf.infolist():
                if zip_info.is_dir():
                    continue
                path = PurePath(zip_info.filename)
                parts = path.parts
                # An empty member name yields no parts; skip it instead of
                # failing on parts[0].
                if not parts or parts[0] in ZIP_BLOCK_LIST:
                    continue
                members.append((path, zip_info))
            if not members:
                return

            doc_paths = remove_common_root_path([path for path, _ in members])
            message = attachment.belongs_to
            names = set(
                message.foiattachment_set.all().values_list("name", flat=True)
            )
            can_approve = not message.request.not_publishable

            for doc_path, (_, zip_info) in zip(doc_paths, members):
                attachment_name = PATH_REPLACEMENT.join(doc_path.parts)
                attachment_name = make_unique_filename(attachment_name, names)
                names.add(attachment_name)

                # Read through the ZipInfo so the lookup uses the exact
                # stored member name.
                data = zf.read(zip_info)
                content_type = magic.from_buffer(data[:1024], mime=True)

                new_attachment = FoiAttachment(
                    belongs_to=message,
                    name=attachment_name,
                    size=len(data),
                    filetype=content_type,
                    can_approve=can_approve,
                )
                new_attachment.file.save(attachment_name, File(BytesIO(data)))

0 comments on commit 365c8d8

Please sign in to comment.