Skip to content

Commit 8f639f3

Browse files
Merge pull request #72 from SADiLaR/feature/add-admin-service
Updated admin to use the GetTextFromPDF service
2 parents: be4cc1c + 9c60c3c; commit 8f639f3

File tree

1 file changed

+10
-23
lines changed

1 file changed

+10
-23
lines changed

app/general/admin.py

Lines changed: 10 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,10 @@
11
import magic
22
from django.contrib import admin
33
from django.forms import HiddenInput, ModelForm
4-
from pypdf import PdfReader
5-
from pypdf.errors import PdfStreamError
64
from simple_history.admin import SimpleHistoryAdmin
75

6+
from general.service.extract_text import GetTextError, GetTextFromPDF
7+
88
from .models import DocumentFile, Institution, Language, Project, Subject
99

1010

@@ -34,8 +34,14 @@ def clean(self):
3434
if file_type != "application/pdf":
3535
self.add_error("uploaded_file", "Only PDF files are allowed.")
3636

37-
# Extract text from PDF file
38-
cleaned_data["document_data"] = self.pdf_to_text(uploaded_file)
37+
try:
38+
# Extract text from PDF file
39+
cleaned_data["document_data"] = GetTextFromPDF(uploaded_file).to_text()
40+
41+
except GetTextError:
42+
return self.add_error(
43+
"uploaded_file", "The uploaded PDF file is corrupted or not fully downloaded."
44+
)
3945

4046
cleaned_data["mime_type"] = file_type
4147

@@ -52,25 +58,6 @@ def clean(self):
5258

5359
return cleaned_data
5460

55-
def pdf_to_text(self, uploaded_file):
56-
if uploaded_file:
57-
text_list = []
58-
# Read the PDF file and extract text
59-
try:
60-
reader = PdfReader(uploaded_file)
61-
for page in reader.pages:
62-
text_list.append(page.extract_text())
63-
64-
get_pdf_text = " ".join(text_list)
65-
66-
return str(get_pdf_text)
67-
68-
except PdfStreamError:
69-
return self.add_error(
70-
"uploaded_file", "The uploaded PDF file is corrupted or not fully downloaded."
71-
)
72-
return None
73-
7461

7562
class DocumentFileAdmin(SimpleHistoryAdmin):
7663
ordering = ["title"]

0 commit comments

Comments
 (0)