Skip to content

Commit 2019488

Browse files
Merge pull request #190 from camelot-dev/vinayak/2025-01-03-fix-deprecated-camelot-methods
Fix deprecated camelot methods
2 parents f47c132 + d1995ff commit 2019488

35 files changed

+1487
-7931
lines changed

.github/workflows/unittests.yml renamed to .github/workflows/tests.yml

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,4 @@
1-
# This workflow will install Python dependencies, run tests
2-
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
3-
4-
name: Run Unittests
1+
name: Tests
52

63
on:
74
push:
@@ -26,7 +23,7 @@ jobs:
2623
- name: Install dependencies
2724
run: |
2825
python -m pip install --upgrade pip
29-
python -m pip install ".[all]"
26+
python -m pip install ".[dev]"
3027
- name: Test with pytest
3128
run: |
3229
python -m pytest

docs/user/install.rst

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,10 @@ This part of the documentation covers the steps to install Excalibur.
88
Using pip
99
---------
1010

11-
After installing `ghostscript`_, which is one of the requirements for Camelot (See `install instructions`_), you can simply use pip to install Excalibur::
11+
You can simply use pip to install Excalibur::
1212

1313
$ pip install excalibur-py
1414

15-
.. _ghostscript: https://www.ghostscript.com/
16-
.. _install instructions: https://camelot-py.readthedocs.io/en/master/user/install-deps.html
17-
1815
From the source code
1916
--------------------
2017

excalibur/cli.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,12 @@
33

44
import click
55

6-
from . import settings, __version__
6+
from . import __version__, settings
77
from . import configuration as conf
8-
from .tasks import split, extract
9-
from .www.app import create_app
10-
from .utils.database import reset_database, initialize_database
118
from .operators.python_operator import PythonOperator
9+
from .tasks import extract, split
10+
from .utils.database import initialize_database, reset_database
11+
from .www.app import create_app
1212

1313

1414
def abort_if_false(ctx, param, value):

excalibur/models.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
1-
import json
21
import datetime as dt
2+
import json
33
from typing import Any # noqa
44

55
from sqlalchemy import (
6-
Text,
7-
Column,
8-
String,
96
Boolean,
10-
Integer,
7+
Column,
118
DateTime,
129
ForeignKey,
10+
Integer,
11+
String,
12+
Text,
1313
)
1414
from sqlalchemy.ext.declarative import declarative_base
1515

excalibur/tasks.py

Lines changed: 4 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
import camelot
99
import pandas as pd
10-
from camelot.backends.ghostscript_backend import GhostscriptBackend
10+
from camelot.backends.pdfium_backend import PdfiumBackend
1111
from camelot.core import TableList
1212
from camelot.parsers import Lattice, Stream
1313

@@ -43,23 +43,8 @@ def split(file_id):
4343
imagepath = os.path.join(conf.PDFS_FOLDER, file_id, imagename)
4444

4545
# convert single-page PDF to PNG
46-
try:
47-
backend = GhostscriptBackend()
48-
backend.convert(filepath, imagepath, 300)
49-
except OSError:
50-
gs_command = [
51-
"gs",
52-
"-q",
53-
"-sDEVICE=png16m",
54-
f"-o{imagepath}",
55-
"-r300",
56-
filepath,
57-
]
58-
try:
59-
subprocess.run(gs_command, check=True, capture_output=True)
60-
except subprocess.CalledProcessError as e:
61-
logging.error(f"Ghostscript conversion failed: {e.stderr.decode()}")
62-
raise
46+
backend = PdfiumBackend()
47+
backend.convert(filepath, imagepath, 300)
6348

6449
filenames[page] = filename
6550
filepaths[page] = filepath
@@ -123,7 +108,7 @@ def extract(job_id):
123108
if flavor.lower() == "lattice":
124109
kwargs.pop("columns", None)
125110

126-
t = camelot.read_pdf(filepaths[p], **kwargs, backend="poppler")
111+
t = camelot.read_pdf(filepaths[p], **kwargs)
127112
for _t in t:
128113
_t.page = int(p)
129114
tables.extend(t)

excalibur/utils/task.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
11
import os
22

33
import cv2
4+
from camelot.utils import (
5+
get_image_char_and_text_objects,
6+
get_page_layout,
7+
get_rotation,
8+
)
49
from PyPDF2 import PdfReader, PdfWriter
5-
from camelot.utils import get_rotation, get_page_layout, get_text_objects
610

711

812
def get_pages(filename, pages, password=""):
@@ -62,9 +66,9 @@ def save_page(filepath, page_number):
6266
froot, fext = os.path.splitext(outpath)
6367
layout, __ = get_page_layout(outpath)
6468
# fix rotated PDF
65-
chars = get_text_objects(layout, ltype="char")
66-
horizontal_text = get_text_objects(layout, ltype="horizontal_text")
67-
vertical_text = get_text_objects(layout, ltype="vertical_text")
69+
images, chars, horizontal_text, vertical_text = get_image_char_and_text_objects(
70+
layout
71+
)
6872
rotation = get_rotation(chars, horizontal_text, vertical_text)
6973
if rotation != "":
7074
outpath_new = "".join([froot.replace("page", "p"), "_rotated", fext])

excalibur/www/static/js/workspace.js

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ const onSavedRuleClick = function (e) {
110110

111111
if (ruleOptions['flavor'].toLowerCase() == 'lattice') {
112112
document.getElementById('process-background').value = ruleOptions['process_background'];
113-
document.getElementById('line-size-scaling').value = ruleOptions['line_size_scaling'];
113+
document.getElementById('line-scale').value = ruleOptions['line_scale'];
114114
document.getElementById('split-text-l').value = ruleOptions['split_text'];
115115
document.getElementById('flag-size-l').value = ruleOptions['flag_size'];
116116
} else if (ruleOptions['flavor'].toLowerCase() == 'stream') {
@@ -176,14 +176,14 @@ const getRuleOptions = function () {
176176
switch(flavor.toString().toLowerCase()) {
177177
case 'lattice': {
178178
ruleOptions['process_background'] = $("#process-background").val() ? true : false;
179-
ruleOptions['line_size_scaling'] = $('#line-size-scaling').val() ? Number($('#line-size-scaling').val()) : 15;
179+
ruleOptions['line_scale'] = $('#line-scale').val() ? Number($('#line-scale').val()) : 15;
180180
ruleOptions['split_text'] = $("#split-text-l").val() ? true : false;
181181
ruleOptions['flag_size'] = $("#flag-size-l").val() ? true : false;
182182
break;
183183
}
184184
case 'stream': {
185-
ruleOptions['row_close_tol'] = $('#row-close-tol').val() ? Number($('#line-size-scaling').val()) : 2;
186-
ruleOptions['col_close_tol'] = $('#col-close-tol').val() ? Number($('#line-size-scaling').val()) : 0;
185+
ruleOptions['row_close_tol'] = $('#row-close-tol').val() ? Number($('#line-scale').val()) : 2;
186+
ruleOptions['col_close_tol'] = $('#col-close-tol').val() ? Number($('#line-scale').val()) : 0;
187187
ruleOptions['split_text'] = $("#split-text-s").val() ? true : false;
188188
ruleOptions['flag_size'] = $("#flag-size-s").val() ? true : false;
189189
break;

public/.gitignore

Lines changed: 0 additions & 1 deletion
This file was deleted.

public/.travis.yml

Lines changed: 0 additions & 11 deletions
This file was deleted.

public/CNAME

Lines changed: 0 additions & 1 deletion
This file was deleted.

0 commit comments

Comments
 (0)