Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix deprecated camelot methods #190

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@
# This workflow installs the Python dependencies and runs the tests.
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions

name: Run Unittests
name: Tests

on:
push:
Expand All @@ -26,7 +23,7 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install ".[all]"
python -m pip install ".[dev]"
- name: Test with pytest
run: |
python -m pytest
5 changes: 1 addition & 4 deletions docs/user/install.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,10 @@ This part of the documentation covers the steps to install Excalibur.
Using pip
---------

After installing `ghostscript`_, which is one of the requirements for Camelot (see `install instructions`_), you can simply use pip to install Excalibur::
You can simply use pip to install Excalibur::

$ pip install excalibur-py

.. _ghostscript: https://www.ghostscript.com/
.. _install instructions: https://camelot-py.readthedocs.io/en/master/user/install-deps.html

From the source code
--------------------

Expand Down
8 changes: 4 additions & 4 deletions excalibur/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@

import click

from . import settings, __version__
from . import __version__, settings
from . import configuration as conf
from .tasks import split, extract
from .www.app import create_app
from .utils.database import reset_database, initialize_database
from .operators.python_operator import PythonOperator
from .tasks import extract, split
from .utils.database import initialize_database, reset_database
from .www.app import create_app


def abort_if_false(ctx, param, value):
Expand Down
10 changes: 5 additions & 5 deletions excalibur/models.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
import json
import datetime as dt
import json
from typing import Any # noqa

from sqlalchemy import (
Text,
Column,
String,
Boolean,
Integer,
Column,
DateTime,
ForeignKey,
Integer,
String,
Text,
)
from sqlalchemy.ext.declarative import declarative_base

Expand Down
23 changes: 4 additions & 19 deletions excalibur/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

import camelot
import pandas as pd
from camelot.backends.ghostscript_backend import GhostscriptBackend
from camelot.backends.pdfium_backend import PdfiumBackend
from camelot.core import TableList
from camelot.parsers import Lattice, Stream

Expand Down Expand Up @@ -43,23 +43,8 @@ def split(file_id):
imagepath = os.path.join(conf.PDFS_FOLDER, file_id, imagename)

# convert single-page PDF to PNG
try:
backend = GhostscriptBackend()
backend.convert(filepath, imagepath, 300)
except OSError:
gs_command = [
"gs",
"-q",
"-sDEVICE=png16m",
f"-o{imagepath}",
"-r300",
filepath,
]
try:
subprocess.run(gs_command, check=True, capture_output=True)
except subprocess.CalledProcessError as e:
logging.error(f"Ghostscript conversion failed: {e.stderr.decode()}")
raise
backend = PdfiumBackend()
backend.convert(filepath, imagepath, 300)

filenames[page] = filename
filepaths[page] = filepath
Expand Down Expand Up @@ -123,7 +108,7 @@ def extract(job_id):
if flavor.lower() == "lattice":
kwargs.pop("columns", None)

t = camelot.read_pdf(filepaths[p], **kwargs, backend="poppler")
t = camelot.read_pdf(filepaths[p], **kwargs)
for _t in t:
_t.page = int(p)
tables.extend(t)
Expand Down
12 changes: 8 additions & 4 deletions excalibur/utils/task.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
import os

import cv2
from camelot.utils import (
get_image_char_and_text_objects,
get_page_layout,
get_rotation,
)
from PyPDF2 import PdfReader, PdfWriter
from camelot.utils import get_rotation, get_page_layout, get_text_objects


def get_pages(filename, pages, password=""):
Expand Down Expand Up @@ -62,9 +66,9 @@ def save_page(filepath, page_number):
froot, fext = os.path.splitext(outpath)
layout, __ = get_page_layout(outpath)
# fix rotated PDF
chars = get_text_objects(layout, ltype="char")
horizontal_text = get_text_objects(layout, ltype="horizontal_text")
vertical_text = get_text_objects(layout, ltype="vertical_text")
images, chars, horizontal_text, vertical_text = get_image_char_and_text_objects(
layout
)
rotation = get_rotation(chars, horizontal_text, vertical_text)
if rotation != "":
outpath_new = "".join([froot.replace("page", "p"), "_rotated", fext])
Expand Down
8 changes: 4 additions & 4 deletions excalibur/www/static/js/workspace.js
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ const onSavedRuleClick = function (e) {

if (ruleOptions['flavor'].toLowerCase() == 'lattice') {
document.getElementById('process-background').value = ruleOptions['process_background'];
document.getElementById('line-size-scaling').value = ruleOptions['line_size_scaling'];
document.getElementById('line-scale').value = ruleOptions['line_scale'];
document.getElementById('split-text-l').value = ruleOptions['split_text'];
document.getElementById('flag-size-l').value = ruleOptions['flag_size'];
} else if (ruleOptions['flavor'].toLowerCase() == 'stream') {
Expand Down Expand Up @@ -176,14 +176,14 @@ const getRuleOptions = function () {
switch(flavor.toString().toLowerCase()) {
case 'lattice': {
ruleOptions['process_background'] = $("#process-background").val() ? true : false;
ruleOptions['line_size_scaling'] = $('#line-size-scaling').val() ? Number($('#line-size-scaling').val()) : 15;
ruleOptions['line_scale'] = $('#line-scale').val() ? Number($('#line-scale').val()) : 15;
ruleOptions['split_text'] = $("#split-text-l").val() ? true : false;
ruleOptions['flag_size'] = $("#flag-size-l").val() ? true : false;
break;
}
case 'stream': {
ruleOptions['row_close_tol'] = $('#row-close-tol').val() ? Number($('#line-size-scaling').val()) : 2;
ruleOptions['col_close_tol'] = $('#col-close-tol').val() ? Number($('#line-size-scaling').val()) : 0;
ruleOptions['row_close_tol'] = $('#row-close-tol').val() ? Number($('#line-scale').val()) : 2;
ruleOptions['col_close_tol'] = $('#col-close-tol').val() ? Number($('#line-scale').val()) : 0;
ruleOptions['split_text'] = $("#split-text-s").val() ? true : false;
ruleOptions['flag_size'] = $("#flag-size-s").val() ? true : false;
break;
Expand Down
1 change: 0 additions & 1 deletion public/.gitignore

This file was deleted.

11 changes: 0 additions & 11 deletions public/.travis.yml

This file was deleted.

1 change: 0 additions & 1 deletion public/CNAME

This file was deleted.

27 changes: 0 additions & 27 deletions public/LICENSE

This file was deleted.

74 changes: 0 additions & 74 deletions public/README.md

This file was deleted.

Loading
Loading