Skip to content

Commit

Permalink
feat: add dest argument (#15)
Browse files: browse the repository at this point in the history
* feat: add dest argument

---------

Signed-off-by: Frost Ming <[email protected]>
  • Loading branch information
frostming authored Jan 4, 2024
1 parent f9ac88e commit d177558
Show file tree
Hide file tree
Showing 6 changed files with 90 additions and 120 deletions.
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -167,4 +167,3 @@ cython_debug/
*.epub
*.zip
.DS_Store

56 changes: 25 additions & 31 deletions epubhv/cli.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,36 @@
from dataclasses import dataclass
from argparse import ArgumentParser, RawTextHelpFormatter
from pathlib import Path
from argparse import ArgumentParser, Namespace, RawTextHelpFormatter
from typing import cast

from epubhv.epubhv import EPUBHV, list_all_epub_in_dir


@dataclass
class Options:
epub: str
v: bool
h: bool
method: str
convert: str
punctuation: str
ruby: bool
cantonese: bool
dest: Path


def main() -> None:
parser: ArgumentParser = ArgumentParser(formatter_class=RawTextHelpFormatter)
parser = ArgumentParser(formatter_class=RawTextHelpFormatter)
parser.add_argument("epub", help="file or dir that contains epub files to change")
parser.add_argument(
"--v",
dest="v",
action="store_true",
dest="method",
action="store_const",
const="to_vertical",
default="to_vertical",
help="change all the epub files to vertical.",
)
parser.add_argument(
"--h",
dest="h",
action="store_true",
dest="method",
action="store_const",
const="to_horizontal",
help="change all the epub files to hortical.",
)
parser.add_argument(
Expand All @@ -43,6 +45,13 @@ def main() -> None:
action="store_true",
help="Ruby it for cantonese.",
)
parser.add_argument(
"-d",
"--dest",
help="destination dir to save the epub files, default to current directory",
default=".",
type=Path,
)

parser.add_argument(
"--punctuation",
Expand Down Expand Up @@ -96,29 +105,14 @@ def main() -> None:
""",
)

raw_args: Namespace = parser.parse_args()
options: Options = Options(
epub=raw_args.epub,
v=raw_args.v,
h=raw_args.h,
convert=raw_args.convert,
punctuation=raw_args.punctuation,
ruby=raw_args.ruby,
cantonese=raw_args.cantonese,
)
options = cast(Options, parser.parse_args())
epub_files = Path(options.epub)
# default is to to_vertical
method: str = "to_vertical"
if options.h:
method = "to_horizontal"
elif options.v:
method = "to_vertical"
if epub_files.exists():
if epub_files.is_dir():
files: set[Path] = list_all_epub_in_dir(path=epub_files)
f: Path
for f in files:
print(f"{str(f)} is {method}")
print(f"{str(f)} is {options.method}")
try:
epubhv: EPUBHV = EPUBHV(
file_path=f,
Expand All @@ -127,18 +121,18 @@ def main() -> None:
need_ruby=options.ruby,
need_cantonese=options.cantonese,
)
epubhv.run(method=method)
epubhv.run(method=options.method, dest=options.dest)
except Exception as e:
print(f"{str(f)} {method} is failed by {str(e)}")
print(f"{str(f)} {options.method} is failed by {str(e)}")
else:
print(f"{str(epub_files)} is {method}")
print(f"{str(epub_files)} is {options.method}")
epubhv: EPUBHV = EPUBHV(
file_path=epub_files,
convert_to=options.convert,
need_ruby=options.ruby,
need_cantonese=options.cantonese,
)
epubhv.run(method=method)
epubhv.run(method=options.method, dest=options.dest)
else:
raise Exception("Please make sure it is a dir contains epub or is a epub file.")

Expand Down
61 changes: 28 additions & 33 deletions epubhv/epubhv.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import os
import shutil
import zipfile
from collections import defaultdict, Counter
from collections import Counter, defaultdict
from pathlib import Path
from typing import Dict, List, Optional

Expand All @@ -15,11 +15,10 @@
from bs4 import NavigableString, PageElement, ResultSet, Tag
from cssutils import CSSParser
from cssutils.css import CSSStyleSheet
from langdetect import LangDetectException, detect

from epubhv.punctuation import Punctuation
from epubhv.yomituki import RubySoup, string_containers # pyright: ignore
from langdetect import detect, LangDetectException


cssutils.log.setLevel(logging.CRITICAL) # type: ignore

Expand Down Expand Up @@ -66,40 +65,31 @@ def load_opf_meta_data(opf_file: Path) -> bs:


class EPUBHV:
book_path: Path
book_name: str
opf_file: Path

def __init__(
self,
file_path: Path,
convert_to: Optional[str] = None,
convert_punctuation: Optional[str] = "auto",
convert_punctuation: str = "auto",
need_ruby: bool = False,
need_cantonese: bool = False,
) -> None:
# declare instance fields
self.epub_file: Path
self.epub_file = file_path
self.has_css_file: bool = False
# for language ruby
self.need_ruby: bool = need_ruby
self.ruby_language = None
self.cantonese = need_cantonese
self.files_dict: Dict[str, List[Path]] = {}
self.content_files_list: List[Path] = []
self.book_path: Path
self.book_name: str
self.opf_file: Path
self.converter: Optional[opencc.OpenCC]
self.convert_to: Optional[str]
self.convert_punctuation: str = "auto"

# initialize instance fields
self.epub_file = file_path
if convert_to is not None:
self.converter = opencc.OpenCC(convert_to)
self.convert_to = convert_to
else:
self.converter = None
self.convert_to = None
if convert_punctuation:
self.convert_punctuation = convert_punctuation
self.convert_punctuation = convert_punctuation
self.convert_to = convert_to

self.converter = opencc.OpenCC(convert_to) if convert_to is not None else None

def extract_one_epub_to_dir(self) -> None:
assert self.epub_file.suffix == ".epub", f"{self.epub_file} Must be epub file"
Expand Down Expand Up @@ -342,9 +332,11 @@ def convert(self, method: str = "to_vertical") -> None:
content: str = f.read()
soup: bs = bs(content, "html.parser")
if self.converter:
html_element: Optional[Tag | NavigableString] = soup.find("html")
assert html_element is not None
text_elements: ResultSet[PageElement] = html_element.find_all(string=True) # type: ignore
html_element = soup.find("html")
assert isinstance(html_element, Tag)
text_elements: ResultSet[PageElement] = html_element.find_all(
string=True
) # type: ignore

element: Tag
for element in text_elements: # type: ignore
Expand Down Expand Up @@ -386,22 +378,25 @@ def convert(self, method: str = "to_vertical") -> None:
with open(html_file, "w", encoding="utf-8") as file:
file.write(ruby_soup.prettify())

def pack(self, method: str = "to_vertical") -> None:
lang: str = "original"
def pack(self, method: str = "to_vertical", dest: Path = Path.cwd()) -> None:
lang = "original"
if self.convert_to is not None:
lang = self.convert_to
if self.need_ruby:
lang = f"{lang}-ruby"
if method == "to_vertical":
book_name: str = f"{self.book_name}-v-{lang}.epub"
book_name = f"{self.book_name}-v-{lang}.epub"
else:
book_name: str = f"{self.book_name}-h-{lang}.epub"
book_name = f"{self.book_name}-h-{lang}.epub"
pack_to = dest / book_name

shutil.make_archive(base_name=book_name, format="zip", root_dir=self.book_path)
os.rename(src=book_name + ".zip", dst=book_name)
shutil.make_archive(
base_name=str(pack_to), format="zip", root_dir=self.book_path
)
os.rename(src=f"{pack_to}.zip", dst=pack_to)
shutil.rmtree(self.book_path)

def run(self, method: str = "to_vertical") -> None:
def run(self, method: str = "to_vertical", dest: Path = Path.cwd()) -> None:
assert method in [
"to_horizontal",
"to_vertical",
Expand All @@ -416,4 +411,4 @@ def run(self, method: str = "to_vertical") -> None:
raise Exception("Only support epub to vertical or horizontal for now")

self.convert(method=method)
self.pack(method=method)
self.pack(method=method, dest=dest)
2 changes: 0 additions & 2 deletions pdm.toml

This file was deleted.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ requires = ["pdm-backend"]
build-backend = "pdm.backend"

[tool.pdm.build]
include = ["epubhv"]
includes = ["epubhv"]

[tool.pdm.scripts]
format = "black ."
Expand Down
88 changes: 36 additions & 52 deletions tests/test_epubhv.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import os
from pathlib import Path
from shutil import rmtree
from typing import Dict, List
Expand All @@ -13,19 +12,21 @@
make_epub_files_dict,
)

TEST_DIR = Path(__file__).with_name("test_epub")


@pytest.fixture
def epub() -> EPUBHV:
return EPUBHV(file_path=Path("tests/test_epub/animal_farm.epub"))
return EPUBHV(file_path=TEST_DIR / "animal_farm.epub")


def test_find_epub_books() -> None:
assert list_all_epub_in_dir(Path("tests/test_epub")) == {
Path("tests/test_epub/animal_farm.epub"),
Path("tests/test_epub/books/animal.epub"),
Path("tests/test_epub/Liber_Esther.epub"),
Path("tests/test_epub/books/lemo.epub"),
Path("tests/test_epub/sanguo.epub"),
assert list_all_epub_in_dir(TEST_DIR) == {
TEST_DIR / "animal_farm.epub",
TEST_DIR / "books/animal.epub",
TEST_DIR / "Liber_Esther.epub",
TEST_DIR / "books/lemo.epub",
TEST_DIR / "sanguo.epub",
}


Expand All @@ -47,38 +48,29 @@ def test_make_files_dict(epub: EPUBHV) -> None:
rmtree(".epub_temp_dir/animal_farm")


def test_change_epub_to_vertical():
epub_file = Path("animal_farm-v-original.epub")
epub_file.unlink(True)
b = EPUBHV(Path("tests/test_epub/animal_farm.epub"))
b.run()
assert b.opf_file == Path(".epub_temp_dir/animal_farm/content.opf")
epub_file.unlink(True)
def test_change_epub_to_vertical(epub: EPUBHV, tmp_path: Path) -> None:
epub.run(dest=tmp_path)
assert epub.opf_file == Path(".epub_temp_dir/animal_farm/content.opf")
assert tmp_path.joinpath("animal_farm-v-original.epub").exists()


def test_find_epub_css_files():
lemo_output = Path("lemo-h-original.epub")
lemo_output.unlink(True)
b = EPUBHV(Path("tests/test_epub/animal_farm.epub"))
def test_find_epub_css_files(tmp_path: Path) -> None:
lemo_output = tmp_path / "lemo-h-original.epub"
b = EPUBHV(TEST_DIR / "animal_farm.epub")
b.make_epub_values()
assert b.has_css_file is False
b.run()
os.remove("animal_farm-v-original.epub")
f: EPUBHV = EPUBHV(Path("tests/test_epub/books/lemo.epub"))
f.run("to_horizontal")
b.run(dest=tmp_path)

f = EPUBHV(TEST_DIR / "books/lemo.epub")
f.run("to_horizontal", dest=tmp_path)
assert lemo_output.exists()
lemo_output.unlink(True)


def test_change_epub_covert() -> None:
if os.path.exists("sanguo-v-s2t-v-original.epub"):
os.remove("sanguo-v-s2t-v-original.epub")
if os.path.exists("sanguo-v-s2t.epub"):
os.remove("sanguo-v-s2t.epub")
f: EPUBHV = EPUBHV(Path("tests/test_epub/sanguo.epub"), "s2t")
f.run("to_vertical")
assert os.path.exists("sanguo-v-s2t.epub") is True
q: EPUBHV = EPUBHV(Path("sanguo-v-s2t.epub"))


def test_change_epub_covert(tmp_path: Path) -> None:
f = EPUBHV(TEST_DIR / "sanguo.epub", "s2t")
f.run("to_vertical", dest=tmp_path)
assert tmp_path.joinpath("sanguo-v-s2t.epub").exists()
q = EPUBHV(tmp_path.joinpath("sanguo-v-s2t.epub"))
q.extract_one_epub_to_dir()
q.make_epub_values()
has_t_count: int = 0
Expand All @@ -92,31 +84,23 @@ def test_change_epub_covert() -> None:
if r.find("滾滾長江東逝水") > 0:
has_t_count += 1
assert has_t_count > 0
q.run("to_vertical")
os.remove("sanguo-v-s2t.epub")
os.remove("sanguo-v-s2t-v-original.epub")
q.run("to_vertical", dest=tmp_path)


def test_ruby() -> None:
lemo_output = Path("lemo-h-original-ruby.epub")
lemo_output.unlink(True)
f: EPUBHV = EPUBHV(Path("tests/test_epub/books/lemo.epub"), need_ruby=True)
f.run("to_horizontal")
def test_ruby(tmp_path: Path) -> None:
lemo_output = tmp_path / "lemo-h-original-ruby.epub"
f = EPUBHV(TEST_DIR / "books/lemo.epub", need_ruby=True)
f.run("to_horizontal", dest=tmp_path)
assert f.ruby_language == "ja"
assert lemo_output.exists()
lemo_output.unlink(True)


def test_cantonese() -> None:
animal_output = Path("animal-h-original-ruby.epub")
animal_output.unlink(True)
f: EPUBHV = EPUBHV(
Path("tests/test_epub/books/animal.epub"), need_ruby=True, need_cantonese=True
)
f.run("to_horizontal")
def test_cantonese(tmp_path: Path) -> None:
animal_output = tmp_path / "animal-h-original-ruby.epub"
f = EPUBHV(TEST_DIR / "books/animal.epub", need_ruby=True, need_cantonese=True)
f.run("to_horizontal", dest=tmp_path)
assert f.ruby_language == "cantonese"
assert animal_output.exists()
animal_output.unlink(True)


def test_punctuation():
Expand Down

0 comments on commit d177558

Please sign in to comment.