Skip to content

Commit

Permalink
Added CLI tests.
Browse files Browse the repository at this point in the history
  • Loading branch information
afourney committed Feb 12, 2025
1 parent 97eeed5 commit f60b41b
Show file tree
Hide file tree
Showing 2 changed files with 121 additions and 0 deletions.
2 changes: 2 additions & 0 deletions packages/markitdown/src/markitdown/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#
# SPDX-License-Identifier: MIT

from .__about__ import __version__
from ._markitdown import MarkItDown
from ._exceptions import (
MarkItDownException,
Expand All @@ -12,6 +13,7 @@
from .converters import DocumentConverter, DocumentConverterResult

__all__ = [
"__version__",
"MarkItDown",
"DocumentConverter",
"DocumentConverterResult",
Expand Down
119 changes: 119 additions & 0 deletions packages/markitdown/tests/test_cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
#!/usr/bin/env python3 -m pytest
import os
import subprocess
import pytest
from markitdown import __version__

try:
from .test_markitdown import TEST_FILES_DIR, DOCX_TEST_STRINGS
except ImportError:
from test_markitdown import TEST_FILES_DIR, DOCX_TEST_STRINGS


@pytest.fixture(scope="session")
def shared_tmp_dir(tmp_path_factory):
return tmp_path_factory.mktemp("pytest_tmp")


def test_version(shared_tmp_dir) -> None:
result = subprocess.run(
["python", "-m", "markitdown", "--version"], capture_output=True, text=True
)

assert result.returncode == 0, f"CLI exited with error: {result.stderr}"
assert __version__ in result.stdout, f"Version not found in output: {result.stdout}"


def test_invalid_flag(shared_tmp_dir) -> None:
result = subprocess.run(
["python", "-m", "markitdown", "--foobar"], capture_output=True, text=True
)

assert result.returncode != 0, f"CLI exited with error: {result.stderr}"
assert (
"unrecognized arguments" in result.stderr
), f"Expected 'unrecognized arguments' to appear in STDERR"
assert "SYNTAX" in result.stderr, f"Expected 'SYNTAX' to appear in STDERR"


def test_output_to_stdout(shared_tmp_dir) -> None:
# DOC X
result = subprocess.run(
["python", "-m", "markitdown", os.path.join(TEST_FILES_DIR, "test.docx")],
capture_output=True,
text=True,
)

assert result.returncode == 0, f"CLI exited with error: {result.stderr}"
for test_string in DOCX_TEST_STRINGS:
assert (
test_string in result.stdout
), f"Expected string not found in output: {test_string}"


def test_output_to_file(shared_tmp_dir) -> None:
# DOC X, flag -o at the end
docx_output_file_1 = os.path.join(shared_tmp_dir, "test_docx_1.md")
result = subprocess.run(
[
"python",
"-m",
"markitdown",
os.path.join(TEST_FILES_DIR, "test.docx"),
"-o",
docx_output_file_1,
],
capture_output=True,
text=True,
)

assert result.returncode == 0, f"CLI exited with error: {result.stderr}"
assert os.path.exists(
docx_output_file_1
), f"Output file not created: {docx_output_file_1}"

with open(docx_output_file_1, "r") as f:
output = f.read()
for test_string in DOCX_TEST_STRINGS:
assert (
test_string in output
), f"Expected string not found in output: {test_string}"

# DOC X, flag -o at the beginning
docx_output_file_2 = os.path.join(shared_tmp_dir, "test_docx_2.md")
result = subprocess.run(
[
"python",
"-m",
"markitdown",
"-o",
docx_output_file_2,
os.path.join(TEST_FILES_DIR, "test.docx"),
],
capture_output=True,
text=True,
)

assert result.returncode == 0, f"CLI exited with error: {result.stderr}"
assert os.path.exists(
docx_output_file_2
), f"Output file not created: {docx_output_file_2}"

with open(docx_output_file_2, "r") as f:
output = f.read()
for test_string in DOCX_TEST_STRINGS:
assert (
test_string in output
), f"Expected string not found in output: {test_string}"


if __name__ == "__main__":
"""Runs this file's tests from the command line."""
import tempfile

with tempfile.TemporaryDirectory() as tmp_dir:
test_version(tmp_dir)
test_invalid_flag(tmp_dir)
test_output_to_stdout(tmp_dir)
test_output_to_file(tmp_dir)
print("All tests passed!")

0 comments on commit f60b41b

Please sign in to comment.