Skip to content

Commit

Permalink
Use common root directory for synchronizing sub-trees
Browse files (browse the repository at this point in the history)
  • Loading branch information
hunyadi committed Nov 15, 2024
1 parent b431cf3 commit 46db83e
Show file tree
Hide file tree
Showing 5 changed files with 69 additions and 24 deletions.
1 change: 1 addition & 0 deletions integration_tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ def test_markdown(self) -> None:
document = ConfluenceDocument(
self.sample_dir / "index.md",
ConfluenceDocumentOptions(),
self.sample_dir,
{},
)
self.assertListEqual(document.links, [])
Expand Down
27 changes: 21 additions & 6 deletions md2conf/application.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,17 +52,31 @@ def synchronize(self, path: Path) -> None:
else:
raise ValueError(f"expected: valid file or directory path; got: {path}")

def synchronize_page(self, page_path: Path) -> None:
def synchronize_page(
self, page_path: Path, root_dir: Optional[Path] = None
) -> None:
"Synchronizes a single Markdown page with Confluence."

page_path = page_path.resolve(True)
self._synchronize_page(page_path, {})
if root_dir is None:
root_dir = page_path.parent
else:
root_dir = root_dir.resolve(True)

def synchronize_directory(self, local_dir: Path) -> None:
self._synchronize_page(page_path, root_dir, {})

def synchronize_directory(
self, local_dir: Path, root_dir: Optional[Path] = None
) -> None:
"Synchronizes a directory of Markdown pages with Confluence."

LOGGER.info("Synchronizing directory: %s", local_dir)
local_dir = local_dir.resolve(True)
if root_dir is None:
root_dir = local_dir
else:
root_dir = root_dir.resolve(True)

LOGGER.info("Synchronizing directory: %s", local_dir)

# Step 1: build index of all page metadata
page_metadata: Dict[Path, ConfluencePageMetadata] = {}
Expand All @@ -76,17 +90,18 @@ def synchronize_directory(self, local_dir: Path) -> None:

# Step 2: convert each page
for page_path in page_metadata.keys():
self._synchronize_page(page_path, page_metadata)
self._synchronize_page(page_path, root_dir, page_metadata)

def _synchronize_page(
self,
page_path: Path,
root_dir: Path,
page_metadata: Dict[Path, ConfluencePageMetadata],
) -> None:
base_path = page_path.parent

LOGGER.info("Synchronizing page: %s", page_path)
document = ConfluenceDocument(page_path, self.options, page_metadata)
document = ConfluenceDocument(page_path, self.options, root_dir, page_metadata)

if document.id.space_key:
with self.api.switch_space(document.id.space_key):
Expand Down
22 changes: 13 additions & 9 deletions md2conf/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,8 @@ class ConfluenceStorageFormatConverter(NodeVisitor):

options: ConfluenceConverterOptions
path: Path
base_path: Path
base_dir: Path
root_dir: Path
links: List[str]
images: List[str]
embedded_images: Dict[str, bytes]
Expand All @@ -311,12 +312,14 @@ def __init__(
self,
options: ConfluenceConverterOptions,
path: Path,
root_dir: Path,
page_metadata: Dict[Path, ConfluencePageMetadata],
) -> None:
super().__init__()
self.options = options
self.path = path
self.base_path = path.parent
self.base_dir = path.parent
self.root_dir = root_dir
self.links = []
self.images = []
self.embedded_images = {}
Expand Down Expand Up @@ -383,18 +386,16 @@ def _transform_link(self, anchor: ET._Element) -> Optional[ET._Element]:
# convert the relative URL to absolute URL based on the base path value, then look up
# the absolute path in the page metadata dictionary to discover the relative path
# within Confluence that should be used
absolute_path = (self.base_path / relative_url.path).absolute()
if not str(absolute_path).startswith(str(self.base_path)):
msg = f"relative URL {url} points to outside base path: {self.base_path}"
absolute_path = (self.base_dir / relative_url.path).resolve(True)
if not str(absolute_path).startswith(str(self.root_dir)):
msg = f"relative URL {url} points to outside root path: {self.root_dir}"
if self.options.ignore_invalid_url:
LOGGER.warning(msg)
anchor.attrib.pop("href")
return None
else:
raise DocumentError(msg)

relative_path = os.path.relpath(absolute_path, self.base_path)

link_metadata = self.page_metadata.get(absolute_path)
if link_metadata is None:
msg = f"unable to find matching page for URL: {url}"
Expand All @@ -405,6 +406,7 @@ def _transform_link(self, anchor: ET._Element) -> Optional[ET._Element]:
else:
raise DocumentError(msg)

relative_path = os.path.relpath(absolute_path, self.base_dir)
LOGGER.debug(
"found link to page %s with metadata: %s", relative_path, link_metadata
)
Expand Down Expand Up @@ -437,7 +439,7 @@ def _transform_image(self, image: ET._Element) -> ET._Element:
relative_path = Path(path)
if (
relative_path.suffix == ".svg"
and (self.base_path / relative_path.with_suffix(".png")).exists()
and (self.base_dir / relative_path.with_suffix(".png")).exists()
):
path = str(relative_path.with_suffix(".png"))

Expand Down Expand Up @@ -944,10 +946,11 @@ def __init__(
self,
path: Path,
options: ConfluenceDocumentOptions,
root_dir: Path,
page_metadata: Dict[Path, ConfluencePageMetadata],
) -> None:
self.options = options
path = path.absolute()
path = path.resolve(True)

with open(path, "r", encoding="utf-8") as f:
text = f.read()
Expand Down Expand Up @@ -1001,6 +1004,7 @@ def __init__(
webui_links=self.options.webui_links,
),
path,
root_dir,
page_metadata,
)
converter.visit(self.root)
Expand Down
38 changes: 29 additions & 9 deletions md2conf/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import logging
import os
from pathlib import Path
from typing import Dict, List
from typing import Dict, List, Optional

from .converter import (
ConfluenceDocument,
Expand Down Expand Up @@ -42,15 +42,22 @@ def process(self, path: Path) -> None:
if path.is_dir():
self.process_directory(path)
elif path.is_file():
self.process_page(path, {})
self.process_page(path)
else:
raise ValueError(f"expected: valid file or directory path; got: {path}")

def process_directory(self, local_dir: Path) -> None:
def process_directory(
self, local_dir: Path, root_dir: Optional[Path] = None
) -> None:
"Recursively scans a directory hierarchy for Markdown files."

LOGGER.info("Synchronizing directory: %s", local_dir)
local_dir = local_dir.resolve(True)
if root_dir is None:
root_dir = local_dir
else:
root_dir = root_dir.resolve(True)

LOGGER.info("Synchronizing directory: %s", local_dir)

# Step 1: build index of all page metadata
page_metadata: Dict[Path, ConfluencePageMetadata] = {}
Expand All @@ -59,15 +66,28 @@ def process_directory(self, local_dir: Path) -> None:

# Step 2: convert each page
for page_path in page_metadata.keys():
self.process_page(page_path, page_metadata)
self._process_page(page_path, root_dir, page_metadata)

def process_page(
self, path: Path, page_metadata: Dict[Path, ConfluencePageMetadata]
) -> None:
def process_page(self, path: Path, root_dir: Optional[Path] = None) -> None:
"Processes a single Markdown file."

path = path.resolve(True)
document = ConfluenceDocument(path, self.options, page_metadata)
if root_dir is None:
root_dir = path.parent
else:
root_dir = root_dir.resolve(True)

self._process_page(path, root_dir, {})

def _process_page(
self,
path: Path,
root_dir: Path,
page_metadata: Dict[Path, ConfluencePageMetadata],
) -> None:
"Processes a single Markdown file."

document = ConfluenceDocument(path, self.options, root_dir, page_metadata)
content = document.xhtml()
with open(path.with_suffix(".csf"), "w", encoding="utf-8") as f:
f.write(content)
Expand Down
5 changes: 5 additions & 0 deletions tests/test_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ def test_markdown(self) -> None:
actual = ConfluenceDocument(
self.source_dir / f"{name}.md",
ConfluenceDocumentOptions(),
self.source_dir,
{},
).xhtml()
actual = standardize(actual)
Expand All @@ -87,6 +88,7 @@ def test_broken_links(self) -> None:
actual = ConfluenceDocument(
self.source_dir / "missing.md",
ConfluenceDocumentOptions(ignore_invalid_url=True),
self.source_dir,
{},
).xhtml()
actual = standardize(actual)
Expand All @@ -100,6 +102,7 @@ def test_heading_anchors(self) -> None:
actual = ConfluenceDocument(
self.source_dir / "anchors.md",
ConfluenceDocumentOptions(heading_anchors=True),
self.source_dir,
{},
).xhtml()
actual = standardize(actual)
Expand All @@ -117,6 +120,7 @@ def test_mermaid_embedded_svg(self) -> None:
render_mermaid=True,
diagram_output_format="svg",
),
self.source_dir,
{},
)
self.assertEqual(len(document.embedded_images), 6)
Expand All @@ -129,6 +133,7 @@ def test_mermaid_embedded_png(self) -> None:
render_mermaid=True,
diagram_output_format="png",
),
self.source_dir,
{},
)
self.assertEqual(len(document.embedded_images), 6)
Expand Down

0 comments on commit 46db83e

Please sign in to comment.