Skip to content
This repository has been archived by the owner on May 8, 2024. It is now read-only.

Commit

Permalink
feat: make div ID deterministic
Browse files Browse the repository at this point in the history
  • Loading branch information
ninpnin committed Feb 9, 2024
1 parent 13c5b7e commit 3007ff5
Showing 1 changed file with 4 additions and 2 deletions.
6 changes: 4 additions & 2 deletions scripts/add_uuid_to_divs.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
)
from tqdm import tqdm
import argparse, multiprocessing

from pathlib import Path



Expand All @@ -28,7 +28,9 @@ def add_div_ids(protocol):
divs = body.findall(f"{tei_ns}div")
#print(len(divs), protocol)
for div in divs:
- x = div.attrib.get(f"{xml_ns}id", get_formatted_uuid())
+ protocol_id = Path(protocol).stem
+ seed_str = f"{protocol_id}\n{' '.join(div.itertext())}"
+ x = div.attrib.get(f"{xml_ns}id", get_formatted_uuid(seed_str))
div.attrib[f"{xml_ns}id"] = x
num_ids += 1
ids.add(x)
Expand Down

0 comments on commit 3007ff5

Please sign in to comment.