Skip to content

Commit

Permalink
fixed the html for the vocabs
Browse files Browse the repository at this point in the history
  • Loading branch information
cedricdcc committed Oct 23, 2024
1 parent 6fbdd5e commit e71b2d6
Show file tree
Hide file tree
Showing 2 changed files with 82 additions and 0 deletions.
44 changes: 44 additions & 0 deletions entrypoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,11 @@ def combined_index_pub(
return toreturn


def camel_case(value):
    """Convert a whitespace-separated phrase to lowerCamelCase.

    The first word is lower-cased in full; every following word is passed
    through ``str.title()`` and the pieces are joined without separators.

    Words are split on any run of whitespace. Leading whitespace therefore
    no longer yields an empty first word (which used to turn ``" foo bar"``
    into ``"FooBar"``), and tabs or newlines separate words just like spaces.

    Args:
        value: The text to convert.

    Returns:
        The camel-cased string, or ``""`` when *value* is empty or contains
        only whitespace.
    """
    # split() with no argument drops empty strings, unlike split(" ").
    words = value.split()
    if not words:
        return ""
    first, *rest = words
    return first.lower() + "".join(word.title() for word in rest)


def vocabpub(baseuri, nsfolder, nssub, nsname, outfolder, template_path):
log.debug(f"vocab to process: {nssub}/{nsname} in {nsfolder}")
log.debug(f"other params: baseuri={baseuri}, outfolder={outfolder}")
Expand Down Expand Up @@ -400,6 +405,45 @@ def vocabpub(baseuri, nsfolder, nssub, nsname, outfolder, template_path):
sink = SinkFactory.make_sink(args["output"], force_output=True)
settings = GeneratorSettings()
service.process(args["template_name"], source, settings, sink, args)

# take the output html and parse it with bs4
output_html = open(output_folder / output_name_html, "r")
log.debug(f"output_html={output_html}")
soup = bs4.BeautifulSoup(output_html, "html.parser")
# find all divs with class="concept entity"
# then find the <th>IRI</th> in that div
# replace the part after the # with its camelCase form: spaces removed and the first letter of each word after a space capitalised
# then take the <td><code>https://example.org/pylode2pages-test/emobonOntology#enaProjAccNum</code></td> that is next to the <th>IRI</th>
# copy the text between the # and the </code> tag
# replace the div id with that text

toc_div = soup.find("div", id="toc")
for div in soup.find_all("div", class_="concept entity"):
try:
for th in div.find_all("th"):
if th.text == "IRI":
iri_element = th.find_next("td").find("code")
iri = th.find_next("td").find("code").text
log.debug(f"iri={iri}")
previous_id = div["id"]
# find the <a> tag in the div with id "toc" whose href points at the old div id and repoint it to "#" + the camel-cased part of the IRI after the #
for a in toc_div.find_all("a"):
if a["href"] == "#" + previous_id:

# begin with the changing of the href
new_id = camel_case(iri.split("#")[1])
log.debug(f"new_id={new_id}")
a["href"] = "#" + new_id
div["id"] = new_id
# replace the iri split part with the new_id
iri_element.string = iri.replace(iri.split("#")[1], new_id)
except:
pass

# write the soup back to the file
with open(output_folder / output_name_html, "w") as output_html:
output_html.write(str(soup))

# ttl generation
second_args = {
"input": input_file.__str__(),
Expand Down
38 changes: 38 additions & 0 deletions templates/template_ttl.ttl.bak
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
{#-
  Template that renders one skos:Concept in Turtle for every row in sets['_'].

  Each concept IRI is built as <baseuri>/<relref>#<PREFLABEL_EN>, using
  vars_dict.baseuri, vars_dict.relref and the row's PREFLABEL_EN column.
  For every row the template always emits dc:date (DATE), dc:identifier and
  skos:prefLabel (both PREFLABEL_EN).

  BROADER / NARROWER are only emitted when the cell contains '['; the
  brackets are removed and the remainder is split on ';' to produce a
  comma-separated list of IRIs in the same namespace.
  DEFINITION_EN and ALTLABEL_EN are only emitted when non-empty.

  NOTE(review): row values are interpolated as-is; no escaping of quotes or
  of characters that are illegal in IRIs is visible in this template, so it
  presumably relies on clean input. Confirm against the data source.
  NOTE(review): skos:altLabel is written without a language tag, unlike
  skos:prefLabel and skos:definition (both "@en"). Confirm this is intended.

  This comment uses whitespace-stripping delimiters so that it adds nothing
  to the rendered output.
-#}
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX dc: <http://purl.org/dc/elements/1.1/>

{% for row in sets['_']%}
<{{vars_dict.baseuri}}/{{vars_dict.relref}}#{{row.PREFLABEL_EN}}>
rdf:type skos:Concept ;
dc:date "{{row.DATE}}" ;
dc:identifier "{{row.PREFLABEL_EN}}" ;
skos:prefLabel "{{row.PREFLABEL_EN}}"@en ;
{%-if '[' in row.BROADER -%}
{%-set BROADER = row.BROADER.replace('[','').replace(']','').split(';')%}
skos:broader {%for B in BROADER-%}
<{{vars_dict.baseuri}}/{{vars_dict.relref}}#{{B}}>
{%- if not loop.last -%}
,
{%- else -%}
{%- endif -%}
{%endfor%};
{%-endif%}
{%-if '[' in row.NARROWER -%}
{%-set NARROWER = row.NARROWER.replace('[','').replace(']','').split(';')%}
skos:narrower {%for N in NARROWER-%}
<{{vars_dict.baseuri}}/{{vars_dict.relref}}#{{N}}>
{%- if not loop.last -%}
,
{%- else -%}
{%- endif -%}
{%endfor%};
{%-endif%}
{%if row.DEFINITION_EN-%}
skos:definition "{{row.DEFINITION_EN}}"@en ;
{%-endif%}
{%if row.ALTLABEL_EN-%}
skos:altLabel "{{row.ALTLABEL_EN}}"
{%-endif%}
.
{% endfor %}

0 comments on commit e71b2d6

Please sign in to comment.