Skip to content

Commit

Permalink
fixed the reading of the first paragraphs before H2
Browse files (browse the repository at this point in the history)
  • Loading branch information
Zulko committed Aug 3, 2024
1 parent 4144672 commit 47fef2d
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 1 deletion.
2 changes: 1 addition & 1 deletion data_collection/utils/wikipedia.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def _extract_sections_from_wikipedia_page(
# Adding the intro section, before the first h2 tag
paragraphs = []
for element in soup.find_all(["p", "h2"]):
- if element.name == "h2":
+ if element.name == "h2" and element.get_text() != "Contents":
break
if element.name == "p":
paragraphs.append(element.get_text())
Expand Down
8 changes: 8 additions & 0 deletions public/data/composers.json
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,14 @@
"birth_year": 1838,
"death_year": 1875
},
{
"full_name": "Felix Blumenfeld",
"wikipedia_url": "https://en.wikipedia.org/wiki/Felix_Blumenfeld",
"first_names": "Felix Mikhailovich",
"last_name": "Blumenfeld",
"birth_year": 1863,
"death_year": 1931
},
{
"full_name": "Luigi Boccherini",
"first_names": "Luigi",
Expand Down

0 comments on commit 47fef2d

Please sign in to comment.