Skip to content

Commit 47fef2d

Browse files
committed
Fixed the reading of the intro paragraphs that come before the first H2 heading
1 parent 4144672 commit 47fef2d

File tree

2 files changed

+9
-1
lines changed

2 files changed

+9
-1
lines changed

data_collection/utils/wikipedia.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ def _extract_sections_from_wikipedia_page(
61 61
# Adding the intro section, before the first h2 tag
62 62
paragraphs = []
63 63
for element in soup.find_all(["p", "h2"]):
64-
if element.name == "h2":
64+
if element.name == "h2" and element.get_text() != "Contents":
65 65
break
66 66
if element.name == "p":
67 67
paragraphs.append(element.get_text())

public/data/composers.json

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,14 @@
104 104
"birth_year": 1838,
105 105
"death_year": 1875
106 106
},
107+
{
108+
"full_name": "Felix Blumenfeld",
109+
"wikipedia_url": "https://en.wikipedia.org/wiki/Felix_Blumenfeld",
110+
"first_names": "Felix Mikhailovich",
111+
"last_name": "Blumenfeld",
112+
"birth_year": 1863,
113+
"death_year": 1931
114+
},
107 115
{
108 116
"full_name": "Luigi Boccherini",
109 117
"first_names": "Luigi",

0 commit comments

Comments
 (0)