From 47fef2d633ff609ce88385a092c6c9805dffc35a Mon Sep 17 00:00:00 2001 From: zulko Date: Sat, 3 Aug 2024 18:45:06 -0400 Subject: [PATCH] fixed the reading of the first paragraphs before H2 --- data_collection/utils/wikipedia.py | 2 +- public/data/composers.json | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/data_collection/utils/wikipedia.py b/data_collection/utils/wikipedia.py index 47b4f76..e13bd46 100644 --- a/data_collection/utils/wikipedia.py +++ b/data_collection/utils/wikipedia.py @@ -61,7 +61,7 @@ def _extract_sections_from_wikipedia_page( # Adding the intro section, before the first h2 tag paragraphs = [] for element in soup.find_all(["p", "h2"]): - if element.name == "h2": + if element.name == "h2" and element.get_text() != "Contents": break if element.name == "p": paragraphs.append(element.get_text()) diff --git a/public/data/composers.json b/public/data/composers.json index 420a39d..ae47b90 100644 --- a/public/data/composers.json +++ b/public/data/composers.json @@ -104,6 +104,14 @@ "birth_year": 1838, "death_year": 1875 }, + { + "full_name": "Felix Blumenfeld", + "wikipedia_url": "https://en.wikipedia.org/wiki/Felix_Blumenfeld", + "first_names": "Felix Mikhailovich", + "last_name": "Blumenfeld", + "birth_year": 1863, + "death_year": 1931 + }, { "full_name": "Luigi Boccherini", "first_names": "Luigi",