Skip to content

Commit

Permalink
feat: add validation function
Browse files Browse the repository at this point in the history
  • Loading branch information
alimghmi committed Oct 22, 2023
1 parent 59fdced commit 6cb45fe
Showing 1 changed file with 10 additions and 2 deletions.
12 changes: 10 additions & 2 deletions scraper/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,9 @@ def fetch(self) -> pd.DataFrame:
f"Successfully fetched content from {self.url}. Now parsing the content."
)
df = self.parse_html(content)
logger.info(f"Parsed content from {self.url}. Extracted {len(df)} rows.")
logger.debug(f"\n{df}")
self.validate_data(df)
logger.info(f"Parsed content from {self.url}. Extracted {len(df)} rows.")
return df

def get_content(self):
Expand All @@ -34,7 +35,7 @@ def parse_html(self, content: str) -> pd.DataFrame:
try:
dfs = pd.read_html(content)
except Exception as e:
logger.error(f"Error reading HTML via pandas. Error: {e}")
logger.error(f"Error parsing HTML via pandas. Error: {e}")
raise e

if len(dfs):
Expand All @@ -44,3 +45,10 @@ def parse_html(self, content: str) -> pd.DataFrame:
return dfs[0]
else:
raise ValueError(f"No data found when parsing content from {self.url}.")

def validate_data(self, df: pd.DataFrame) -> None:
    """Ensure the parsed table actually contains price data.

    The frame is rejected when it is empty or when any cell of the
    "Clearing Date" column contains the text "No data found".

    Args:
        df: The table produced by ``parse_html``.

    Raises:
        ValueError: If ``df`` is empty or carries the "No data found"
            placeholder in its "Clearing Date" column.
        KeyError: If a non-empty ``df`` has no "Clearing Date" column.
    """
    # Test emptiness *before* touching the column: an empty frame may have
    # no columns at all, in which case the lookup below would raise KeyError
    # and the intended ValueError would never be reached.
    has_no_data = df.empty

    if not has_no_data:
        # Cast to str so the ``.str`` accessor works even when pandas inferred
        # a non-string dtype for the column; NaN becomes "nan" and therefore
        # never matches. ``regex=False`` makes this a plain substring test.
        clearing_dates = df["Clearing Date"].astype(str)
        has_no_data = bool(
            clearing_dates.str.contains("No data found", regex=False).any()
        )

    if has_no_data:
        logger.error("Data validation failed")
        raise ValueError("No price data provided by the provider as of now.")

    logger.debug("Data validation succeeded.")

0 comments on commit 6cb45fe

Please sign in to comment.