Commit 6b0ae15: improved logging

esalonico committed Jun 27, 2023
1 parent 8fee512
Showing 6 changed files with 342 additions and 429 deletions.
2 changes: 1 addition & 1 deletion .gitignore
@@ -19,4 +19,4 @@ temp.ipynb
 simulate_data.ipynb
 
 # logging
-logs.log
+logs/
8 changes: 4 additions & 4 deletions config.ini
@@ -1,7 +1,7 @@
 [routes]
 ; Format: [origin, destination, range_of_days_from_today]
 
-muc_fco = ["MUC", "FCO", 90]
-fco_muc = ["FCO", "MUC", 90]
-fmm_fco = ["FMM", "FCO", 90]
-fco_fmm = ["FCO", "FMM", 90]
+muc_fco = ["MUC", "FCO", 2]
+; fco_muc = ["FCO", "MUC", 90]
+; fmm_fco = ["FMM", "FCO", 90]
+; fco_fmm = ["FCO", "FMM", 90]
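Each value under [routes] is a JSON list, [origin, destination, range_of_days_from_today], stored inside an INI file, which is why flight_analysis.py parses it with configparser plus json.loads (see get_routes_from_config in the next diff). A minimal, self-contained sketch of that parsing, assuming only the [routes] section shown above:

```python
import configparser
import json

config = configparser.ConfigParser()
config.read("config.ini")  # assumes the [routes] section shown above

# Each value is a JSON list: [origin, destination, range_of_days_from_today].
# Lines starting with ";" are INI comments, so configparser skips them.
for name in config["routes"]:
    origin, destination, n_days = json.loads(config["routes"][name])
    print(name, origin, destination, n_days)  # e.g. "muc_fco MUC FCO 2"
```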
90 changes: 43 additions & 47 deletions flight_analysis.py
@@ -1,91 +1,87 @@
 # author: Emanuele Salonico, 2023
 
+import configparser
+import json
 import logging
+from datetime import timedelta, datetime
+import pandas as pd
+import utils
+import os
+
+# logging
+logger_name = os.path.basename(__file__)
+logger = utils.setup_logger(logger_name)
+
 import numpy as np
-from os import path
-import pandas as pd
-from datetime import timedelta, datetime
-import json
-import configparser
 
 from src.google_flight_analysis.scrape import Scrape
 from src.google_flight_analysis.database import Database
 
 import private.private as private
 
 # config
 config = configparser.ConfigParser()
-config.read(path.join(path.dirname(__file__), "config.ini"))
-
-# TODO: improve
-logger = logging.getLogger("flight_analysis")
-logger.setLevel('DEBUG')
-log_format = '%(asctime)s - %(levelname)s - %(message)s'
-file_handler = logging.FileHandler(path.join(path.dirname(__file__), "logs.log"))
-stream_handler = logging.StreamHandler()
-formatter = logging.Formatter(log_format)
-file_handler.setFormatter(formatter)
-stream_handler.setFormatter(formatter)
-logger.addHandler(file_handler)
-logger.addHandler(stream_handler)
+config.read(os.path.join(os.path.dirname(__file__), "config.ini"))
 
 
-def get_routes():
+def get_routes_from_config():
     """
     Returns a list of routes from the config file.
     """
     routes = []
     for route in config["routes"]:
         routes.append(json.loads(config["routes"][route]))
 
     return routes
 
 
 if __name__ == "__main__":
 
     # 1. scrape routes
-    routes = get_routes()
+    routes = get_routes_from_config()
     all_results = []
     all_iter_times = []
 
     for route in routes:
         origin = route[0]
         destination = route[1]
-        date_range = [datetime.today() + timedelta(days=i+1) for i in range(route[2])]
+        date_range = [datetime.today() + timedelta(days=i+1)
+                      for i in range(route[2])]
         date_range = [date.strftime("%Y-%m-%d") for date in date_range]
 
         for i, date in enumerate(date_range):
             scrape = Scrape(origin, destination, date)
 
             try:
                 time_start = datetime.now()
                 scrape.run_scrape()
                 time_end = datetime.now()
 
-                time_iteration = (time_end - time_start).seconds + round(((time_end - time_start).microseconds * 1e-6), 2)
-
+                time_iteration = (time_end - time_start).seconds + \
+                    round(((time_end - time_start).microseconds * 1e-6), 2)
                 all_iter_times.append(time_iteration)
                 avg_iter_time = round(np.array(all_iter_times).mean(), 2)
 
-                logger.info(f"[{i+1}/{len(date_range)}] [{time_iteration} sec - avg: {avg_iter_time}] Scraped: {origin} {destination} {date} - {scrape.data.shape[0]} results")
-
+                logger.info(
+                    f"[{i+1}/{len(date_range)}] [{time_iteration} sec - avg: {avg_iter_time}] Scraped: {origin} {destination} {date} - {scrape.data.shape[0]} results")
                 all_results.append(scrape.data)
             except Exception as e:
-                logger.error(f"[{i+1}/{len(date_range)}] ERROR WITH {origin} {destination} {date}")
+                logger.error(
+                    f"[{i+1}/{len(date_range)}] ERROR WITH {origin} {destination} {date}")
                 logger.error(e)
 
     all_results_df = pd.concat(all_results)
-    logging.debug(all_results_df.head())
-
+    # logging.debug(all_results_df.head())
 
     # TODO: implement push to database after every route (error handling basically)
 
     # 2. add results to postgresql
 
-    # connect to database
-    db = Database(db_host=private.DB_HOST, db_name=private.DB_NAME, db_user=private.DB_USER, db_pw=private.DB_PW, db_table=private.DB_TABLE)
-    print(db.list_all_databases())
-
-    # prepare database and tables
-    db.prepare_db_and_tables(overwrite_table=False)
-
-    # add results to database
-    db.add_pandas_df_to_db(all_results_df)
+    # db = Database(db_host=private.DB_HOST, db_name=private.DB_NAME, db_user=private.DB_USER, db_pw=private.DB_PW, db_table=private.DB_TABLE)
+    # print(db.list_all_databases())
+
+    # # prepare database and tables
+    # db.prepare_db_and_tables(overwrite_table=False)
+
+    # # add results to database
+    # db.add_pandas_df_to_db(all_results_df)
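The new code hands logger construction to utils.setup_logger, which is defined in one of the changed files not shown on this page. Judging by the inline handler setup it replaces and the logs/ directory newly ignored in .gitignore, it plausibly looks something like the sketch below; the per-script log-file naming is an assumption:

```python
import logging
import os


def setup_logger(logger_name: str) -> logging.Logger:
    """Build a logger that writes to the console and to a file under logs/."""
    logs_dir = os.path.join(os.path.dirname(__file__), "logs")
    os.makedirs(logs_dir, exist_ok=True)  # logs/ is the directory .gitignore now ignores

    logger = logging.getLogger(logger_name)
    logger.setLevel(logging.DEBUG)

    # Same format string the removed inline setup used
    formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")

    # One log file per script is an assumption; only the logs/ directory is confirmed
    file_handler = logging.FileHandler(os.path.join(logs_dir, f"{logger_name}.log"))
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)

    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(formatter)
    logger.addHandler(stream_handler)

    return logger
```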
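The TODO left in the diff ("implement push to database after every route") could be handled roughly as below. scrape_route is a hypothetical helper wrapping the inner date loop above; only Database methods already shown in this diff are used:

```python
# Hypothetical per-route push for the TODO above: writing results as each
# route finishes means one failing route cannot lose the others' data.
db.prepare_db_and_tables(overwrite_table=False)  # Database method shown in this diff

for route in routes:
    route_df = scrape_route(route)  # hypothetical helper wrapping the inner date loop
    if route_df is None or route_df.empty:
        continue
    try:
        db.add_pandas_df_to_db(route_df)  # Database method shown in this diff
        logger.info(f"Pushed {route_df.shape[0]} rows for {route[0]}-{route[1]}")
    except Exception as e:
        logger.error(f"DB push failed for {route[0]}-{route[1]}: {e}")
```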
(Diffs for the remaining 3 changed files are not shown.)

0 comments on commit 6b0ae15
