Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Deenasun/81 feat schedule the web scraper #88

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 52 additions & 0 deletions api/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
from flask import Flask
from webscraper.database import *
from webscraper.nyserda_scraper import *

# Flask application instance; serves the scraper-trigger API endpoints below.
app = Flask(__name__)

# Run locally for development with: flask run --port 5328 --debug


@app.route("/api/hello", methods=["GET"])
def hello_world():
    """Liveness-check endpoint: returns a static greeting string."""
    greeting = "Hello, World!"
    return greeting


@app.route("/api/nyserda_large", methods=["GET"])
async def run_nyserda_large():
    """Run the NYSERDA large-scale renewables scraper and persist results.

    Returns a small JSON acknowledgment once the scrape completes.
    """
    nyserda_large_to_database()
    payload = {"message": "Hello NYSERDA Large"}
    return payload


@app.route("/api/nyserda_solar", methods=["GET"])
async def run_nyserda_solar():
    """Run the NYSERDA solar scraper and persist results to the database.

    Returns a small JSON acknowledgment once the scrape completes.
    """
    nyserda_solar_to_database()
    # Fixed typo in the response message ("Helo" -> "Hello") to match the
    # other endpoints' acknowledgment format.
    return {"message": "Hello NYSERDA Solar"}


@app.route("/api/nyiso", methods=["GET"])
async def run_nyiso():
    """Run the NYISO interconnection-queue scraper and persist results.

    Returns a small JSON acknowledgment once the scrape completes.
    """
    nyiso_to_database()
    payload = {"message": "Hello NYISO"}
    return payload


@app.route("/api/ores_noi", methods=["GET"])
async def run_ores_noi():
    """Run the ORES notice-of-intent scraper and persist results.

    Returns a small JSON acknowledgment once the scrape completes.
    """
    ores_noi_to_database()
    payload = {"message": "Hello ORES NOI"}
    return payload


@app.route("/api/ores_under_review", methods=["GET"])
async def run_ores_under_review():
    """Run the ORES under-review-projects scraper and persist results.

    Returns a small JSON acknowledgment once the scrape completes.
    """
    ores_under_review_to_database()
    payload = {"message": "Hello ORES Under Review"}
    return payload


@app.route("/api/ores_permitted", methods=["GET"])
async def run_ores_permitted():
    """Run the ORES permitted-projects scraper and persist results.

    Returns a small JSON acknowledgment once the scrape completes.
    """
    ores_permitted_to_database()
    payload = {"message": "Hello ORES Permitted"}
    return payload


# Development entry point when executed directly; port 5328 matches the
# Next.js rewrite proxy target (see next.config.mjs). Do not use debug=True
# in production.
if __name__ == "__main__":
    app.run(port=5328, debug=True)
Empty file added api/webscraper/__init__.py
Empty file.
Binary file modified api/webscraper/__pycache__/database_constants.cpython-312.pyc
Binary file not shown.
Binary file modified api/webscraper/__pycache__/nyiso_scraper.cpython-312.pyc
Binary file not shown.
Binary file modified api/webscraper/__pycache__/nyserda_scraper.cpython-312.pyc
Binary file not shown.
15 changes: 9 additions & 6 deletions api/webscraper/database.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,30 @@
import os
from dotenv import load_dotenv
from datetime import datetime
from dateutil import tz
from supabase import create_client, Client
from geocodio import GeocodioClient

from nyserda_scraper import query_nyserda_large, query_nyserda_solar_repeat
from nyiso_scraper import (
from .nyserda_scraper import query_nyserda_large, query_nyserda_solar_repeat
from .nyiso_scraper import (
filter_nyiso_iq_sheet,
filter_nyiso_cluster_sheet,
filter_nyiso_in_service_sheet,
filter_nyiso_withdrawn_sheets,
)
from ores_scraper import query_ores_noi, query_ores_under_review, query_ores_permitted
from utils.scraper_utils import (
from .ores_scraper import query_ores_noi, query_ores_under_review, query_ores_permitted
from .utils.scraper_utils import (
geocode_lat_long,
create_update_object,
update_kdm,
update_last_updated,
)
from database_constants import (
from .database_constants import (
initial_kdm,
)

load_dotenv(os.path.join(os.path.dirname(__file__), "../../.env.local"))

url: str = os.environ.get("NEXT_PUBLIC_SUPABASE_URL")
key: str = os.environ.get("NEXT_PUBLIC_SUPABASE_ANON_KEY")
supabase: Client = create_client(url, key)
Expand All @@ -48,7 +51,7 @@ def nyserda_large_to_database() -> None:
In the case that the project is cancelled, we delete the project from the Supabase database.
"""
database = []
database.extend(query_nyserda_large())
database.extend(query_nyserda_large()[:10])
for project in database:
if project.get("proposed_cod", None) is not None:
ymd = datetime.strptime(project.get("proposed_cod"), "%Y").strftime(
Expand Down
4 changes: 2 additions & 2 deletions api/webscraper/nyiso_scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
import pandas as pd
from io import BytesIO
import json
from utils.scraper_utils import clean_df_data
from database_constants import (
from .utils.scraper_utils import clean_df_data
from .database_constants import (
renewable_energy_abbreviations,
)

Expand Down
4 changes: 2 additions & 2 deletions api/webscraper/nyserda_scraper.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import requests
import json
from utils.scraper_utils import check_status, geocode_lat_long, standardize_label
from database_constants import renewable_energy_map, initial_kdm
from .utils.scraper_utils import check_status, geocode_lat_long, standardize_label
from .database_constants import renewable_energy_map, initial_kdm

"""
This scrapes data from the NYSERDA Large-scale Renewable Projects database.
Expand Down
4 changes: 2 additions & 2 deletions api/webscraper/ores_scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
from bs4 import BeautifulSoup
import pandas as pd
from io import StringIO
from utils.scraper_utils import geocode_lat_long
from database_constants import initial_kdm
from .utils.scraper_utils import geocode_lat_long
from .database_constants import initial_kdm

# url = "https://dps.ny.gov/ores-permit-applications"
# page = requests.get(url)
Expand Down
8 changes: 8 additions & 0 deletions next.config.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,14 @@ const nextConfig = {
},
],
},
async rewrites() {
return [
{
source: '/api/:path*',
destination: 'http://127.0.0.1:5328/api/:path*', // Proxy to Backend
},
];
},
};

export default nextConfig;
Loading