Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fetch the latest Academic Calendar, parse its data and generate the ics file #147

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,8 @@ data.txt
.idea/
.vscode
venv
.env
.env

ACADEMIC_CALENDAR_*.pdf
Academic_Cal-j/**
final.json
57 changes: 52 additions & 5 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,11 +1,58 @@
beautifulsoup4==4.12.2
google_api_python_client==2.90.0
blinker==1.8.2
bs4==0.0.2
cachetools==5.5.0
certifi==2024.8.30
cffi==1.17.1
chardet==5.2.0
charset-normalizer==3.4.0
click==8.1.7
cryptography==43.0.1
et-xmlfile==1.1.0
Flask==3.0.3
Flask-Cors==4.0.1
ghostscript==0.7
google-api-core==2.21.0
google-api-python-client==2.90.0
google-auth==2.35.0
google-auth-httplib2==0.2.0
google-auth-oauthlib==1.2.1
googleapis-common-protos==1.65.0
gunicorn==22.0.0
httplib2==0.22.0
icalendar==5.0.7
idna==3.10
iitkgp_erp_login==2.4.2
itsdangerous==2.2.0
Jinja2==3.1.4
MarkupSafe==3.0.1
numpy==2.1.2
oauth2client==4.1.3
oauthlib==3.2.2
opencv-python==4.10.0.84
openpyxl==3.1.5
packaging==24.1
pandas==2.2.3
pdfminer.six==20240706
proto-plus==1.24.0
protobuf==5.28.2
pyasn1==0.6.1
pyasn1_modules==0.4.1
pycparser==2.22
pyparsing==3.2.0
pypdf==4.3.1
pypdf_table_extraction==0.0.2
python-dateutil==2.9.0.post0
pytz==2023.3
Requests==2.31.0
flask==3.0.3
flask_cors==4.0.1
gunicorn==22.0.0
requests==2.31.0
requests-oauthlib==2.0.0
rsa==4.9
setuptools==75.1.0
six==1.16.0
soupsieve==2.6
tabulate==0.9.0
tk==0.1.0
tzdata==2024.2
uritemplate==4.1.1
urllib3==2.2.3
Werkzeug==3.0.4
11 changes: 11 additions & 0 deletions requirements.txt.bak
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
beautifulsoup4==4.12.2
google_api_python_client==2.90.0
httplib2==0.22.0
icalendar==5.0.7
iitkgp_erp_login==2.4.2
oauth2client==4.1.3
pytz==2023.3
Requests==2.31.0
flask==3.0.3
flask_cors==4.0.1
gunicorn==22.0.0
9 changes: 8 additions & 1 deletion timetable/generate_ics.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from icalendar import Calendar, Event
from datetime import datetime, timedelta
from timetable import Course
from utils import dates, build_event_duration, generate_india_time, next_weekday
from utils import academic_calander_handler, dates, build_event_duration, generate_india_time, next_weekday

WORKING_DAYS = dates.get_dates()

Expand Down Expand Up @@ -51,6 +51,13 @@ def generate_ics(courses: list[Course], output_filename):
event.add("dtstart", holiday[1])
event.add("dtend", holiday[1] + timedelta(days=1))
cal.add_component(event)

for entry in academic_calander_handler.get_academic_calendar():
event = Event()
event.add("summary", entry.event)
event.add("dtstart",entry.start_date)
event.add("dtend",entry.end_date)
cal.add_component(event)


if output_filename != "":
Expand Down
1 change: 1 addition & 0 deletions utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from utils.dates import *
from utils.build_event import *
from utils.academic_calander_handler import *

191 changes: 191 additions & 0 deletions utils/academic_calander_handler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
import glob
import json
import os
import re
import shutil
from dataclasses import dataclass, field
from datetime import datetime, timedelta
from zipfile import ZipFile

import camelot
import requests


# Base name shared by the JSON export target ('<name>.json'), the zip archive
# that is unpacked afterwards ('<name>.zip') and the folder the JSON table
# files are extracted into and read back from.
JSON_FOLDER_NAME = 'Academic_Cal-j'

@dataclass
class DataEntry:
    """One academic-calendar event together with its start and end timestamps.

    Instances are mutable; fields can be filled in after construction.
    """

    # The defaults are produced per instance. A plain `datetime.today()`
    # default would be evaluated once, when the class is defined, and every
    # entry created later would share that stale timestamp.
    start_date: datetime = field(default_factory=datetime.today)
    end_date: datetime = field(default_factory=datetime.today)
    # Human-readable event description.
    event: str = ""

def cwd():
    """Return the current working directory of the process."""
    current_dir = os.getcwd()
    return current_dir

def get_latest_calendar_name(today=None) -> str:
    """Return the PDF file name of the academic calendar covering *today*.

    Months before July are attributed to the session that started in the
    previous calendar year.

    Args:
        today: ``datetime`` to evaluate. Defaults to ``datetime.today()``.

    Returns:
        A name of the form ``ACADEMIC_CALENDAR_<YYYY>_<YY>.pdf``, for example
        ``ACADEMIC_CALENDAR_2024_25.pdf``.
    """
    if today is None:
        # Read the clock once so year and month come from the same instant.
        today = datetime.today()

    start_year = today.year if today.month >= 7 else today.year - 1

    # Two-digit suffix of the following year. The modulo is applied after the
    # increment and the value is zero-padded, so 2008 -> "09" and 2099 -> "00"
    # rather than "9" and "100".
    # NOTE(review): the padded form is assumed to match the site's naming for
    # those years; it is identical to the previous output for 2009-2098.
    end_suffix = (start_year + 1) % 100
    return f'ACADEMIC_CALENDAR_{start_year}_{end_suffix:02d}.pdf'

def is_file_present(file):
    """Tell whether ``file`` exists below the current working directory.

    The path is probed both as given and with a trailing slash appended;
    either hit counts as present.
    """
    base_path = cwd() + '/' + file
    candidates = (base_path, base_path + '/')
    return any(os.path.exists(candidate) for candidate in candidates)

def delete_file(file):
    """Delete a file or a whole directory tree under the current working directory.

    Args:
        file: Path of the file or directory, relative to the current working
            directory.

    Returns:
        True when the path no longer exists afterwards (it was removed, or it
        was not there to begin with); False when it exists but could not be
        removed. Previously the success paths returned None.
    """
    # Resolve the path once so the existence check, the type check and the
    # removal all operate on exactly the same location.
    target = os.path.join(os.getcwd(), file)

    if not os.path.exists(target):
        print(file, "File not present..")
        return True

    print("DELETING file ", file)
    try:
        if os.path.isdir(target):
            shutil.rmtree(target)
        elif os.path.isfile(target):
            os.remove(target)
        else:
            # Exists, but is neither a regular file nor a directory.
            raise OSError("filename not valid")
    except OSError as e:
        print("ERROR: seems file already exists but cannot be deleted")
        print(e)
        return False
    return True

def get_latest_calendar():
    """Download the current academic calendar PDF from the IIT KGP website.

    The PDF is written to the current working directory under the name
    returned by ``get_latest_calendar_name()``, overwriting any earlier copy.

    Returns:
        True when the PDF was downloaded and written. False when the request
        failed or the server answered with an error status; in that case
        nothing is written and an earlier copy, if any, is left untouched.
    """
    filename = get_latest_calendar_name()
    url = 'https://www.iitkgp.ac.in/assets/pdf/' + filename

    # Fetch before touching the disk: opening the target first would leave an
    # empty file behind on a network error, and without the status check an
    # error page would be stored as if it were the PDF.
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
    except requests.RequestException as e:
        print("ERROR: could not download", url)
        print(e)
        return False

    # Mode "wb" truncates, so an older copy needs no explicit deletion.
    with open(filename, "wb") as file:
        file.write(response.content)
    return True

def upzip_and_delete_zip(zip_file_name, result_folder_name):
    """Extract ``zip_file_name`` into ``result_folder_name`` and remove the archive.

    Returns:
        False if extraction raised (the error is printed and the archive is
        kept); True otherwise.
    """
    extracted = True
    with ZipFile(zip_file_name) as archive:
        try:
            archive.extractall(result_folder_name)
        except Exception as error:
            print(error)
            extracted = False

    if not extracted:
        return False

    print("Zip File not needed anymore, Deleteting ", zip_file_name)
    delete_file(zip_file_name)
    return True

def export_json():
    """Export every table found in the calendar PDF as JSON files.

    Reads the PDF named by ``get_latest_calendar_name()``, removes any
    previous ``JSON_FOLDER_NAME`` folder, exports the tables as a compressed
    JSON bundle and unpacks that bundle into ``JSON_FOLDER_NAME``.

    Returns:
        True when both the export and the extraction succeeded, False
        otherwise (the export error, if any, is printed).
    """
    filename = get_latest_calendar_name()
    # Per the original author: a "read_pdf not found" warning reported for
    # this call can be ignored.
    tables = camelot.read_pdf(filename, pages="all")

    print("Checking for pre-existing folder")
    delete_file(JSON_FOLDER_NAME)

    try:
        tables.export(JSON_FOLDER_NAME + '.json', f='json', compress=True)
    except Exception as E:
        print(E)
        return False

    # The compressed export is expected at '<JSON_FOLDER_NAME>.zip'. Pass the
    # extraction result on instead of reporting success unconditionally.
    return upzip_and_delete_zip(JSON_FOLDER_NAME + '.zip', JSON_FOLDER_NAME)

def get_json_files():
    """Return the paths of the exported JSON files in a stable, natural order.

    Returns:
        A list of ``*.json`` paths inside ``JSON_FOLDER_NAME`` (hidden files
        included), sorted by file name with runs of digits compared as
        numbers, or an empty list when the folder does not exist.
    """
    if not is_file_present(JSON_FOLDER_NAME):
        return []

    folder_path = cwd() + '/' + JSON_FOLDER_NAME
    files = glob.glob(folder_path + '/*.json', include_hidden=True)

    def natural_key(path):
        # re.split with a capturing group alternates text and digit runs, so
        # odd positions are always digit runs; comparing them as ints puts
        # "page-2" before "page-10".
        tokens = re.split(r'(\d+)', os.path.basename(path))
        return [int(tok) if idx % 2 else tok for idx, tok in enumerate(tokens)]

    # glob returns matches in arbitrary order, but the merged rows are
    # consumed positionally downstream, so the order must be deterministic.
    return sorted(files, key=natural_key)

def merge_json():
    """Concatenate the contents of all exported JSON files.

    The items of every file returned by ``get_json_files()`` are appended, in
    that order, to one list. The list is also written to ``final.json`` with
    an indent of 4.

    Returns:
        The combined list.
    """
    combined = []
    for path in get_json_files():
        with open(path) as source:
            combined.extend(json.load(source))

    with open('final.json', "w") as target:
        json.dump(combined, target, indent=4)

    return combined

def get_academic_calendar() -> list[DataEntry]:
    """Download, convert and parse the academic calendar into event entries.

    Returns:
        One ``DataEntry`` per usable calendar row, in row order. An empty
        list is returned when the PDF could not be downloaded or converted.
    """
    if not get_latest_calendar() or not export_json():
        print("ERROR: academic calendar could not be fetched or converted")
        return []

    rows = merge_json()
    # The first merged row is always discarded.
    # NOTE(review): presumably the table header -- confirm against the PDF.
    return _parse_calendar_rows(rows[1:])


def _parse_calendar_rows(rows):
    """Convert raw table rows (dicts keyed '0', '1', ...) into DataEntry objects."""
    # Dots are escaped so only literal "dd.mm.yyyy" matches; an unescaped '.'
    # would also accept e.g. "12/05/2024", which strptime then rejects.
    date_pattern = re.compile(r'\d{2}\.\d{2}\.\d{4}')
    entries = []

    for row in rows:
        # Only rows with more than four cells and a non-empty cell '4' are used.
        if not (len(row) > 4 and row['4'] != ''):
            continue

        # Cell '2' always contributes to the title; cell '1' is prefixed only
        # when it holds more than three characters.
        # NOTE(review): nesting inferred from a de-indented source -- confirm.
        title = row['2'].replace('\n', '')
        if len(row['1']) > 3:
            title = row['1'].replace('\n', '') + title

        # Search each cell separately so a match can never straddle the
        # boundary between cells '3' and '4'.
        found = date_pattern.findall(row['3']) + date_pattern.findall(row['4'])
        try:
            parsed = [datetime.strptime(text, "%d.%m.%Y") for text in found]
        except ValueError:
            # Looks like a date but is not one (e.g. "31.02.2025").
            parsed = []

        if len(parsed) == 1:
            # A single date is given a one-day duration.
            entries.append(DataEntry(parsed[0], parsed[0] + timedelta(1), title))
        elif len(parsed) == 2:
            # NOTE(review): the end date is used as-is; if consumers treat the
            # end as exclusive, the last day of the range is cut off -- confirm.
            entries.append(DataEntry(parsed[0], parsed[1], title))
        # Rows with no date, or with more than two, are skipped: there is no
        # reliable start/end for them, and emitting them would silently date
        # the event "today".

        # Stop after the annual-convocation row. The PDF is not reliable --
        # even this heading may be misspelled -- so either word is accepted in
        # a two-word cell; this only reduces the ways the check can fail.
        words = str(row['1']).strip().lower().split(" ")
        if len(words) == 2 and ("annual" in words or "convocation" in words):
            break

    return entries