Skip to content

Commit

Permalink
mirroring multiflex year system for event dash
Browse files Browse the repository at this point in the history
as above
  • Loading branch information
Llippeatt-git committed Aug 23, 2024
1 parent 496a6c8 commit 2e61c17
Show file tree
Hide file tree
Showing 5 changed files with 92 additions and 54 deletions.
15 changes: 13 additions & 2 deletions event_dash_lib/data_viewer.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ def write(self, data, data_key: str=None, st_loc=st, columns: list[str]=None):
def lineplot(
self,
df: pd.DataFrame,
month_reindex: list[int] = None,
year_reindex: list[int] = None,
totals: pd.Series = None,
categories: list[str] = None,
cumulative: bool = False,
Expand Down Expand Up @@ -154,8 +156,17 @@ def lineplot(
fig = plt.figure(figsize=(fig_width, fig_height))
ax = plt.gca()

if df.index.name == 'Month':
plt.xticks(xs,[calendar.month_abbr[i] for i in xs])
is_empty = True
for cols in df.columns:
if sum(list(df[cols])) != 0:
is_empty = False
break

if not is_empty:
if df.index.name == 'Reindexed Month':
plt.xticks(xs, [calendar.month_abbr[month_reindex[i-1]] for i in xs])
elif df.index.name == 'Reindexed Year':
plt.xticks(xs, year_reindex)
for j, category_j in enumerate(categories):

ys = df[category_j]
Expand Down
37 changes: 23 additions & 14 deletions event_dash_lib/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import numpy as np
import pandas as pd
import streamlit as st
import calendar

import matplotlib
import matplotlib.pyplot as plt
Expand Down Expand Up @@ -91,23 +92,29 @@ def request_data_axes(
index = display_defaults.get(key + '_ind', 0),
)

if value == 'Year Spotlight':
value2, ind2 = selectbox(
st_loc,
'what year do you want to spotlight?',
options = list(range(min_year, (max_year+1), 1))
)
value = value + ':' + str(value2)
if value == 'Month across all Years':
value2, ind2 = selectbox(
st_loc,
'what month do you want to spotlight?',
options= ['January', 'February', 'March', 'April', 'May', 'June', 'July','August','September','October','November','December']
)
value = value + ':' + value2
if value == 'Year(Flexible)':
month_dict = {'January(Calendar Year)':1, 'February':2, 'March':3,'April(Reporting Year)':4,'May':5,'June':6,'July':7,'August':8,'September(Fiscal Year)':9,'October':10,'November':11,'December':12}
col1, col2 = st_loc.columns(2)
with col1:
value_month, ind_month = selectbox(
st_loc,
'starting month for twelve-month recording period',
options = list(month_dict.keys())
)
value = value + ':' + str(month_dict[value_month])
with col2:
if month_dict[value_month] >= 9:
min_year = min_year - 1
start_year, end_year = st_loc.select_slider(
'years to view',
options=list(range(min_year,max_year+1)),
value=(min_year, max_year),
)
value = value + ':' + str(start_year) + ':' + str(end_year)

selected_settings[key] = value
selected_settings[key + '_ind'] = ind

key = 'y_column'
if key in ask_for:
if selected_settings['aggregation_method'] == 'count':
Expand Down Expand Up @@ -346,6 +353,8 @@ def request_view_settings(
'color_palette',
'category_colors',
'totals',
'month_reindex',
'year_reindex',
'kwargs'
]
if ask_for == 'all':
Expand Down
87 changes: 54 additions & 33 deletions event_dash_lib/pages/base_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,9 @@
import types

import streamlit as st
import pandas as pd

from .. import dash_builder
from .. import dash_builder, utils

importlib.reload(dash_builder)

Expand All @@ -24,6 +25,7 @@ def main(config_fp: str, user_utils: types.ModuleType = None):
Defaults to those in root_dash_lib.
'''

pd.options.mode.copy_on_write = True
# This must be the first streamlit command
st.set_page_config(layout='wide')

Expand Down Expand Up @@ -59,15 +61,9 @@ def main(config_fp: str, user_utils: types.ModuleType = None):
),
)

# Identify year bounds for any range calculations
min_year = data['preprocessed']['Calendar Year'].min()
max_year = data['preprocessed']['Calendar Year'].max()
years_to_display = list(range(min_year,max_year+1, 1))

# for fiscal year range calcs
min_year_fisc = data['preprocessed']['Fiscal Year'].min()
max_year_fisc = data['preprocessed']['Fiscal Year'].max()
years_to_display_fisc = list(range(min_year_fisc, max_year_fisc+1, 1))
# NOTE: to impose artificial bounds on the year/time range, override min_year and max_year here
min_year = data['preprocessed']['Date'].dt.year.min()
max_year = data['preprocessed']['Date'].dt.year.max()

# Data axes
# entered search category passed down to filter settings for further specification
Expand All @@ -90,20 +86,45 @@ def main(config_fp: str, user_utils: types.ModuleType = None):
builder.settings.common['filters']['categorical'],
)

months_to_num = {'January':1, 'February':2, 'March':3,'April':4,'May':5,'June':6,'July':7,'August':8,'September':9,'October':10,'November':11,'December':12}
# filters by year binning method
if axes_object['x_column_ind'] == 2:
# tosses all entries that do not fall in specified calendar year
year = int(axes_object['x_column'].split(':')[1])
data['time_adjusted'] = data['selected'][data['selected']['Date'].dt.year == year]
builder.settings.common['data']['x_column'] = 'Month'
elif axes_object['x_column_ind'] == 3:
# tosses all entries that do not fall in specified month across all years
month = str(axes_object['x_column'].split(':')[1])
data['time_adjusted'] = data['selected'][data['selected']['Date'].dt.month == months_to_num[month]]
builder.settings.common['data']['x_column'] = 'Calendar Year'


# Filter the data by the selected year bounds, so that only entries falling within those bounds are displayed
reverse_month_dict = {1:'January', 2:'February', 3:'March', 4:'April', 5:'May',6:'June', 7:'July', 8:'August', 9:'September', 10:'October', 11:'November', 12:'December'}
if (axes_object['x_column_ind'] == 0):


# extracts time information from axes_object
time_object = axes_object['x_column'].split(':')
month_start = int(time_object[1])
year_start = int(time_object[2])
year_end = int(time_object[3])
years_to_display = list(range(year_start+1, year_end+1))

month_redef = [x if x<=12 else x-12 for x in range(month_start, 12+month_start)]


data['selected']['Reindexed Year'] = utils.get_year(
data['selected']['Date'], "{} 1".format(reverse_month_dict[month_start])
)
data['time_adjusted'] = data['selected'][data['selected']['Reindexed Year'] == year_start]

if len(years_to_display) != 0:
for i in years_to_display:
temp = data['selected'][data['selected']['Reindexed Year'] == i]
data['time_adjusted'] = pd.concat([data['time_adjusted'], temp])

builder.settings.common['data']['x_column'] = 'Reindexed Year'
if len(years_to_display) == 0:

# For Fiscal Month visualizations
def month_fisc_converter(month:int, forward=True):
return month_redef.index(month)+1

data['time_adjusted'].loc.__setitem__((slice(None), 'Reindexed Month'), data['time_adjusted'].__getitem__('Date').dt.month.map(month_fisc_converter))
builder.settings.common['data']['x_column'] = 'Reindexed Month'
else:
data['time_adjusted'] = data['selected']
builder.settings.common['data']['x_column'] = 'Calendar Year'

# Aggregate data
data['aggregated'] = builder.aggregate(
Expand All @@ -125,27 +146,23 @@ def main(config_fp: str, user_utils: types.ModuleType = None):

### Add all years for which we have data back into the aggregated dataframe (even if that time bin is all zero);
# this displays trends across multiple years more accurately
years_to_display.insert(0, year_start)

# If you change the x_columns entries in the config, make sure those changes are reflected in the branches below!
if len(list(data['aggregated'].columns)) != 0:
data['aggregated'] = data['aggregated'].T
data['totals'] = data['totals'].T

if builder.settings.common['data']['x_column'] == 'Month':
for month in months_to_num.values():
if builder.settings.common['data']['x_column'] == 'Reindexed Month':
for month in month_redef:
if month not in data['aggregated'].columns:
data['aggregated'].insert(month-1, month, [0 for i in range(len(data['aggregated'].index))])
data['totals'].insert(month-1, month, [0 for i in range(len(data['totals'].index))])
elif builder.settings.common['data']['x_column'] == 'Fiscal Year':
for years in years_to_display_fisc:
if years not in data['aggregated'].columns:
data['aggregated'].insert(years-min_year_fisc, years, [0 for i in range(len(data['aggregated'].index))])
data['totals'].insert(years-min_year_fisc, years, [0 for i in range(len(data['totals'].index))])
else:
elif builder.settings.common['data']['x_column'] == 'Reindexed Year':
for years in years_to_display:
if years not in data['aggregated'].columns:
data['aggregated'].insert(years-min_year, years, [0 for i in range(len(data['aggregated'].index))])
data['totals'].insert(years-min_year, years, [0 for i in range(len(data['totals'].index))])
data['aggregated'].insert(years-min(years_to_display), years, [0 for i in range(len(data['aggregated'].index))])
data['totals'].insert(years-min(years_to_display), years, [0 for i in range(len(data['totals'].index))])

data['aggregated'] = data['aggregated'].T
data['totals'] = data['totals'].T
Expand All @@ -158,7 +175,7 @@ def main(config_fp: str, user_utils: types.ModuleType = None):
# Lineplot
local_key = 'lineplot'
st.header(config.get('lineplot_header', 'Lineplot'))
st.text("Note: some data entries may correspond to multiple categories, and so may be contribute to dataset of each.\n As such, the all categories combined may exceed the total, which only counts each entry once***")
st.text("Note: entries may correspond to multiple data tags/categories, and so may contribute to the trendline of each (double-counting).\n this results in a conflict between aggregated data and total trendline, which only counts each entry once.\n Users and audience must be made clear of this discrepancy; in essence, total single-counts, aggregate double-counts")
with st.expander('Lineplot settings'):
local_opt_keys, common_opt_keys, unset_opt_keys = builder.settings.get_local_global_and_unset(
function=builder.data_viewer.lineplot,
Expand All @@ -181,12 +198,16 @@ def main(config_fp: str, user_utils: types.ModuleType = None):
if toggle:
builder.data_viewer.lineplot(
df = data['aggregated'],
month_reindex = month_redef if builder.settings.common['data']['x_column_ind'] == 0 else None,
year_reindex = years_to_display,
totals = data['totals'],
**builder.settings.get_settings(local_key)
)
else:
builder.data_viewer.lineplot(
df = data['aggregated'],
month_reindex = month_redef if builder.settings.common['data']['x_column_ind'] == 0 else None,
year_reindex=years_to_display,
**builder.settings.get_settings(local_key)
)

Expand Down
1 change: 0 additions & 1 deletion event_dash_lib/user_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,6 @@ def preprocess_data(cleaned_df, config):
)

preprocessed_df['Calendar Year'] = preprocessed_df['Date'].dt.year
preprocessed_df['Month'] = preprocessed_df['Date'].dt.month

# Tweaks to the press data
#if 'Title (optional)' in preprocessed_df.columns:
Expand Down
6 changes: 2 additions & 4 deletions src/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,8 @@ numerical_columns: # Numeric columns that can be summed
date_columns: # Dates
- Date
x_columns: # Data will be binned by these along the x-axis
- Fiscal Year # For example, a value of 2019 would be grouped into the 2019-2020 financial year
- Calendar Year
- Year Spotlight
- Month across all Years
- Year(Flexible) # For example, a value of 2019 would be grouped into the 2019-2020 financial year
- Adm
categorical_columns: # Categorical columns that can be grouped, e.g. all Press Types=="Science" articles
- Research Topic
- Calendar Group
Expand Down

0 comments on commit 2e61c17

Please sign in to comment.