Skip to content

Commit

Permalink
large scale changes
Browse files Browse the repository at this point in the history
added:
- Months, Calendar Year, and Months-across-all-years time binning options
- total coverage for all years for which we have data
- toggleable total line
  • Loading branch information
Llippeatt-git committed Aug 7, 2024
1 parent 7e1089a commit 5a5e71a
Show file tree
Hide file tree
Showing 8 changed files with 159 additions and 45 deletions.
2 changes: 2 additions & 0 deletions press_dash_lib/aggregator.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ def count(
index=x_column,
values=count_column,
aggfunc='nunique',
fill_value=0
)
totals.fillna(value=0, inplace=True)
return totals
Expand All @@ -57,6 +58,7 @@ def count(
index=x_column,
columns=groupby_column,
values=count_column,
fill_value=0,
aggfunc='nunique',
)
counts.fillna(value=0, inplace=True)
Expand Down
6 changes: 5 additions & 1 deletion press_dash_lib/dash_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,8 @@ def aggregate(
y_column: str,
groupby_column: str = None,
aggregation_method: str = 'count',
start_year: int = None,
years_display: list[int] = None,
) -> Union[pd.Series, pd.DataFrame]:
'''Aggregate stats.
Expand Down Expand Up @@ -200,6 +202,7 @@ def aggregate(
count_column=y_column,
groupby_column=groupby_column,
)

elif aggregation_method == 'sum':
return _self.agg.sum(
df=df,
Expand All @@ -208,4 +211,5 @@ def aggregate(
groupby_column=groupby_column,
)
else:
raise KeyError('Requested aggregation method "{}" is not available.'.format(aggregation_method))
raise KeyError('Requested aggregation method "{}" is not available.'.format(aggregation_method))

5 changes: 5 additions & 0 deletions press_dash_lib/data_viewer.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@
import copy
import re
import types

from typing import Tuple


import numpy as np
import pandas as pd
import streamlit as st
Expand Down Expand Up @@ -139,6 +141,9 @@ def lineplot(

# Set defaults
xs = df.index



if categories is None:
categories = df.columns
if category_colors is None:
Expand Down
51 changes: 37 additions & 14 deletions press_dash_lib/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
import matplotlib.font_manager as font_manager
import seaborn as sns

from press_dash_lib import utils

from .settings import Settings

class Interface:
Expand All @@ -31,6 +33,8 @@ def __init__(self, config: dict, settings: Settings):
def request_data_axes(
self,
st_loc,
max_year,
min_year,
ask_for: list[str] = ['aggregation_method', 'x_column', 'y_column', 'groupby_column'],
local_key: str = None,
display_defaults: dict = {},
Expand Down Expand Up @@ -86,6 +90,22 @@ def request_data_axes(
options = display_options.get('x_column', self.config['x_columns']),
index = display_defaults.get(key + '_ind', 0),
)

if value == 'Year Spotlight':
value2, ind2 = selectbox(
st_loc,
'what year do you want to spotlight?',
options = list(range(min_year, (max_year+1), 1))
)
value = value + ':' + str(value2)
if value == 'Month across all Years':
value2, ind2 = selectbox(
st_loc,
'what month do you want to spotlight?',
options= ['January', 'February', 'March', 'April', 'May', 'June', 'July','August','September','October','November','December']
)
value = value + ':' + value2

selected_settings[key] = value
selected_settings[key + '_ind'] = ind
key = 'y_column'
Expand Down Expand Up @@ -130,7 +150,7 @@ def request_data_settings(
display_defaults: dict = {},
selected_settings: dict = None,
tag: str = None,
) -> dict:
):
'''Request common data settings from the user.
Args:
Expand All @@ -154,19 +174,20 @@ def request_data_settings(
if selected_settings is None:
selected_settings = self.settings.common['data']


toggled_on = st_loc.toggle(
label='show total',
value=True,
)

st_loc.markdown('# Data Settings')

# Setup the tag
if tag is None:
tag = ''
else:
tag += ':'

key = 'show_total'
if key in ask_for:
selected_settings[key] = st_loc.checkbox(
'show total',
value=display_defaults.get(key, True),
key=tag + key
)

key = 'cumulative'
if key in ask_for:
selected_settings[key] = st_loc.checkbox(
Expand All @@ -190,13 +211,13 @@ def request_data_settings(
key=tag + key
)

return selected_settings
return selected_settings, toggled_on

def process_filter_settings(
self,
st_loc,
df: pd.DataFrame,
ask_for: list[str] = ['text', 'categorical', 'numerical'],
ask_for: list[str] = ['categorical', 'date'],
local_key: str = None,
display_defaults: dict = {},
value: str = None,
Expand Down Expand Up @@ -252,7 +273,7 @@ def process_filter_settings(
default=default,
key=tag + key + ':' + value
)

return selected_settings

def request_view_settings(
Expand All @@ -271,6 +292,8 @@ def request_view_settings(
display_options: dict = {},
selected_settings: dict = None,
tag: str = None,
default_x: str = '',
default_y: str = '',
):
'''Generic and common figure settings.
Expand Down Expand Up @@ -346,14 +369,14 @@ def request_view_settings(
if key in ask_for:
selected_settings[key] = st_loc.text_input(
'x label',
value=display_defaults.get(key, ''),
value=display_defaults.get(key, default_x),
key=tag + key,
)
key = 'y_label'
if key in ask_for:
selected_settings[key] = st_loc.text_input(
'y label',
value=display_defaults.get(key, ''),
value=display_defaults.get(key, default_y),
key=tag + key,
)
key = 'yscale'
Expand Down
98 changes: 85 additions & 13 deletions press_dash_lib/pages/base_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,15 @@ def main(config_fp: str, user_utils: types.ModuleType = None):
combined_settings = builder.settings.upload_button(st.sidebar)

# Global settings
st.sidebar.markdown('# Data Settings')
builder.interface.request_data_settings(
#st.sidebar.markdown('# Data Settings')
setting_check, toggle = builder.interface.request_data_settings(
st.sidebar,
)

st.sidebar.markdown('# View Settings')
builder.interface.request_view_settings(st.sidebar)
builder.interface.request_view_settings(
st.sidebar
)

# Recategorize data
selected_settings = builder.settings.common['data']
Expand All @@ -60,43 +63,102 @@ def main(config_fp: str, user_utils: types.ModuleType = None):
),
)

# Identify year bounds for any range calculations
min_year = data['preprocessed']['Calendar Year'].min()
max_year = data['preprocessed']['Calendar Year'].max()
years_to_display = list(range(min_year,max_year+1, 1))

# for fiscal year range calcs
min_year_fisc = data['preprocessed']['Fiscal Year'].min()
max_year_fisc = data['preprocessed']['Fiscal Year'].max()
years_to_display_fisc = list(range(min_year_fisc, max_year_fisc+1, 1))

# Data axes
# entered search category passed down to filter settings for further specification
st.subheader('Data Axes')
builder.interface.request_data_axes(st)
axes_object = builder.interface.request_data_axes(st, max_year, min_year)

# catches specified groupby category
category_specific = builder.settings.get_settings(common_to_include=['data'])

# filters data as per specs
builder.interface.process_filter_settings(
st,
data['recategorized'],
value=category_specific['groupby_column']
)

# Apply data filters
data['selected'] = builder.filter_data(
data['recategorized'],
builder.settings.common['filters']['categorical'],
)

months_to_num = {'January':1, 'February':2, 'March':3,'April':4,'May':5,'June':6,'July':7,'August':8,'September':9,'October':10,'November':11,'December':12}
# filters by year binning method
if axes_object['x_column_ind'] == 2:
# tosses all entries that do not fall in specified calendar year
year = int(axes_object['x_column'].split(':')[1])
data['time_adjusted'] = data['selected'][data['selected']['Date'].dt.year == year]
builder.settings.common['data']['x_column'] = 'Month'
elif axes_object['x_column_ind'] == 3:
# tosses all entries that do not fall in specified month across all years
month = str(axes_object['x_column'].split(':')[1])
data['time_adjusted'] = data['selected'][data['selected']['Date'].dt.month == months_to_num[month]]
builder.settings.common['data']['x_column'] = 'Calendar Year'
else:
data['time_adjusted'] = data['selected']

# Aggregate data
data['aggregated'] = builder.aggregate(
data['selected'],
data['time_adjusted'],
builder.settings.common['data']['x_column'],
builder.settings.common['data']['y_column'],
builder.settings.common['data']['groupby_column'],
builder.settings.common['data']['aggregation_method'],
)

# Aggregate data
data['totals'] = builder.aggregate(
data['selected'],
data['time_adjusted'],
builder.settings.common['data']['x_column'],
builder.settings.common['data']['y_column'],
aggregation_method=builder.settings.common['data']['aggregation_method'],
)


### Re-inserts every time bin (month or year) in the data's range into the aggregated dataframe, even bins whose values are all zero;
# this displays trends across multiple years more accurately

# If you are going to change the configs for x_columns, make sure they are reflected below!
if len(list(data['aggregated'].columns)) != 0:
data['aggregated'] = data['aggregated'].T
data['totals'] = data['totals'].T

if builder.settings.common['data']['x_column'] == 'Month':
for month in months_to_num.values():
if month not in data['aggregated'].columns:
data['aggregated'].insert(month-1, month, [0 for i in range(len(data['aggregated'].index))])
data['totals'].insert(month-1, month, [0 for i in range(len(data['totals'].index))])
elif builder.settings.common['data']['x_column'] == 'Fiscal Year':
for years in years_to_display_fisc:
if years not in data['aggregated'].columns:
data['aggregated'].insert(years-min_year_fisc, years, [0 for i in range(len(data['aggregated'].index))])
data['totals'].insert(years-min_year_fisc, years, [0 for i in range(len(data['totals'].index))])
else:
for years in years_to_display:
if years not in data['aggregated'].columns:
data['aggregated'].insert(years-min_year, years, [0 for i in range(len(data['aggregated'].index))])
data['totals'].insert(years-min_year, years, [0 for i in range(len(data['totals'].index))])

data['aggregated'] = data['aggregated'].T
data['totals'] = data['totals'].T

# adds a zero-filled column for each selected category that is missing from the aggregated dataframe, for viewing
for topic in builder.settings.common['filters']['categorical'][category_specific['groupby_column']]:
if topic not in data['aggregated'].columns:
data['aggregated'][topic] = [0 for i in range(len(data['aggregated'].index))]

# Lineplot
local_key = 'lineplot'
st.header(config.get('lineplot_header', 'Lineplot'))
Expand All @@ -111,16 +173,26 @@ def main(config_fp: str, user_utils: types.ModuleType = None):
local_key=local_key,
selected_settings=builder.settings.local.setdefault('lineplot', {}),
tag=local_key,
default_x=builder.settings.common['data']['x_column'],
default_y=builder.settings.common['data']['y_column'],
)
local_opt_keys, common_opt_keys, unset_opt_keys = builder.settings.get_local_global_and_unset(
function = builder.data_viewer.lineplot,
local_key=local_key,
)
builder.data_viewer.lineplot(
df = data['aggregated'],
totals = data['totals'],
**builder.settings.get_settings(local_key)
)

# constructs the line plot with or without the 'total' line, depending on whether the 'show total' toggle is on
if toggle:
builder.data_viewer.lineplot(
df = data['aggregated'],
totals = data['totals'],
**builder.settings.get_settings(local_key)
)
else:
builder.data_viewer.lineplot(
df = data['aggregated'],
**builder.settings.get_settings(local_key)
)

# View the data directly
builder.data_viewer.write(data)
Expand Down
4 changes: 3 additions & 1 deletion press_dash_lib/user_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,9 +147,11 @@ def preprocess_data(cleaned_df, config):
preprocessed_df = cleaned_df.copy()

# Get the year, according to the config start date
preprocessed_df['Year'] = utils.get_year(
preprocessed_df['Fiscal Year'] = utils.get_year(
preprocessed_df['Date'], config['start_of_year']
)
preprocessed_df['Calendar Year'] = preprocessed_df['Date'].dt.year
preprocessed_df['Month'] = preprocessed_df['Date'].dt.month

# Tweaks to the press data
if 'Title (optional)' in preprocessed_df.columns:
Expand Down
14 changes: 10 additions & 4 deletions src/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,20 @@ groupings:
primary_id_column: index
id_columns: # Unique identifiers
- Title
- id
numerical_columns: # Numeric columns that can be summed
- Press Mentions
- People Reached
date_columns: # Dates
- Date
x_columns: # Data will be binned by these along the x-axis
- Year # For example, a value of 2019 would be grouped into the 2019-2020 financial year
- Fiscal Year # For example, a value of 2019 would be grouped into the 2019-2020 financial year
- Calendar Year
- Year Spotlight
- Month across all Years

# NOTE: do not change the ORDER of the x_columns options; some of the processing relies on each option's specific index in this list (see Base Page), so
# reordering them would break the code. Additional x_column options should be appended to the end.

categorical_columns: # Categorical columns that can be grouped, e.g. all Press Types=="Science" articles
- Research Topics
- Press Types
Expand All @@ -67,8 +73,8 @@ text_columns: # Text columns that can be searched
new_categories:
# Press types are defined hierarchically.
Press Types:
'External Press (Inclusive)': "'External Press'" # Anything tagged as External Press will be included in this category
'Northwestern Press (Inclusive)': "'Northwestern Press' & (not 'External Press')" # Anything tagged with Northwestern Press but not External Press will be included in this category
'External Press (Exclusive)': "'External Press'" # Anything tagged as External Press will be included in this category
'Northwestern Press (Exclusive)': "'Northwestern Press' & (not 'External Press')" # Anything tagged with Northwestern Press but not External Press will be included in this category
# CIERA press is left undefined, so it defaults to showing up as usual, provided it is not also tagged with Northwestern Press or External Press.
# Research topics are defined as falling into one of three main categories: Exploding & Dead Stars, Astrophysical Populations, and Exoplanets & the Solar System.
Research Topics:
Expand Down
Loading

0 comments on commit 5a5e71a

Please sign in to comment.