analysis.Rmd

---
title: "analysis"
output: 
  html_document:
    toc: true
    toc_float: true
    toc_depth: 2
    number_sections: true
date: "2023-04-07"
---

```{r setup, include=FALSE}
# Global chunk defaults: hide code, warnings, and messages in the rendered report.
knitr::opts_chunk$set(echo = FALSE, warning=FALSE, message=FALSE)
# Large scipen penalty so big dollar amounts print in fixed, not scientific, notation.
options(scipen = 999)
# Packages are attached in this order; a later package can mask names from an
# earlier one, so keep the order stable.
library(tidyverse)
library(dplyr)
library(DT)
library(ggplot2)
library(patchwork)
library(statebins)
library(GGally)
library(viridis)
library(plotly)
```

# States

## State Urbanicity Model 

```{r}
# Modelling frame for the state urbanicity analysis: give the 2020 urban
# population columns shorter names and shift both model variables by +1
# (presumably so zero values do not break the log transforms used later).
mod_data_state <- state_3years %>%
  rename(urban_pop = x2020_urban_pop, urban_pop_pct = x2020_pct_urban_pop) %>%
  mutate(across(c(net_pension_liability, urban_pop), ~ .x + 1))

# Raw-scale scatter of urban population against net pension liability.
ggplot(
  data = mod_data_state,
  mapping = aes(x = urban_pop, y = net_pension_liability)
) +
  geom_point(colour = "orange", alpha = 0.3) +
  labs(title = "Urbanicity & Net Pension Liability: Raw scale") +
  theme_minimal()
```

```{r}

# Same scatter as the raw-scale chart, drawn with log10 axes on both dimensions.
ggplot(
  data = mod_data_state,
  mapping = aes(x = urban_pop, y = net_pension_liability)
) +
  geom_point(colour = "orange", alpha = 0.3) +
  scale_x_log10() +
  scale_y_log10() +
  labs(
    title = "Urbanicity & Net Pension Liability: Log scales",
    x = "Urban population",
    y = "Net Pension Liability",
    caption = "Log scales interpretation: For every 1% increase in urban population, there's a 1.27% increase in net pension liability "
  ) +
  theme_minimal()
  
```

```{r}
# Log-log linear model of state net pension liability on urban population;
# the summary is printed into the report.
mod_state <- lm(
  formula = log(net_pension_liability) ~ log(urban_pop),
  data = mod_data_state
)
summary(mod_state)
```

## State governments: Net Pension Liabilities
```{r fig.width=15}
# Dodged bar chart of each state's net pension liability, one bar per fiscal year.
# The fill mapping on the year factor is what splits the bars per year; the
# former `group = year` was passed to ggplot() outside aes() and had no effect.
state_3years %>%
  mutate(year = as.factor(year)) %>%
  ggplot(aes(x = name, y = net_pension_liability, fill = year)) +
  geom_col(position = "dodge") +
  scale_y_continuous(
    breaks = c(50e9, 100e9, 150e9),
    # `labels` spelled out in full; `label` only worked through partial matching.
    labels = c("$50B", "$100B", "$150B")
  ) +
  # Colours are assigned to the year levels in order.
  scale_fill_manual(values = c("grey", "black", "orange")) +
  labs(
    title = "Net Pension Liabilities of State Governments in 3 years: 2020, 2021, 2022",
    x = "",
    y = "Net Pension Liabilities",
    caption = "Four states have not released 2022 ACFRs: AZ, CA, NV, OK"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 60))
```

## State governments: Net Pension Liabilities Per Person
```{r fig.width=15}
# Distinct (year, average NPL per person) pairs taken from the state table.
# NOTE(review): assumes `avg_npl_person` is constant within a year, so this
# yields one row per fiscal year — confirm against the data.
avg_npl_person <- state_3years %>%
  select(year, avg_npl_person) %>%
  distinct()

# Dodged bars of NPL per person by state and year, with one horizontal reference
# line per year at that year's average. The lines are now driven by the table
# above instead of hand-typed constants, and share the bar colours.
state_3years %>%
  mutate(year = as.factor(year)) %>%
  ggplot(aes(x = name, y = npl_person, fill = year)) +
  geom_col(position = "dodge") +
  geom_hline(
    data = mutate(avg_npl_person, year = as.factor(year)),
    mapping = aes(yintercept = avg_npl_person, colour = year),
    show.legend = FALSE
  ) +
  annotate("text", label = "Average NPL\nper person", x = 49, y = 2700) +
  # Same palette for bars and reference lines, matched to year levels in order.
  scale_fill_manual(values = c("grey", "black", "orange")) +
  scale_colour_manual(values = c("grey", "black", "orange")) +
  labs(
    title = "Net Pension Liabilities Per Person of State Governments in 3 years: 2020, 2021, 2022",
    x = "",
    y = "Net Pension Liabilities Per Person",
    caption = "Four states have not released 2022 ACFRs: AZ, CA, NV, OK"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 60))
```


```{r fig.width=15, include=FALSE}
# State governments: Total Liabilities
# Dodged bars of total liabilities by state, one bar per fiscal year. The fill
# mapping does the per-year split; the old `group = year` outside aes() was ignored.
state_3years %>%
  mutate(year = as.factor(year)) %>%
  ggplot(aes(x = name, y = total_liabilities, fill = year)) +
  geom_col(position = "dodge") +
  scale_y_continuous(
    breaks = c(100e9, 200e9, 300e9, 400e9, 500e9),
    # `labels` spelled out in full; `label` only worked through partial matching.
    labels = c("$100B", "$200B", "$300B", "$400B", "$500B")
  ) +
  scale_fill_manual(values = c('#D9AF6B', '#AF6458', '#68855C')) +
  labs(
    title = "Total Liabilities of State Governments in 3 years: 2020, 2021, 2022",
    x = "",
    y = "Total Liabilities",
    caption = "Four states have not released 2022 ACFRs: AZ, CA, NV, OK\n Four states whose ACFRs are being processed: HI, IL, MT, NM"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90))
```


```{r fig.width=15, include=FALSE}
# State governments: Revenues
# Dodged bars of revenues by state, one bar per fiscal year. The fill mapping
# does the per-year split; the old `group = year` outside aes() was ignored.
state_3years %>%
  mutate(year = as.factor(year)) %>%
  ggplot(aes(x = name, y = revenues, fill = year)) +
  geom_col(position = "dodge") +
  scale_y_continuous(
    breaks = c(100e9, 200e9, 300e9, 400e9, 500e9),
    # `labels` spelled out in full; `label` only worked through partial matching.
    labels = c("$100B", "$200B", "$300B", "$400B", "$500B")
  ) +
  scale_fill_manual(values = c('#526A83', '#625377', '#68855C')) +
  labs(
    title = "Revenues of State Governments in 3 years: 2020, 2021, 2022",
    x = "",
    y = "Revenues",
    caption = "Four states have not released 2022 ACFRs: AZ, CA, NV, OK\n Four states whose ACFRs are being processed: HI, IL, MT, NM"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90))
```

```{r}
# Notes on manual corrections applied to the source files:
# New Jersey 2020: need to subtract the deferred amount:
#   total assets = 75210182697-20534907402
# NJ 2021 total assets = 121667820456-52919930521
#
# Texas: found 250,250,365000, page 37. Previous on db: 62882563000
#
# FL: Net pension liability 2020 should be = 1,142,129,836 page 134, file on db
# FL 2021: Total revenues p. 21 (Condensed statements of activities) file in (state_county_city/data/...)
# CA 2021: revenues 573971000000 p.15
# CA 2020 : 386,615,000,000 p15
#
# ND: In file 2021 ACFRs, p. 26 reports total revenues of total primary gov 2020 = 9,685,801,000; >< acfrs database = 10,137,156,850
# 2021 = 13,356,908,000
#
# OK 2021: revenues 23456031000
##########

# 2020 state file whose total_assets column was filled in manually. It is read
# once and split into the two lookup tables used below.
state_gov_2020_raw <- read.csv("data/state_gov_2020_assetsadded.csv")

gov_total_assets_pop_20 <- state_gov_2020_raw %>%
  select(id, state, total_assets)

state_gov_population <- state_gov_2020_raw %>%
  select(state, id, population, name)

# Remaining 2020 figures come from the database export, restricted to the
# governments present in the manual file, with total_assets joined on.
gov_20 <- readRDS("data/data_from_dbsite_2020.RDS") %>%
  filter(id %in% gov_total_assets_pop_20$id) %>%
  select(id, state, total_liabilities, revenues, year, expenses) %>%
  left_join(gov_total_assets_pop_20, by = c("id", "state"))

# 2021: total_assets and other values were manually updated for 4 states
# (FL, OK, CA, ND) in this file.
gov_21 <- read.csv("data/state_gov_2021_assetsadded.csv") %>%
  select(id, state, total_assets, total_liabilities, revenues, year, expenses)

# Stack both years, attach population and name, and derive the three ratios.
# The ratios are rounded by name rather than by column position, so the step
# keeps working if columns are added or reordered upstream.
state_gov_20_21_pop <- gov_20 %>%
  rbind(gov_21) %>%
  left_join(state_gov_population, by = c("id", "state")) %>%
  mutate(
    liability_rev_ratio = total_liabilities / revenues,
    liability_per_capita = total_liabilities / population,
    asset_liability_ratio = total_assets / total_liabilities
  ) %>%
  mutate(
    across(
      c(liability_rev_ratio, liability_per_capita, asset_liability_ratio),
      ~ round(.x, 2)
    )
  )
```

## Liability_per_capita, FY 2020

State names in red indicate total liabilities per capita that are larger than $10000. 

```{r }
# Build one statebins tile map of total liabilities per capita.
#   fiscal_year     value of `year` to keep from state_gov_20_21_pop
#   map_title       plot title
#   legend_position forwarded to theme_statebins()
plot_liability_statebins <- function(fiscal_year, map_title, legend_position) {
  statebins(
    state_data = state_gov_20_21_pop %>% filter(year == fiscal_year),
    state_col = "state",
    value_col = "liability_per_capita",
    dark_label = "red",
    light_label = "white",
    font_size = 3,
    state_border_col = "white",
    state_border_size = 2,
    round = TRUE,
    radius = grid::unit(6, "pt"),
    ggplot2_scale_function = ggplot2::scale_fill_distiller
  ) +
    labs(title = map_title) +
    theme_statebins(legend_position = legend_position)
}

# Both maps are built here; only the 2020 map is drawn by this chunk.
statebin1 <- plot_liability_statebins(
  fiscal_year = 2020,
  map_title = "Total Liabilities per Capita\n FY 2020",
  legend_position = "bottom"
)
statebin2 <- plot_liability_statebins(
  fiscal_year = 2021,
  map_title = "2021",
  legend_position = "none"
)

statebin1
```

```{r}
# Draw the 2021 per-capita liabilities map built in the preceding chunk.
statebin2
```


## Liability-to-Revenues Ratio, grouped by Asset-to-Liability Ratio

The bar length indicates the ratio of total liabilities to revenues. The states are sorted into 4 groups by their ratio of total assets to total liabilities.

### 2020
```{r}
# Circular bar chart, FY 2020: one bar per state, bar height driven by the
# liability-to-revenue ratio, bars grouped by asset-to-liability ratio band.
data <- state_gov_20_21_pop %>%
  filter(year == 2020) %>%
  select(name, liability_rev_ratio, asset_liability_ratio) %>%
  mutate(
    # Bar height is the ratio multiplied by 30 (presumably to fill the fixed
    # y range set by ylim() below).
    liability_rev_ratio = liability_rev_ratio * 30,
    # Bands of asset-to-liability ratio. The last band uses >= so that a ratio
    # of exactly 3 no longer falls through to NA.
    group = case_when(
      asset_liability_ratio < 1 ~ "Ratio < 1",
      asset_liability_ratio < 2 ~ "1 < Ratio < 2",
      asset_liability_ratio < 3 ~ "2 < Ratio < 3",
      asset_liability_ratio >= 3 ~ "Ratio > 3"
    )
  ) %>%
  select(-asset_liability_ratio) %>%
  rename(value = liability_rev_ratio, individual = name) %>%
  arrange(group, value) %>%
  # Position index around the circle; derived from the row count instead of a
  # hard-coded 1:51 so the chunk does not fail when the number of rows changes.
  mutate(id = row_number())

# Label angle and justification for each bar.
empty_bar <- 4
label_data <- data
number_of_bar <- nrow(label_data)
# Subtract 0.5 so the label takes the angle of the centre of its bar rather
# than of the bar's left or right edge.
angle <- 90 - 360 * (label_data$id - 0.5) / number_of_bar
# Labels on the left half of the circle are flipped so they stay readable.
label_data$hjust <- ifelse(angle < -90, 1, 0)
label_data$angle <- ifelse(angle < -90, angle + 180, angle)

# One row per group with the start/end positions of its base line and the
# position of the group label (midpoint of start and end).
base_data <- data %>%
  group_by(group) %>%
  summarize(start = min(id), end = max(id) - empty_bar) %>%
  mutate(title = (start + end) / 2)

# x is a factor: with a numeric x there is a gap before the first bar.
# The previous `plot.title = element_text("2020")` only set a font family named
# "2020" (the first argument of element_text() is `family`) and has been removed.
circular_2020 <- ggplot(data, aes(x = as.factor(id), y = value, fill = group)) +
  geom_bar(stat = "identity", alpha = 0.5) +
  ylim(-100, 120) +
  theme_minimal() +
  theme(
    legend.position = "none",
    axis.text = element_blank(),
    axis.title = element_blank(),
    panel.grid = element_blank(),
    plot.margin = unit(rep(-1, 4), "cm")
  ) +
  coord_polar() +
  geom_text(
    data = label_data,
    aes(x = id, y = value + 10, label = individual, hjust = hjust),
    color = "black", fontface = "bold", alpha = 0.6, size = 2.5,
    angle = label_data$angle, inherit.aes = FALSE
  ) +
  # Base line under each group; adding empty_bar back extends it to the
  # group's last bar.
  geom_segment(
    data = base_data,
    aes(x = start, y = -2, xend = end + empty_bar, yend = -2),
    colour = "black", alpha = 0.8, size = 0.6, inherit.aes = FALSE
  ) +
  # NOTE(review): hjust has four entries, so this layer assumes exactly four
  # groups are present in the data.
  geom_text(
    data = base_data,
    aes(x = title, y = -18, label = group),
    hjust = c(1, 1, 0, 0), colour = "black", alpha = 0.8, size = 2,
    fontface = "bold", inherit.aes = FALSE
  ) +
  annotate("text", x = 1, y = -100, label = "Asset-to-Liability\nRatio Group", size = 2)

circular_2020
```

### 2021
```{r fig.width=8}
# Circular bar chart, FY 2021: one bar per state, bar height driven by the
# liability-to-revenue ratio, bars grouped by asset-to-liability ratio band.
data21 <- state_gov_20_21_pop %>%
  filter(year == 2021) %>%
  select(name, liability_rev_ratio, asset_liability_ratio) %>%
  mutate(
    # Bar height is the ratio multiplied by 30 (presumably to fill the fixed
    # y range set by ylim() below).
    liability_rev_ratio = liability_rev_ratio * 30,
    # Bands of asset-to-liability ratio. The last band uses >= so that a ratio
    # of exactly 3 no longer falls through to NA.
    group = case_when(
      asset_liability_ratio < 1 ~ "Ratio < 1",
      asset_liability_ratio < 2 ~ "1 < Ratio < 2",
      asset_liability_ratio < 3 ~ "2 < Ratio < 3",
      asset_liability_ratio >= 3 ~ "Ratio > 3"
    )
  ) %>%
  select(-asset_liability_ratio) %>%
  rename(value = liability_rev_ratio, individual = name) %>%
  arrange(group, value) %>%
  # Position index around the circle; derived from the row count instead of a
  # hard-coded 1:51 so the chunk does not fail when the number of rows changes.
  mutate(id = row_number())

# Label angle and justification for each bar. Everything here is computed from
# the 2021 table; the earlier version read the 2020 `label_data` object for the
# bar count, the angles and the text rotation.
empty_bar <- 4
label_data21 <- data21
number_of_bar <- nrow(label_data21)
# Subtract 0.5 so the label takes the angle of the centre of its bar rather
# than of the bar's left or right edge.
angle <- 90 - 360 * (label_data21$id - 0.5) / number_of_bar
# Labels on the left half of the circle are flipped so they stay readable.
label_data21$hjust <- ifelse(angle < -90, 1, 0)
label_data21$angle <- ifelse(angle < -90, angle + 180, angle)

# One row per group with the start/end positions of its base line and the
# position of the group label (midpoint of start and end).
base_data21 <- data21 %>%
  group_by(group) %>%
  summarize(start = min(id), end = max(id) - empty_bar) %>%
  mutate(title = (start + end) / 2)

# x is a factor: with a numeric x there is a gap before the first bar.
# The previous `plot.title = element_text("2020")` only set a font family named
# "2020" (the first argument of element_text() is `family`) and has been removed.
circular_2021 <- ggplot(data21, aes(x = as.factor(id), y = value, fill = group)) +
  geom_bar(stat = "identity", alpha = 0.5) +
  ylim(-100, 120) +
  theme_minimal() +
  theme(
    legend.position = "none",
    axis.text = element_blank(),
    axis.title = element_blank(),
    panel.grid = element_blank(),
    plot.margin = unit(rep(-1, 4), "cm")
  ) +
  coord_polar() +
  geom_text(
    data = label_data21,
    aes(x = id, y = value + 10, label = individual, hjust = hjust),
    color = "black", fontface = "bold", alpha = 0.6, size = 2.5,
    angle = label_data21$angle, inherit.aes = FALSE
  ) +
  # Base line under each group; adding empty_bar back extends it to the
  # group's last bar.
  geom_segment(
    data = base_data21,
    aes(x = start, y = -2, xend = end + empty_bar, yend = -2),
    colour = "black", alpha = 0.8, size = 0.6, inherit.aes = FALSE
  ) +
  # NOTE(review): hjust has four entries, so this layer assumes exactly four
  # groups are present in the data.
  geom_text(
    data = base_data21,
    aes(x = title, y = -18, label = group),
    hjust = c(1, 1, 0, 0), colour = "black", alpha = 0.8, size = 2,
    fontface = "bold", inherit.aes = FALSE
  ) +
  annotate("text", x = 1, y = -100, label = "Asset-to-Liability\nRatio Group", size = 2)

circular_2021
```


## Parallel 4 key components   

The charts display 4 key components, colored by population size group. Each line represents one state.

### 2020

Alaska has the largest total assets (\$89,104,922,000). Connecticut has the largest total liabilities (\$97,821,884,000).

```{r}

# FY 2020 states with population under 5M, bucketed into 1M-wide population
# groups. The former ">5M" branch could never match after the population
# filter and has been removed.
d_parrallel <- state_gov_20_21_pop %>%
  filter(year == 2020, population < 5000000) %>%
  arrange(population) %>%
  mutate(
    pop_group = as.factor(case_when(
      population < 1000000 ~ "<1M",
      population < 2000000 ~ "1~2M",
      population < 3000000 ~ "2~3M",
      population < 4000000 ~ "3~4M",
      population < 5000000 ~ "4~5M"
    ))
  ) %>%
  select(name, total_liabilities, revenues, total_assets, expenses, pop_group)

# Parallel-coordinates plot of the four financial columns (2:5), one line per
# state, coloured by population group (column 6). The state count in the title
# is taken from the data instead of being typed in.
ggparcoord(
  d_parrallel,
  columns = 2:5,
  groupColumn = 6,
  order = "anyClass",
  scale = "globalminmax",
  showPoints = TRUE,
  title = sprintf("%d States whose population is less than 5M", nrow(d_parrallel)),
  alphaLines = 0.3
) +
  # Hand-placed labels for the two states called out in the text.
  annotate("text", x = 2.7, y = 90000000000, label = "Alaska") +
  annotate("text", x = 3.5, y = 100000000000, label = "Connecticut") +
  scale_color_viridis(discrete = TRUE) +
  theme_minimal() +
  theme(plot.title = element_text(size = 10))

```

### 2021
```{r}
# States with population above 5M, bucketed into population groups.
# NOTE(review): this chunk sits under a "2021" heading but filters
# year == 2020 — confirm which one is intended.
# The last band uses >= so that a population of exactly 10M no longer falls
# through to NA.
d_parrallel_above5M <- state_gov_20_21_pop %>%
  filter(year == 2020, population > 5000000) %>%
  arrange(population) %>%
  mutate(
    pop_group = as.factor(case_when(
      population < 6000000 ~ "5~6M",
      population < 8000000 ~ "6~8M",
      population < 10000000 ~ "8~10M",
      population >= 10000000 ~ ">10M"
    ))
  ) %>%
  select(name, total_liabilities, revenues, total_assets, expenses, pop_group)

# Parallel-coordinates plot of the four financial columns (2:5), one line per
# state, coloured by population group (column 6). The state count in the title
# is taken from the data instead of being typed in.
ggparcoord(
  d_parrallel_above5M,
  columns = 2:5,
  groupColumn = 6,
  order = "anyClass",
  scale = "globalminmax",
  showPoints = TRUE,
  title = sprintf("%d States Whose Population is larger than 5M", nrow(d_parrallel_above5M)),
  alphaLines = 0.3
) +
  # Hand-placed labels for two states.
  annotate("text", x = 1.2, y = 406953774000, label = "California") +
  annotate("text", x = 4.2, y = 390362000000, label = "Texas") +
  scale_color_viridis(discrete = TRUE) +
  theme_minimal() +
  theme(plot.title = element_text(size = 10))
```
## Expenses as a percentage of revenues 

```{r}

# Expenses as a percentage of revenues, one point per state and fiscal year.
# The value is now an actual percentage (ratio * 100, rounded to a whole
# percent) so that it matches the chart title and the variable name; the
# previous version plotted the raw ratio.
p_expenses_rev <- state_gov_20_21_pop %>%
  mutate(
    year = as.factor(year),
    exp_rev_pct = round(expenses / revenues * 100)
  ) %>%
  select(state, exp_rev_pct, year) %>%
  arrange(desc(exp_rev_pct)) %>%
  ggplot(aes(state, exp_rev_pct, group = year)) +
  geom_point(aes(color = year)) +
  labs(
    title = "Expenses as a percentage of revenues, FY 2019-20, 2020-21",
    y = "Expenses as % of revenues"
  ) +
  scale_color_manual(values = c("2020" = "lightblue", "2021" = "navy")) +
  theme_minimal()

# Interactive version for the HTML report.
ggplotly(p_expenses_rev)
```

## Liabilities to free cash flow 
This ratio shows how long it would take to retire liabilities using any existing budget surplus:
`total liabilities / (revenues - expenses)`
```{r}
# Bar chart of total liabilities divided by (revenues - expenses) per state for
# one fiscal year.
#   fiscal_year  value of `year` to keep from state_gov_20_21_pop
#   plot_title   chart title
plot_liabilities_free_cashflow <- function(fiscal_year, plot_title) {
  state_gov_20_21_pop %>%
    filter(year == fiscal_year) %>%
    mutate(
      liabilities_free_cashflow = round(total_liabilities / (revenues - expenses), 2)
    ) %>%
    select(state, year, liabilities_free_cashflow, total_liabilities, revenues, expenses) %>%
    arrange(desc(liabilities_free_cashflow)) %>%
    ggplot(aes(state, liabilities_free_cashflow)) +
    geom_bar(stat = "identity", fill = "orange") +
    theme_minimal() +
    labs(title = plot_title)
}

p_cashflow_20 <- plot_liabilities_free_cashflow(
  fiscal_year = 2020,
  plot_title = "Liabilities to free cash flow, FY 2019-20"
)
ggplotly(p_cashflow_20)

p_cashflow_21 <- plot_liabilities_free_cashflow(
  fiscal_year = 2021,
  plot_title = "Liabilities to free cash flow, FY 2020-21"
)
ggplotly(p_cashflow_21)
```


# Counties

## Viz Urbanicity County

```{r}
# Raw-scale scatter of county urban population against net pension liability.
# The title previously contained a literal "/n"; it is now a real line break.
county_3years %>%
  ggplot(aes(x = pop_urb, y = net_pension_liability)) +
  geom_point(color = "cornflowerblue", alpha = 0.3) +
  theme_minimal() +
  labs(title = "County government: Urban population & Net Pension Liability\n Raw scale")
```

```{r}
# County scatter of urban population against net pension liability, drawn with
# log10 axes on both dimensions.
ggplot(
  data = county_3years,
  mapping = aes(x = pop_urb, y = net_pension_liability)
) +
  geom_point(colour = "cornflowerblue", alpha = 0.3) +
  scale_x_log10() +
  scale_y_log10() +
  labs(
    title = "County government: Urbanicity & Net Pension Liability: Log scales",
    x = "Urban population",
    y = "Net Pension Liability",
    caption = "Log scales interpretation: For every 1% increase in urban population, there's a 0.42% increase in net pension liability"
  ) +
  theme_minimal()

```

```{r}
# Modelling frame for counties: keep the two model variables and shift each by
# +1 before they are passed to log() in the model below.
mod_data_county <- county_3years %>%
  select(net_pension_liability, pop_urb) %>%
  mutate(
    net_pension_liability = net_pension_liability + 1,
    pop_urb = pop_urb + 1
  )

# Log-log linear model; the summary is printed into the report.
mod_county <- lm(
  formula = log(net_pension_liability) ~ log(pop_urb),
  data = mod_data_county
)
summary(mod_county)

# Reference for interpreting log-transformed models:
# https://library.virginia.edu/data/articles/interpreting-log-transformations-in-a-linear-model

```

```{r, include=FALSE, eval=FALSE}
# Diagnostic plot number 3 for a fitted model (this chunk is not evaluated).
# NOTE(review): `mod1` is not defined in the visible chunks — presumably this
# should be one of the lm fits in this document (e.g. mod_county); confirm
# before enabling the chunk.
plot(mod1, which = 3)
# The trend line is even BUT the residuals are clustered ==> this violates the constant-variance assumption --> the model is misspecified.
```

## Viz Median income
```{r include = FALSE, eval=FALSE}

# County records joined to the `income` table (join keys are the columns the
# two tables share), with net pension liability shifted by +1.
income_mod_data <- left_join(county_3years, income) %>%
  mutate(net_pension_liability = net_pension_liability + 1)

# Log-log scatter of median household income against net pension liability.
ggplot(
  data = income_mod_data,
  mapping = aes(x = Median_Household_Income_2021, y = net_pension_liability)
) +
  geom_point(colour = "purple", alpha = 0.3) +
  scale_x_log10() +
  scale_y_log10() +
  theme_minimal() +
  labs(
    title = "County government: Median household income & Net pension liability",
    caption = "Log scales interpretation: For every 1% increase in household median income,\nthere's a 6.3% increase in net pension liability"
  )
```

```{r}
# Log-log linear model of county net pension liability on median household
# income. The bare `income_mod_data` statement that used to precede the model
# printed the entire data frame into the rendered report and has been removed.
# NOTE(review): `income_mod_data` is built in a chunk marked eval=FALSE, so it
# must already exist in the rendering environment — confirm.
income_mod_county <- lm(
  formula = log(net_pension_liability) ~ log(Median_Household_Income_2021),
  data = income_mod_data
)
summary(income_mod_county)
```

```{r}
# 2020 figures for the top-10 counties. Columns are selected by name instead of
# by position (2, 5, 7:9, 12) so the step survives column reordering in the CSV.
# NOTE(review): the names are inferred from the 2021 table this frame is later
# row-bound with (state, name, id, total_assets, total_liabilities, revenues,
# plus year) — confirm they match the CSV header.
top10_county_pop_assets_20 <- read.csv("data/top_10_county_2020_assetsadded.csv") %>%
  select(state = state.abb, name, id, total_assets, total_liabilities, revenues) %>%
  mutate(year = 2020)
```

```{r}
# Population lookup for the top-10 counties, keyed by id and state.
pop_top10county <- read.csv("data/top_10_county_2020_assetsadded.csv") %>%
  select(id, population, state = state.abb)

# 2021 figures, restricted to the counties present in the 2020 table.
top10_county_pop_assets_21 <- read.csv("data/top_10_county_2021_assetsadded.csv") %>%
  filter(id %in% top10_county_pop_assets_20$id) %>%
  select(state, name, id, year, total_assets, total_liabilities, revenues)

# Stack both years, attach population, and derive the three ratios. Join keys
# are stated explicitly and the ratios are rounded by name rather than by
# column position (previously 9:11).
county_gov_20_21_pop <- top10_county_pop_assets_20 %>%
  rbind(top10_county_pop_assets_21) %>%
  left_join(pop_top10county, by = c("id", "state")) %>%
  mutate(
    liability_rev_ratio = total_liabilities / revenues,
    liability_per_capita = total_liabilities / population,
    asset_liability_ratio = total_assets / total_liabilities
  ) %>%
  mutate(
    across(
      c(liability_rev_ratio, liability_per_capita, asset_liability_ratio),
      ~ round(.x, 2)
    )
  )

```

## Total Liabilities to Revenues 
```{r}
# Lollipop chart of the liability-to-revenue ratio for the top-10 counties:
# a green stem from 0 to the ratio and one point per fiscal year, with counties
# ordered by the ratio and axes flipped so names read horizontally.
county_gov_20_21_pop %>%
  mutate(year = as.factor(year)) %>%
  ggplot(
    mapping = aes(
      x = fct_reorder(name, liability_rev_ratio),
      y = liability_rev_ratio,
      group = year
    )
  ) +
  geom_segment(
    mapping = aes(x = name, xend = name, y = 0, yend = liability_rev_ratio),
    colour = "darkgreen"
  ) +
  geom_point(mapping = aes(colour = year), size = 4) +
  scale_colour_manual(values = c("2020" = "red", "2021" = "black")) +
  coord_flip() +
  labs(
    title = "Total Liabilities to Revenues Ratio in the Top 10 Counties",
    x = "",
    y = "Liability-to-Revenue Ratio"
  ) +
  theme_light() +
  theme(
    panel.grid.major.y = element_blank(),
    panel.border = element_blank(),
    axis.ticks.y = element_blank()
  )

  
```

Four counties lowered their Liability-to-Revenue Ratio: Harris, Los Angeles, San Diego, and Dallas.


## Total Assets to Total Liabilities Ratio

```{r}
# Lollipop chart of the asset-to-liability ratio for the top-10 counties:
# a sky-blue stem from 0 to the ratio and one point per fiscal year, with
# counties ordered by the ratio and axes flipped.
county_gov_20_21_pop %>%
  mutate(year = as.factor(year)) %>%
  ggplot(
    mapping = aes(
      x = fct_reorder(name, asset_liability_ratio),
      y = asset_liability_ratio,
      group = year
    )
  ) +
  geom_segment(
    mapping = aes(x = name, xend = name, y = 0, yend = asset_liability_ratio),
    colour = "skyblue"
  ) +
  geom_point(mapping = aes(colour = year), size = 4) +
  coord_flip() +
  labs(
    title = "Total Assets to Total Liabilities Ratio in the Top 10 Counties",
    x = "",
    y = "Asset-to-Liability Ratio"
  ) +
  theme_light() +
  theme(
    panel.grid.major.y = element_blank(),
    panel.border = element_blank(),
    axis.ticks.y = element_blank()
  )
```

## Total Liabilities per Capita

```{r}
# Dot plot of total liabilities per capita for the top-10 counties; point
# colour encodes the fiscal year and point size the per-capita value.
county_gov_20_21_pop %>%
  mutate(year = as.factor(year)) %>%
  ggplot(
    mapping = aes(
      x = fct_reorder(name, liability_per_capita),
      y = liability_per_capita,
      group = year
    )
  ) +
  geom_point(mapping = aes(colour = year, size = liability_per_capita)) +
  scale_colour_manual(values = c("2020" = "orange", "2021" = "purple")) +
  coord_flip() +
  labs(
    title = "Total Liabilities per Capita in the Top 10 Counties. FY 2020, 2021",
    x = "",
    y = "liability_per_capita"
  ) +
  theme_minimal()
```
## Top 100 Counties: Net Pension Liabilities

```{r fig.width=15}
# Dodged bars of net pension liability for the top-100 counties, one bar per
# fiscal year. The fill mapping does the per-year split; the old `group = year`
# outside aes() was ignored.
county_top100_3years %>%
  mutate(year = as.factor(year)) %>%
  ggplot(aes(x = name, y = net_pension_liability, fill = year)) +
  geom_col(position = "dodge") +
  scale_y_continuous(
    breaks = c(5e9, 10e9, 15e9),
    # `labels` spelled out in full; `label` only worked through partial matching.
    labels = c("$5B", "$10B", "$15B")
  ) +
  scale_fill_manual(values = c('#D9AF6B', '#AF6458', '#68855C')) +
  labs(
    title = "Net Pension Liabilities of The top 100 County Governments in 3 years: 2020, 2021, 2022",
    x = "",
    y = "Net Pension Liabilities",
    caption = ""
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90))
```

## Top 100 Counties: Net Pension Liabilities Per Person

```{r fig.width=15}
# Dodged bars of net pension liability per person for the top-100 counties.
# The y scale that was copied from the totals chart (breaks at $5B/$10B/$15B)
# has been removed: those breaks belong to total liabilities, not to a
# per-person amount, so the default axis is used instead.
county_top100_3years %>%
  mutate(
    year = as.factor(year),
    npl_person = net_pension_liability / population
  ) %>%
  ggplot(aes(x = name, y = npl_person, fill = year)) +
  geom_col(position = "dodge") +
  scale_fill_manual(values = c('#D9AF6B', '#AF6458', '#68855C')) +
  labs(
    title = "Net Pension Liabilities of The top 100 County Governments Per Person\nin 3 years: 2020, 2021, 2022",
    x = "",
    y = "Net Pension Liabilities Per Person",
    caption = ""
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90))
```

# Cities

## Top 100 Cities: Net Pension Liabilities

```{r fig.width=15}
# Dodged bars of net pension liability for the top-100 cities, one bar per
# fiscal year. The fill mapping does the per-year split; the old `group = year`
# outside aes() was ignored.
city_top100_3years %>%
  mutate(year = as.factor(year)) %>%
  ggplot(aes(x = name, y = net_pension_liability, fill = year)) +
  geom_col(position = "dodge") +
  scale_y_continuous(
    breaks = c(5e9, 10e9, 15e9),
    # `labels` spelled out in full; `label` only worked through partial matching.
    labels = c("$5B", "$10B", "$15B")
  ) +
  scale_fill_manual(values = c('#526A83', '#625377', '#68855C')) +
  labs(
    title = "Net Pension Liabilities of The top 100 City Governments in 3 years: 2020, 2021, 2022",
    x = "",
    y = "Net Pension Liabilities",
    caption = ""
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90))
```

## Top 100 Cities: Net Pension Liabilities Per Person 

```{r fig.width=15}
## Top 100 Cities Per person in 3 years 
# Dodged bars of net pension liability per person for the top-100 cities, one
# bar per fiscal year. The fill mapping does the per-year split; the old
# `group = year` outside aes() was ignored. The y axis label now says
# "Per Person" to match what is plotted.
city_top100_3years %>%
  mutate(
    year = as.factor(year),
    npl_person = net_pension_liability / population
  ) %>%
  ggplot(aes(x = name, y = npl_person, fill = year)) +
  geom_col(position = "dodge") +
  scale_fill_manual(values = c('#526A83', '#625377', '#68855C')) +
  labs(
    title = "Net Pension Liabilities of The top 100 City Governments Per Person\nin 3 years: 2020, 2021, 2022",
    x = "",
    y = "Net Pension Liabilities Per Person",
    caption = ""
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90))
```

## Entities at risk
1) Asset to liability ratio; 
2) Unrestricted - currently do not collect this
3) Current ratio: Current Assets / Current Liabilities --> Do not have current assets
```{r}
# 2020 top-10 city file used by the liability-share charts in the following chunks.
city_20 <- read.csv(file = "data/top10_city_2020_assetsadded.csv")
```
## Shares of Net Pension Liability & Net OPEB Liability 

Percentage of net pension liability, net OPEB liability, and other components in total liabilities in each city.

```{r}
library(waffle)

# Whole-percent shares of total liabilities per city: net pension, net OPEB,
# and the remainder ("others"), so the three shares always add up to 100.
dtest <- city_20 %>%
  mutate(
    net_pension_share = round((net_pension_liability / total_liabilities) * 100, 0),
    net_opeb_share = round((net_opeb_liability / total_liabilities) * 100, 0),
    others = 100 - net_pension_share - net_opeb_share
  ) %>%
  select(name, net_pension_share, net_opeb_share, others)
```

```{r}
# Waffle charts for the cities in rows 1 and 2 of dtest. The share values are
# typed in by hand; only the first chart carries named parts and a legend.
parts1 <- c("Net Pension Share" = 15, "Net OPEB share" = 36, "Others" = 49)
parts2 <- c(17, 5, 78)

w1 <- waffle(parts1, rows = 5, legend_pos = "top", title = dtest$name[1])
w2 <- waffle(parts2, rows = 5, legend_pos = "none", title = dtest$name[2])

# Stack the two charts.
iron(w1, w2)
```


```{r}
# Waffle charts for the cities in rows 3 and 4 of dtest (hand-typed shares, no legend).
parts3 <- c(48, 3, 49)
parts4 <- c(19, 9, 72)

w3 <- waffle(parts3, rows = 5, legend_pos = "none", title = dtest$name[3])
w4 <- waffle(parts4, rows = 5, legend_pos = "none", title = dtest$name[4])

iron(w3, w4)
```

```{r}
# Waffle charts for the cities in rows 5 to 8 of dtest (hand-typed shares, no
# legend). Only charts 5 and 6 are drawn here; w7 and w8 are built for a later chunk.
parts5 <- c(36, 1, 63)
parts6 <- c(32, 10, 58)
parts7 <- c(15, 18, 67)
parts8 <- c(36, 6, 58)

w5 <- waffle(parts5, rows = 5, legend_pos = "none", title = dtest$name[5])
w6 <- waffle(parts6, rows = 5, legend_pos = "none", title = dtest$name[6])
w7 <- waffle(parts7, rows = 5, legend_pos = "none", title = dtest$name[7])
w8 <- waffle(parts8, rows = 5, legend_pos = "none", title = dtest$name[8])

iron(w5, w6)
```

```{r}
# Stack the waffle charts w7 and w8, which were built in the preceding chunk.
iron(w7, w8)
```

```{r}
# Waffle charts for the cities in rows 9 and 10 of dtest (hand-typed shares, no legend).
# NOTE(review): the values in parts9 are identical to those typed for parts8 in
# an earlier chunk — confirm against dtest[9, ].
parts9 <- c(36, 6, 58)
parts10 <- c(20, 23, 57)

w9 <- waffle(parts9, rows = 5, legend_pos = "none", title = dtest$name[9])
w10 <- waffle(parts10, rows = 5, legend_pos = "none", title = dtest$name[10])

iron(w9, w10)
```

```{r }
# 
# waffle(dtest[6, 2:4], row = 5, legend_pos = "none",
#          title = dtest[6,1])
# waffle(dtest[7, 2:4], row = 5, legend_pos = "none",
#        title = dtest[7,1])
# waffle(dtest[8, 2:4], row = 5, legend_pos = "none",
#          title = dtest[8,1])
# waffle(dtest[9, 2:4], row = 5, legend_pos = "none",
#          title = dtest[9,1])
# waffle(dtest[10, 2:4], row = 5, legend_pos = "bottom",
#          title = dtest[10,1])

```