From 9846ace4fb3afe0ae1f8f26338d286db889eb1a1 Mon Sep 17 00:00:00 2001 From: Ipuch Date: Wed, 2 Mar 2022 19:05:05 -0500 Subject: [PATCH] elegant plot, messy data --- load_surveys.py | 15 +++++---- main.py | 26 ++++++++++----- plots.py | 88 ++++++++++++++++++++++++++++++++++++++----------- 3 files changed, 93 insertions(+), 36 deletions(-) diff --git a/load_surveys.py b/load_surveys.py index 6fe2d71..3a34af6 100644 --- a/load_surveys.py +++ b/load_surveys.py @@ -78,7 +78,7 @@ def remove_undecided(df_survey: DataFrame, df_undecided_grades: DataFrame): def convert_grades( - df_survey: DataFrame, df_corresponding_grades: DataFrame, aggregation: AggregationMode, no_opinion_mode: bool + df_survey: DataFrame, df_corresponding_grades: DataFrame, aggregation: AggregationMode, no_opinion_mode: bool ): """ Remove the undecided grades and affect it proportionally to the other grades @@ -137,11 +137,11 @@ def convert_grades( def load_surveys( - csv_file: Path, - no_opinion_mode: bool = True, - candidates: Candidacy = None, - aggregation: AggregationMode = None, - polling_organization: PollingOrganizations = None, + csv_file: Path, + no_opinion_mode: bool = True, + candidates: Candidacy = None, + aggregation: AggregationMode = None, + polling_organization: PollingOrganizations = None, ): """ normalize file @@ -202,7 +202,8 @@ def load_surveys( df_surveys = df_surveys[df_surveys["retrait_candidature"] == "nan"] df_surveys = df_surveys[df_surveys["candidat"] != "Nathalie Arthaud"] # todo: dont hard code df_surveys = df_surveys[ - df_surveys["candidat"] != "Jean Lassalle"] # todo: remove candidates with only two dots instead. + df_surveys["candidat"] != "Jean Lassalle" + ] # todo: remove candidates with only two dots instead. if aggregation != AggregationMode.NO_AGGREGATION: diff --git a/main.py b/main.py index 2e84a6f..63c4133 100644 --- a/main.py +++ b/main.py @@ -17,7 +17,9 @@ class Arguments(tap.Tap): - show: bool = False + merit_profiles: bool = False + ranking_plot: bool = True + show: bool = True html: bool = False png: bool = False csv: Path = Path("presidentielle_jm.csv") @@ -30,8 +32,9 @@ def main(args: Arguments): df = load_surveys( args.csv, no_opinion_mode=True, - candidates=Candidacy.ALL_CURRENT_CANDIDATES, + candidates=Candidacy.ALL_CURRENT_CANDIDATES_WITH_ENOUGH_DATA, aggregation=AggregationMode.FOUR_MENTIONS, + polling_organization=PollingOrganizations.ALL, ) # Compute the rank for each survey @@ -56,7 +59,7 @@ def main(args: Arguments): # refill the dataframe of surveys df[df["id"] == survey] = df_with_rank - if args.show or args.html or args.png: + if args.merit_profiles: fig = plot_merit_profiles( df=df_with_rank, grades=grades, @@ -66,16 +69,21 @@ def main(args: Arguments): sponsor=sponsor, ) + if args.show: + fig.show() + if args.html: + fig.write_html(f"{args.dest}/{survey}.html") + if args.png: + fig.write_image(f"{args.dest}/{survey}.png") + + if args.ranking_plot: + fig = ranking_plot(df) if args.show: fig.show() if args.html: - fig.write_html(f"{args.dest}/{survey}.html") + fig.write_html(f"{args.dest}/ranking_plot.html") if args.png: - fig.write_image(f"{args.dest}/{survey}.png") - - print("done") - - fig = ranking_plot(df) + fig.write_image(f"{args.dest}/ranking_plot.png") if __name__ == "__main__": diff --git a/plots.py b/plots.py index a34874b..25d1119 100644 --- a/plots.py +++ b/plots.py @@ -143,6 +143,7 @@ def ranking_plot(df): df = df.sort_values(by="fin_enquete") annotations = [] for ii in get_candidates(df): + print(ii) temp_df = df[df["candidat"] == ii] fig.add_trace( go.Scatter( @@ -181,27 +182,74 @@ def ranking_plot(df): # name with break btw name and surname idx_space = ii.find(" ") - ii = ii[:idx_space] + "
" + ii[idx_space+1:] + name_label = ii[:idx_space] + "
" + ii[idx_space + 1 :] + size_annotations = 12 + # last dot annotation - annotations.append(dict(x=temp_df["fin_enquete"].iloc[-1], y=temp_df["rang"].iloc[-1], - xanchor='left', xshift=10, yanchor='middle', - text=ii, - font=dict(family='Arial', - size=16), - showarrow=False),) + annotations.append( + dict( + x=temp_df["fin_enquete"].iloc[-1], + y=temp_df["rang"].iloc[-1], + xanchor="left", + xshift=10, + yanchor="middle", + text=name_label, + font=dict(family="Arial", size=size_annotations, color=COLORS[ii]["couleur"]), + showarrow=False, + ), + ) # first dot annotation if temp_df["fin_enquete"].iloc[-1] != temp_df["fin_enquete"].iloc[0]: - annotations.append(dict(x=temp_df["fin_enquete"].iloc[0], y=temp_df["rang"].iloc[0], - xanchor='right', xshift=-10, yanchor='middle', - text=ii, - font=dict(family='Arial', - size=16), - showarrow=False)) - - fig.update_layout(yaxis=dict(autorange="reversed", - tick0=1, dtick=1), - annotations=annotations, - plot_bgcolor='white') - print("yo") - fig.show() + annotations.append( + dict( + x=temp_df["fin_enquete"].iloc[0], + y=temp_df["rang"].iloc[0], + xanchor="right", + xshift=-10, + yanchor="middle", + text=name_label, + font=dict(family="Arial", size=size_annotations, color=COLORS[ii]["couleur"]), + showarrow=False, + ) + ) + + fig.add_vline(x="2022-04-10", line_dash="dot") + annotations.append( + dict( + x="2022-04-10", + y=1.5, + xanchor="left", + xshift=10, + yanchor="middle", + text="1er Tour", + font=dict(family="Arial", size=size_annotations), + showarrow=False, + ) + ) + + fig.update_layout( + yaxis=dict(autorange="reversed", tick0=1, dtick=1, visible=False), + annotations=annotations, + plot_bgcolor="white", + showlegend=False) + + date = df["fin_enquete"].max() + title="Evaluation des sondages au jugement majoritaire
pour l'élection présidentielle 2022

" \ + + f" Dernier sondage: {date}." + fig.update_layout(title=title, title_x=0.5) + + fig.add_layout_image( + dict( + source="https://raw.githubusercontent.com/MieuxVoter/majority-judgment-tracker/main/icons/logo.png", + xref="paper", + yref="paper", + x=0.05, + y=1.01, + sizex=0.15, + sizey=0.15, + xanchor="left", + yanchor="bottom", + ) + ) + return fig