elegant plot, messy data

MieuxVoter · Mar 3, 2022 · 9846ace · 9846ace
1 parent ca60c75
commit 9846ace
Show file tree

Hide file tree

Showing 3 changed files with 93 additions and 36 deletions.
diff --git a/load_surveys.py b/load_surveys.py
@@ -78,7 +78,7 @@ def remove_undecided(df_survey: DataFrame, df_undecided_grades: DataFrame):
 
 
 def convert_grades(
-        df_survey: DataFrame, df_corresponding_grades: DataFrame, aggregation: AggregationMode, no_opinion_mode: bool
+    df_survey: DataFrame, df_corresponding_grades: DataFrame, aggregation: AggregationMode, no_opinion_mode: bool
 ):
     """
     Remove the undecided grades and affect it proportionally to the other grades
@@ -137,11 +137,11 @@ def convert_grades(
 
 
 def load_surveys(
-        csv_file: Path,
-        no_opinion_mode: bool = True,
-        candidates: Candidacy = None,
-        aggregation: AggregationMode = None,
-        polling_organization: PollingOrganizations = None,
+    csv_file: Path,
+    no_opinion_mode: bool = True,
+    candidates: Candidacy = None,
+    aggregation: AggregationMode = None,
+    polling_organization: PollingOrganizations = None,
 ):
     """
     normalize file
@@ -202,7 +202,8 @@ def load_surveys(
         df_surveys = df_surveys[df_surveys["retrait_candidature"] == "nan"]
         df_surveys = df_surveys[df_surveys["candidat"] != "Nathalie Arthaud"]  # todo: dont hard code
         df_surveys = df_surveys[
-            df_surveys["candidat"] != "Jean Lassalle"]  # todo: remove candidates with only two dots instead.
+            df_surveys["candidat"] != "Jean Lassalle"
+        ]  # todo: remove candidates with only two dots instead.
 
     if aggregation != AggregationMode.NO_AGGREGATION:
 

diff --git a/main.py b/main.py
@@ -17,7 +17,9 @@
 
 
 class Arguments(tap.Tap):
-    show: bool = False
+    merit_profiles: bool = False
+    ranking_plot: bool = True
+    show: bool = True
     html: bool = False
     png: bool = False
     csv: Path = Path("presidentielle_jm.csv")
@@ -30,8 +32,9 @@ def main(args: Arguments):
     df = load_surveys(
         args.csv,
         no_opinion_mode=True,
-        candidates=Candidacy.ALL_CURRENT_CANDIDATES,
+        candidates=Candidacy.ALL_CURRENT_CANDIDATES_WITH_ENOUGH_DATA,
         aggregation=AggregationMode.FOUR_MENTIONS,
+        polling_organization=PollingOrganizations.ALL,
     )
 
     # Compute the rank for each survey
@@ -56,7 +59,7 @@ def main(args: Arguments):
         # refill the dataframe of surveys
         df[df["id"] == survey] = df_with_rank
 
-        if args.show or args.html or args.png:
+        if args.merit_profiles:
             fig = plot_merit_profiles(
                 df=df_with_rank,
                 grades=grades,
@@ -66,16 +69,21 @@ def main(args: Arguments):
                 sponsor=sponsor,
             )
 
+            if args.show:
+                fig.show()
+            if args.html:
+                fig.write_html(f"{args.dest}/{survey}.html")
+            if args.png:
+                fig.write_image(f"{args.dest}/{survey}.png")
+
+    if args.ranking_plot:
+        fig = ranking_plot(df)
         if args.show:
             fig.show()
         if args.html:
-            fig.write_html(f"{args.dest}/{survey}.html")
+            fig.write_html(f"{args.dest}/ranking_plot.html")
         if args.png:
-            fig.write_image(f"{args.dest}/{survey}.png")
-
-    print("done")
-
-    fig = ranking_plot(df)
+            fig.write_image(f"{args.dest}/ranking_plot.png")
 
 
 if __name__ == "__main__":

diff --git a/plots.py b/plots.py
@@ -143,6 +143,7 @@ def ranking_plot(df):
     df = df.sort_values(by="fin_enquete")
     annotations = []
     for ii in get_candidates(df):
+        print(ii)
         temp_df = df[df["candidat"] == ii]
         fig.add_trace(
             go.Scatter(
@@ -181,27 +182,74 @@ def ranking_plot(df):
 
         # name with break btw name and surname
         idx_space = ii.find(" ")
-        ii = ii[:idx_space] + "<br>" + ii[idx_space+1:]
+        name_label = ii[:idx_space] + "<br>" + ii[idx_space + 1 :]
+        size_annotations = 12
+
         # last dot annotation
-        annotations.append(dict(x=temp_df["fin_enquete"].iloc[-1], y=temp_df["rang"].iloc[-1],
-                                xanchor='left', xshift=10, yanchor='middle',
-                                text=ii,
-                                font=dict(family='Arial',
-                                          size=16),
-                                showarrow=False),)
+        annotations.append(
+            dict(
+                x=temp_df["fin_enquete"].iloc[-1],
+                y=temp_df["rang"].iloc[-1],
+                xanchor="left",
+                xshift=10,
+                yanchor="middle",
+                text=name_label,
+                font=dict(family="Arial", size=size_annotations, color=COLORS[ii]["couleur"]),
+                showarrow=False,
+            ),
+        )
         # first dot annotation
         if temp_df["fin_enquete"].iloc[-1] != temp_df["fin_enquete"].iloc[0]:
-            annotations.append(dict(x=temp_df["fin_enquete"].iloc[0], y=temp_df["rang"].iloc[0],
-                                    xanchor='right', xshift=-10, yanchor='middle',
-                                    text=ii,
-                                    font=dict(family='Arial',
-                                              size=16),
-                                    showarrow=False))
-
-    fig.update_layout(yaxis=dict(autorange="reversed",
-                                 tick0=1, dtick=1),
-                      annotations=annotations,
-                      plot_bgcolor='white')
-    print("yo")
-    fig.show()
+            annotations.append(
+                dict(
+                    x=temp_df["fin_enquete"].iloc[0],
+                    y=temp_df["rang"].iloc[0],
+                    xanchor="right",
+                    xshift=-10,
+                    yanchor="middle",
+                    text=name_label,
+                    font=dict(family="Arial", size=size_annotations, color=COLORS[ii]["couleur"]),
+                    showarrow=False,
+                )
+            )
+
+    fig.add_vline(x="2022-04-10", line_dash="dot")
+    annotations.append(
+        dict(
+            x="2022-04-10",
+            y=1.5,
+            xanchor="left",
+            xshift=10,
+            yanchor="middle",
+            text="1er Tour",
+            font=dict(family="Arial", size=size_annotations),
+            showarrow=False,
+        )
+    )
+
+    fig.update_layout(
+        yaxis=dict(autorange="reversed", tick0=1, dtick=1, visible=False),
+        annotations=annotations,
+        plot_bgcolor="white",
+        showlegend=False)
+
+    date = df["fin_enquete"].max()
+    title="<b>Evaluation des sondages au jugement majoritaire <br> pour l'élection présidentielle 2022</b> <br>" \
+          + f"<i> Dernier sondage: {date}.</i>"
+    fig.update_layout(title=title, title_x=0.5)
+
+    fig.add_layout_image(
+        dict(
+            source="https://raw.githubusercontent.com/MieuxVoter/majority-judgment-tracker/main/icons/logo.png",
+            xref="paper",
+            yref="paper",
+            x=0.05,
+            y=1.01,
+            sizex=0.15,
+            sizey=0.15,
+            xanchor="left",
+            yanchor="bottom",
+        )
+    )
+
     return fig