Merge pull request #15 from Ipuch/main

Elegant ranking plot with messy data.
MieuxVoter · Mar 3, 2022 · 75ab221 · 75ab221
2 parents 46c0353 + 9846ace
commit 75ab221
Show file tree

Hide file tree

Showing 7 changed files with 295 additions and 90 deletions.
diff --git a/interface_mj.py b/interface_mj.py
@@ -1,4 +1,5 @@
 from libs.majority_judgment_2 import majority_judgment as mj
+import numpy as np
 import pandas as pd
 from pandas import DataFrame
 from utils import get_intentions
@@ -28,20 +29,28 @@ def sort_candidates_mj(
     merit_profiles_dict = set_dictionary(df_intentions, nb_grades, nb_candidates)
     ranking = mj(merit_profiles_dict, reverse=True)
 
-    # copy and empty the panda datafram to refill it.
-    new_df = df_intentions.copy()
-    new_df = new_df.drop(
-        labels=new_df.index, axis=0, index=None, columns=None, level=None, inplace=True, errors="raise"
-    )
-    # todo add a rank column
-    # refilling the dataframe
-    for key in ranking:
-        row = df_intentions[df_intentions["candidat"] == key]
-        new_df = pd.concat([new_df, row], ignore_index=True)
-    # set new index of rows
-    new_df.index = pd.Index(data=[i for i in range(1, nb_candidates + 1)], dtype="int64")
+    if "rang" not in df.columns:
+        df["rang"] = None
 
-    return new_df.reindex(index=new_df.index[::-1])  # sort to plot it the right way, best candidate at the top.
+    col_index = df.columns.get_loc("rang")
+    for c in ranking:
+        idx = np.where(df["candidat"] == c)[0][0]
+        df.iat[idx, col_index] = ranking[c]
+
+    # # copy and empty the pandas DataFrame to refill it.
+    # new_df = df_intentions.copy()
+    # new_df = new_df.drop(
+    #     labels=new_df.index, axis=0, index=None, columns=None, level=None, inplace=True, errors="raise"
+    # )
+    # # refilling the dataframe
+    # for key in ranking:
+    #     row = df_intentions[df_intentions["candidat"] == key]
+    #     new_df = pd.concat([new_df, row], ignore_index=True)
+    # # set new index of rows
+    # new_df.index = pd.Index(data=[i for i in range(1, nb_candidates + 1)], dtype="int64")
+    # return new_df.reindex(index=new_df.index[::-1]) # sort to plot it the right way, best candidate at the top.
+
+    return df
 
 
 def set_dictionary(df_intentions: DataFrame, nb_grades: int, nb_candidates: int):

diff --git a/load_surveys.py b/load_surveys.py
@@ -14,7 +14,7 @@
 
 from utils import get_list_survey
 
-from misc.enums import Candidacy, AggregationMode
+from misc.enums import Candidacy, AggregationMode, PollingOrganizations
 
 
 def remove_undecided(df_survey: DataFrame, df_undecided_grades: DataFrame):
@@ -141,6 +141,7 @@ def load_surveys(
     no_opinion_mode: bool = True,
     candidates: Candidacy = None,
     aggregation: AggregationMode = None,
+    polling_organization: PollingOrganizations = None,
 ):
     """
     normalize file
@@ -155,6 +156,8 @@ def load_surveys(
         how to manage candidacies
     aggregation: AggregationMode
         how to manage Aggregation of several grades
+    polling_organization: PollingOrganizations
+        select polling organization
     Returns
     -------
     Return the DataFrame df with all surveys inside
@@ -163,10 +166,15 @@ def load_surveys(
         candidates = Candidacy.ALL
     if aggregation is None:
         aggregation = AggregationMode.NO_AGGREGATION
+    if polling_organization is None:
+        polling_organization = PollingOrganizations.ALL
 
     df_surveys = pd.read_csv(csv_file, na_filter=False)
     df_standardisation = pd.read_csv("standardisation.csv", na_filter=False)
 
+    if polling_organization != PollingOrganizations.ALL:
+        df_surveys = df_surveys[df_surveys["commanditaire"] == polling_organization.value]
+
     # remove undecided
     if no_opinion_mode:
         df_undecided_grades = df_standardisation[df_standardisation["to_4_mentions"] == "sans opinion"]
@@ -189,6 +197,14 @@ def load_surveys(
         df_surveys = df_surveys[df_surveys["candidat_presidentielle"] == True]
         df_surveys = df_surveys[df_surveys["retrait_candidature"] == "nan"]
 
+    if candidates == Candidacy.ALL_CURRENT_CANDIDATES_WITH_ENOUGH_DATA:
+        df_surveys = df_surveys[df_surveys["candidat_presidentielle"] == True]
+        df_surveys = df_surveys[df_surveys["retrait_candidature"] == "nan"]
+        df_surveys = df_surveys[df_surveys["candidat"] != "Nathalie Arthaud"]  # todo: don't hard-code
+        df_surveys = df_surveys[
+            df_surveys["candidat"] != "Jean Lassalle"
+        ]  # todo: remove candidates with only two dots instead.
+
     if aggregation != AggregationMode.NO_AGGREGATION:
 
         surveys = get_list_survey(df_surveys)

diff --git a/main.py b/main.py
@@ -2,22 +2,23 @@
 import pandas as pd
 import numpy as np
 import tap
-from plots import plot_merit_profiles
+from plots import plot_merit_profiles, ranking_plot
 from utils import (
     get_list_survey,
     get_grades,
 )
 from interface_mj import sort_candidates_mj
 from load_surveys import load_surveys
-from misc.enums import Candidacy, AggregationMode
+from misc.enums import Candidacy, AggregationMode, PollingOrganizations
 
-# todo: handle sans opinion if case
 # todo: graphique classement en fonction des dates (avec mediane glissante)
 # todo: moyennes / ecart-type grades sur un profil de merite.
 # todo: video d'evolution du graphique (baromètre animé)
 
 
 class Arguments(tap.Tap):
+    merit_profiles: bool = False
+    ranking_plot: bool = True
     show: bool = True
     html: bool = False
     png: bool = False
@@ -31,16 +32,20 @@ def main(args: Arguments):
     df = load_surveys(
         args.csv,
         no_opinion_mode=True,
-        candidates=Candidacy.ALL_CURRENT_CANDIDATES,
+        candidates=Candidacy.ALL_CURRENT_CANDIDATES_WITH_ENOUGH_DATA,
         aggregation=AggregationMode.FOUR_MENTIONS,
+        polling_organization=PollingOrganizations.ALL,
     )
 
+    # Compute the rank for each survey
+    df["rang"] = None
+
     surveys = get_list_survey(df)
 
     for survey in surveys:
         print(survey)
         # only the chosen survey
-        df_survey = df[df["id"] == survey]
+        df_survey = df[df["id"] == survey].copy()
 
         nb_grades = df_survey["nombre_mentions"].unique()[0]
         grades = get_grades(df_survey, nb_grades)
@@ -49,23 +54,36 @@ def main(args: Arguments):
         sponsor = df_survey["commanditaire"].loc[first_idx]
         date = df_survey["fin_enquete"].loc[first_idx]
 
-        df_sorted = sort_candidates_mj(df_survey, nb_grades)
+        df_with_rank = sort_candidates_mj(df_survey, nb_grades)
+
+        # refill the dataframe of surveys
+        df[df["id"] == survey] = df_with_rank
+
+        if args.merit_profiles:
+            fig = plot_merit_profiles(
+                df=df_with_rank,
+                grades=grades,
+                auto_text=False,
+                source=source,
+                date=date,
+                sponsor=sponsor,
+            )
 
-        fig = plot_merit_profiles(
-            df=df_sorted,
-            grades=grades,
-            auto_text=False,
-            source=source,
-            date=date,
-            sponsor=sponsor,
-        )
+            if args.show:
+                fig.show()
+            if args.html:
+                fig.write_html(f"{args.dest}/{survey}.html")
+            if args.png:
+                fig.write_image(f"{args.dest}/{survey}.png")
 
+    if args.ranking_plot:
+        fig = ranking_plot(df)
         if args.show:
             fig.show()
         if args.html:
-            fig.write_html(f"{args.dest}/{survey}.html")
+            fig.write_html(f"{args.dest}/ranking_plot.html")
         if args.png:
-            fig.write_image(f"{args.dest}/{survey}.png")
+            fig.write_image(f"{args.dest}/ranking_plot.png")
 
 
 if __name__ == "__main__":

diff --git a/misc/enums.py b/misc/enums.py
@@ -6,6 +6,7 @@ class Candidacy(Enum):
     Select candidates
     """
 
+    ALL_CURRENT_CANDIDATES_WITH_ENOUGH_DATA = "all_current_candidates_with_enough_data"
     ALL_CURRENT_CANDIDATES = "all_current_candidates"
     ALL_CANDIDATES_FROM_BEGINNING = "all_candidates"
     ALL = "all"
@@ -18,3 +19,12 @@ class AggregationMode(Enum):
 
     NO_AGGREGATION = "None"
     FOUR_MENTIONS = "to_4_mentions"
+
+
+class PollingOrganizations(Enum):
+    """
+    Select the polling organization whose surveys are kept
+    """
+
+    ALL = "None"
+    MIEUX_VOTER = "Mieux voter"