Skip to content

Commit

Permalink
Merge pull request #15 from Ipuch/main
Browse files Browse the repository at this point in the history
Elegant ranking plot with messy data.
  • Loading branch information
Ipuch committed Mar 3, 2022
2 parents 46c0353 + 9846ace commit 75ab221
Show file tree
Hide file tree
Showing 7 changed files with 295 additions and 90 deletions.
35 changes: 22 additions & 13 deletions interface_mj.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from libs.majority_judgment_2 import majority_judgment as mj
import numpy as np
import pandas as pd
from pandas import DataFrame
from utils import get_intentions
Expand Down Expand Up @@ -28,20 +29,28 @@ def sort_candidates_mj(
merit_profiles_dict = set_dictionary(df_intentions, nb_grades, nb_candidates)
ranking = mj(merit_profiles_dict, reverse=True)

# Copy and empty the pandas DataFrame so it can be refilled.
new_df = df_intentions.copy()
new_df = new_df.drop(
labels=new_df.index, axis=0, index=None, columns=None, level=None, inplace=True, errors="raise"
)
# todo add a rank column
# refilling the dataframe
for key in ranking:
row = df_intentions[df_intentions["candidat"] == key]
new_df = pd.concat([new_df, row], ignore_index=True)
# set new index of rows
new_df.index = pd.Index(data=[i for i in range(1, nb_candidates + 1)], dtype="int64")
if "rang" not in df.columns:
df["rang"] = None

return new_df.reindex(index=new_df.index[::-1]) # sort to plot it the right way, best candidate at the top.
col_index = df.columns.get_loc("rang")
for c in ranking:
idx = np.where(df["candidat"] == c)[0][0]
df.iat[idx, col_index] = ranking[c]

# # Copy and empty the pandas DataFrame so it can be refilled.
# new_df = df_intentions.copy()
# new_df = new_df.drop(
# labels=new_df.index, axis=0, index=None, columns=None, level=None, inplace=True, errors="raise"
# )
# # refilling the dataframe
# for key in ranking:
# row = df_intentions[df_intentions["candidat"] == key]
# new_df = pd.concat([new_df, row], ignore_index=True)
# # set new index of rows
# new_df.index = pd.Index(data=[i for i in range(1, nb_candidates + 1)], dtype="int64")
# return new_df.reindex(index=new_df.index[::-1]) # sort to plot it the right way, best candidate at the top.

return df


def set_dictionary(df_intentions: DataFrame, nb_grades: int, nb_candidates: int):
Expand Down
18 changes: 17 additions & 1 deletion load_surveys.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

from utils import get_list_survey

from misc.enums import Candidacy, AggregationMode
from misc.enums import Candidacy, AggregationMode, PollingOrganizations


def remove_undecided(df_survey: DataFrame, df_undecided_grades: DataFrame):
Expand Down Expand Up @@ -141,6 +141,7 @@ def load_surveys(
no_opinion_mode: bool = True,
candidates: Candidacy = None,
aggregation: AggregationMode = None,
polling_organization: PollingOrganizations = None,
):
"""
normalize file
Expand All @@ -155,6 +156,8 @@ def load_surveys(
how to manage candidacies
aggregation: AggregationMode
how to manage Aggregation of several grades
polling_organization: PollingOrganizations
select polling organization
Returns
-------
Return the DataFrame df with all surveys inside
Expand All @@ -163,10 +166,15 @@ def load_surveys(
candidates = Candidacy.ALL
if aggregation is None:
aggregation = AggregationMode.NO_AGGREGATION
if polling_organization is None:
polling_organization = PollingOrganizations.ALL

df_surveys = pd.read_csv(csv_file, na_filter=False)
df_standardisation = pd.read_csv("standardisation.csv", na_filter=False)

if polling_organization != PollingOrganizations.ALL:
df_surveys = df_surveys[df_surveys["commanditaire"] == polling_organization.value]

# remove undecided
if no_opinion_mode:
df_undecided_grades = df_standardisation[df_standardisation["to_4_mentions"] == "sans opinion"]
Expand All @@ -189,6 +197,14 @@ def load_surveys(
df_surveys = df_surveys[df_surveys["candidat_presidentielle"] == True]
df_surveys = df_surveys[df_surveys["retrait_candidature"] == "nan"]

if candidates == Candidacy.ALL_CURRENT_CANDIDATES_WITH_ENOUGH_DATA:
df_surveys = df_surveys[df_surveys["candidat_presidentielle"] == True]
df_surveys = df_surveys[df_surveys["retrait_candidature"] == "nan"]
df_surveys = df_surveys[df_surveys["candidat"] != "Nathalie Arthaud"] # todo: dont hard code
df_surveys = df_surveys[
df_surveys["candidat"] != "Jean Lassalle"
] # todo: remove candidates with only two dots instead.

if aggregation != AggregationMode.NO_AGGREGATION:

surveys = get_list_survey(df_surveys)
Expand Down
50 changes: 34 additions & 16 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,23 @@
import pandas as pd
import numpy as np
import tap
from plots import plot_merit_profiles
from plots import plot_merit_profiles, ranking_plot
from utils import (
get_list_survey,
get_grades,
)
from interface_mj import sort_candidates_mj
from load_surveys import load_surveys
from misc.enums import Candidacy, AggregationMode
from misc.enums import Candidacy, AggregationMode, PollingOrganizations

# todo: handle the "sans opinion" (no opinion) case
# todo: plot the ranking as a function of survey dates (with a rolling median)
# todo: mean / standard deviation of the grades on a merit profile.
# todo: video of the plot's evolution (animated barometer)


class Arguments(tap.Tap):
merit_profiles: bool = False
ranking_plot: bool = True
show: bool = True
html: bool = False
png: bool = False
Expand All @@ -31,16 +32,20 @@ def main(args: Arguments):
df = load_surveys(
args.csv,
no_opinion_mode=True,
candidates=Candidacy.ALL_CURRENT_CANDIDATES,
candidates=Candidacy.ALL_CURRENT_CANDIDATES_WITH_ENOUGH_DATA,
aggregation=AggregationMode.FOUR_MENTIONS,
polling_organization=PollingOrganizations.ALL,
)

# Compute the rank for each survey
df["rang"] = None

surveys = get_list_survey(df)

for survey in surveys:
print(survey)
# only the chosen survey
df_survey = df[df["id"] == survey]
df_survey = df[df["id"] == survey].copy()

nb_grades = df_survey["nombre_mentions"].unique()[0]
grades = get_grades(df_survey, nb_grades)
Expand All @@ -49,23 +54,36 @@ def main(args: Arguments):
sponsor = df_survey["commanditaire"].loc[first_idx]
date = df_survey["fin_enquete"].loc[first_idx]

df_sorted = sort_candidates_mj(df_survey, nb_grades)
df_with_rank = sort_candidates_mj(df_survey, nb_grades)

# refill the dataframe of surveys
df[df["id"] == survey] = df_with_rank

if args.merit_profiles:
fig = plot_merit_profiles(
df=df_with_rank,
grades=grades,
auto_text=False,
source=source,
date=date,
sponsor=sponsor,
)

fig = plot_merit_profiles(
df=df_sorted,
grades=grades,
auto_text=False,
source=source,
date=date,
sponsor=sponsor,
)
if args.show:
fig.show()
if args.html:
fig.write_html(f"{args.dest}/{survey}.html")
if args.png:
fig.write_image(f"{args.dest}/{survey}.png")

if args.ranking_plot:
fig = ranking_plot(df)
if args.show:
fig.show()
if args.html:
fig.write_html(f"{args.dest}/{survey}.html")
fig.write_html(f"{args.dest}/ranking_plot.html")
if args.png:
fig.write_image(f"{args.dest}/{survey}.png")
fig.write_image(f"{args.dest}/ranking_plot.png")


if __name__ == "__main__":
Expand Down
10 changes: 10 additions & 0 deletions misc/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ class Candidacy(Enum):
Select candidates
"""

ALL_CURRENT_CANDIDATES_WITH_ENOUGH_DATA = "all_current_candidates_with_enough_data"
ALL_CURRENT_CANDIDATES = "all_current_candidates"
ALL_CANDIDATES_FROM_BEGINNING = "all_candidates"
ALL = "all"
Expand All @@ -18,3 +19,12 @@ class AggregationMode(Enum):

NO_AGGREGATION = "None"
FOUR_MENTIONS = "to_4_mentions"


class PollingOrganizations(Enum):
    """
    Select which polling organization's surveys are kept.
    """

    # Sentinel meaning "do not filter"; note the value is the string "None",
    # not the None object.
    ALL = "None"
    # load_surveys compares a member's value against the "commanditaire"
    # column when the selection is not ALL.
    MIEUX_VOTER = "Mieux voter"
Loading

0 comments on commit 75ab221

Please sign in to comment.