"""
csvToJson.py

Convert the Propilot CSV export (exports/propilot.csv) into a text file
holding one JSON document per indicator, one document per line.
"""
import datetime
import pandas as pd
import json
import os
def evolVal(valI: float, valE: float) -> float:
    """
    Return the absolute change between two values (valI - valE).

    The result is 0 when valE is 0 or when both values are equal.
    """
    same_value = valI == valE
    if valE == 0 or same_value:
        return 0
    return valI - valE
def evolPercent(ev: float, val: float) -> float:
    """
    Return ev expressed as a percentage of val, rounded to two decimals.

    Returns 0 when val is 0, so no division by zero can occur.
    """
    if val == 0:
        return 0
    ratio = ev / val
    # NOTE(review): the original author flagged this formula as possibly
    # wrong and said the result is not used later on - confirm before relying on it.
    return round(ratio * 100, 2)
def get_last_data(dff: pd.DataFrame) -> list:
    """
    Return [latest date, sum of "valeur" over the rows at that date].

    The latest date is the maximum of the "period_date" column.
    """
    dates = dff["period_date"]
    latest = dates.max()
    latest_total = dff.loc[dates == latest, "valeur"].sum()
    return [latest, latest_total]
def get_evolution(dff: pd.DataFrame, last_date: datetime.datetime, last_value: float) -> list:
    """
    Compare the latest value with the one from the previous available date.

    dff: rows carrying the "period_date" and "valeur" columns.
    last_date: most recent date, whose rows are excluded from the comparison base.
    last_value: summed value at last_date.

    Returns [evol, evol_percent]: the absolute change computed by evolVal and
    the percentage computed by evolPercent, both relative to the summed value
    at the most recent remaining date.
    """
    # A boolean filter leaves the caller's frame untouched without a full copy
    # and, unlike a label-based drop, cannot remove unrelated rows when the
    # index contains duplicate labels.
    earlier_rows = dff.loc[dff["period_date"] != last_date]
    _, previous_value = get_last_data(earlier_rows)
    evol = evolVal(last_value, previous_value)
    evol_percent = evolPercent(evol, previous_value)
    return [evol, evol_percent]
def get_data_history(dff: pd.DataFrame) -> list:
"""
retourne la somme de toutes les valeurs pour une date et un indicateur pour une maille (region, departement, france)
"""
dates = dff.sort_values(by="period_date").period_date.unique()
values = []
for date in dates:
value = dff.loc[dff.period_date == date]
values.append({"date": date.astype(str), "value": value["valeur"].sum()})
return values
def get_level(df: pd.DataFrame, level: str, code_level: str) -> dict:
    """
    Build the JSON-ready record for one geographic level (dep, reg, france).

    df: rows belonging to that level.
    level, code_level: copied as-is into the record.

    The record holds the latest value and date, the change since the previous
    date, and the full per-date history. "evol_color" is always "red".
    """
    last_date, last_value = get_last_data(df)
    evol, evol_percentage = get_evolution(df, last_date, last_value)
    return {
        "level": level,
        "code_level": code_level,
        "last_value": last_value,
        "last_date": str(last_date),
        "evol": evol,
        "evol_percentage": evol_percentage,
        "evol_color": "red",
        "values": get_data_history(df),
    }
def clean_file(file_name: str):
    """
    Delete the file produced by a previous run, if it exists.

    A missing file is the expected case on a first run and is ignored.
    Any other OSError (permissions, path is a directory, ...) propagates,
    because the output is later opened in append mode and stale content
    left in place would end up mixed with the new records.
    """
    try:
        os.remove(file_name)
    except FileNotFoundError:
        # Nothing to clean up.
        pass
def _to_builtin(value):
    """Convert a NumPy-style scalar into the matching built-in value for json."""
    # json only calls this hook for objects it cannot encode itself.
    # Zero-dimensional objects exposing item() (NumPy integer/bool scalars such
    # as the result of summing an integer column) unwrap to plain Python values.
    if getattr(value, "ndim", None) == 0 and callable(getattr(value, "item", None)):
        return value.item()
    raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")


def append_to_file(data: dict, file_name: str):
    """
    Append data to file_name as a single JSON document followed by a newline.

    The file is opened in append mode with UTF-8 encoding, and non-ASCII
    characters are written as-is rather than escaped. NumPy scalar values are
    converted to built-in numbers/booleans instead of raising TypeError.
    """
    with open(file_name, "a", encoding="utf8") as output_file:
        json.dump(data, output_file, ensure_ascii=False, default=_to_builtin)
        output_file.write('\n')
def _collect_levels(df_indicateur: pd.DataFrame, column: str, level: str) -> list:
    """Build one get_level() record per distinct non-null code found in column."""
    codes = df_indicateur[column]
    # The mask is built from the indicator's own rows, so no index alignment
    # against another frame is needed. Null codes are skipped: comparing with
    # NaN never matches, which would yield an empty record whose code is NaN.
    return [
        get_level(df_indicateur.loc[codes == code], level, code)
        for code in codes.dropna().unique()
    ]


def convert_csv_to_json():
    """
    Convert exports/propilot.csv into france-relance-data-tableau-de-bord.txt.

    For each distinct indicator, one JSON document is appended to the output
    file with the indicator's code, name and unit plus three lists of level
    records: "france" (all rows of the indicator), "regions" (one record per
    Code_Region) and "departements" (one record per Code_Departement).
    The output file from a previous run is removed first.
    """
    col = ["indicateur",
           "period_date",
           "valeur",
           "mesure",
           "short_indic",
           "maille",
           "indic_id",
           "Code_Departement",
           "Code_Region"]
    df_propilot = pd.read_csv("exports/propilot.csv", usecols=col, sep=";")
    # Drop rows without an indicator name; copy so the column assignment below
    # writes to an independent frame rather than to a slice of the raw data.
    df_propilot = df_propilot[df_propilot.indicateur.notna()].copy()
    df_propilot['period_date'] = pd.to_datetime(df_propilot['period_date'])

    file_name = 'france-relance-data-tableau-de-bord.txt'
    clean_file(file_name)

    # One output document per distinct indicator.
    for indicateur in df_propilot.indicateur.unique():
        df_indicateur = df_propilot.loc[df_propilot.indicateur == indicateur]

        # NOTE(review): "unite" is filled from short_indic, exactly like "nom",
        # while the "mesure" column is loaded but never read - confirm which
        # column was intended for the unit.
        data = {"code": df_indicateur["indic_id"].iloc[0],
                "nom": df_indicateur["short_indic"].iloc[0],
                "unite": df_indicateur["short_indic"].iloc[0]}

        # France: every row of the indicator.
        data["france"] = [get_level(df_indicateur, "nat", "fra")]
        # Regions
        data["regions"] = _collect_levels(df_indicateur, "Code_Region", "reg")
        # Departements
        data["departements"] = _collect_levels(df_indicateur, "Code_Departement", "dep")

        append_to_file(data, file_name)
# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    convert_csv_to_json()