-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
4cf2280
commit 09302ff
Showing
3 changed files
with
80 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
import pandas as pd | ||
import os | ||
|
||
def add_headers(csv_file, output_dir=None):
    """Add a ``Year`` column, and a rank column if none exists, to a CSV file.

    The year is everything before the first ``.`` in the file's base name
    (``"2022.csv"`` -> ``"2022"``). The result is written to
    ``<year>cleaned.csv``; the input file itself is not modified.

    Args:
        csv_file: Path of the CSV file to read.
        output_dir: Directory to write the cleaned file into. ``None`` (the
            default) writes to the current working directory.

    Returns:
        The path of the written file, or ``None`` when ``csv_file`` does not
        exist (a message is printed in that case).
    """
    if not os.path.isfile(csv_file):
        print("File does not exist.")
        return None

    df = pd.read_csv(csv_file)

    # Any one of these spellings counts as an already-present rank column.
    rank_headers = ('RANK', 'Happiness.Rank', 'Overall rank', 'Happiness Rank')
    has_rank = any(header in df.columns for header in rank_headers)

    year = os.path.basename(csv_file).split('.')[0]
    df['Year'] = year

    if not has_rank:
        # Rank is the 1-based row position, so this assumes the rows are
        # already stored in rank order.
        df['Happiness Rank'] = range(1, len(df) + 1)

    output_file = f"{year}cleaned.csv"
    if output_dir is not None:
        output_file = os.path.join(output_dir, output_file)

    df.to_csv(output_file, index=False)
    print(f"Headers added successfully. Saved as {output_file}")
    return output_file
|
||
# Example usage: executed whenever this module is run or imported.
csv_file = "2022.csv"  # Provide the path to your CSV file here
add_headers(csv_file)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
import os | ||
import pandas as pd | ||
|
||
def process_csv_files(folder_path):
    """Normalise country name and happiness score in every CSV of a folder.

    Each file directly inside ``folder_path`` whose name ends in ``.csv`` is
    rewritten in place with three changes:

    1. ``Country`` values equal to ``"United States"`` become
       ``"United States of America"``.
    2. Every literal ``.`` is removed from ``Happiness Score``.
    3. ``Happiness Score`` is left-padded with zeros to at least 4 characters.

    Args:
        folder_path: Directory containing the CSV files to process.

    Raises:
        KeyError: If a file has no ``Country`` or ``Happiness Score`` column.
    """
    # Sorted so the processing order (and the printed log) is deterministic.
    csv_files = sorted(
        name for name in os.listdir(folder_path) if name.endswith('.csv')
    )

    for file in csv_files:
        file_path = os.path.join(folder_path, file)

        # Read the score as text. Left to itself pandas parses it as a float,
        # which has no .str accessor and has already dropped trailing zeros.
        df = pd.read_csv(file_path, dtype={'Happiness Score': str})

        # Task 1: use the long form of the country name.
        df.loc[df['Country'] == 'United States', 'Country'] = 'United States of America'

        # Task 2: strip periods. regex=False is essential: as a regex, '.'
        # matches every character and would blank the whole value.
        digits = df['Happiness Score'].str.replace('.', '', regex=False)

        # Task 3: pad to 4 characters. .str.zfill leaves missing values
        # untouched instead of raising on them.
        df['Happiness Score'] = digits.str.zfill(4)

        df.to_csv(file_path, index=False)

        print(f"Processed file: {file}")
|
||
# Example usage: rewrites every CSV directly inside folder_path in place.
folder_path = '/test'
process_csv_files(folder_path)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
import pandas as pd | ||
|
||
def update_csv_with_region(csv_file_with_region, csv_file_to_update):
    """Copy the ``Region`` column from one CSV into another, matched by country.

    Rows of ``csv_file_to_update`` are left-joined with the ``Country`` ->
    ``Region`` pairs of ``csv_file_with_region``; countries with no match get
    an empty ``Region``. The result is written next to the file being updated
    as ``updated_<original file name>``; neither input is modified.

    If the file to update already has a ``Region`` column, pandas keeps both,
    suffixed ``Region_x`` and ``Region_y``.

    Args:
        csv_file_with_region: CSV that has ``Country`` and ``Region`` columns.
        csv_file_to_update: CSV that has a ``Country`` column.

    Returns:
        The path of the written file.

    Raises:
        KeyError: If a required column is missing from either file.
    """
    # Imported locally so this function does not rely on a module-level import.
    import os

    df_region = pd.read_csv(csv_file_with_region)
    df_to_update = pd.read_csv(csv_file_to_update)

    # Keep one Region per Country: repeated keys on the right side of a left
    # merge would duplicate the matching rows of the file being updated.
    region_lookup = df_region[['Country', 'Region']].drop_duplicates(subset='Country')

    merged_df = pd.merge(df_to_update, region_lookup, on='Country', how='left')

    # Prefix only the file name so that paths with a directory part stay valid
    # ('data/2017.csv' -> 'data/updated_2017.csv').
    folder, file_name = os.path.split(csv_file_to_update)
    output_file = os.path.join(folder, 'updated_' + file_name)

    merged_df.to_csv(output_file, index=False)
    return output_file
|
||
# Example usage: the first file supplies the Country/Region pairs, the second
# is the file that receives the Region column. Replace both with your paths.
update_csv_with_region('2022cleaned.csv', '2017cleaned.csv')