-
Notifications
You must be signed in to change notification settings - Fork 0
/
explore_page.py
81 lines (62 loc) · 2.24 KB
/
explore_page.py
1
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt


def shorten_categories(categories, cutoff):
    """Map each category label to itself or to 'Other' depending on its count.

    Args:
        categories: A pandas Series of counts indexed by category label
            (``load_data`` passes the result of ``value_counts()``).
        cutoff: Minimum count a category needs in order to keep its own label.

    Returns:
        A dict with one entry per label in ``categories``: the label maps to
        itself when its count is at least ``cutoff``, otherwise to ``'Other'``.
    """
    return {
        label: label if count >= cutoff else 'Other'
        for label, count in categories.items()
    }


def cleanexperience(x):
    """Convert a ``YearsCodePro`` answer to a numeric number of years.

    Args:
        x: The raw answer: one of the two textual buckets handled below, or a
            value that ``float()`` can parse.

    Returns:
        50 for 'More than 50 years', 0.5 for 'Less than 1 year', otherwise
        ``float(x)``.

    Raises:
        ValueError: If ``x`` is neither a known bucket nor parseable as a float.
    """
    if x == 'More than 50 years':
        return 50
    # This must be a real condition: the bucket was previously compared but the
    # result discarded, so every other answer collapsed to 0.5.
    if x == 'Less than 1 year':
        return 0.5
    return float(x)


def clean_education(x):
    """Collapse an ``EdLevel`` answer into one of four coarse labels.

    Args:
        x: The raw education-level answer string.

    Returns:
        'Bachelor’s degree', 'Master’s degree', 'Post grad' (professional or
        other doctoral degrees), or 'Less than a Bachelors' for anything else.
    """
    # The survey text uses a typographic apostrophe (’), so the literals do too.
    if 'Bachelor’s degree' in x:
        return 'Bachelor’s degree'
    if 'Master’s degree' in x:
        return 'Master’s degree'
    if 'Professional degree' in x or 'Other doctoral' in x:
        return 'Post grad'
    return 'Less than a Bachelors'


# Caching belongs on the expensive CSV load, not on a per-row helper.
# NOTE(review): newer Streamlit releases deprecate st.cache in favour of
# st.cache_data; kept as-is to match the API this file already uses.
@st.cache
def load_data():
    """Read the survey CSV and return the cleaned frame used by the page.

    Returns:
        A DataFrame with columns ``Country``, ``EdLevel``, ``YearsCodePro`` and
        ``Salary``, restricted to complete rows, to countries with at least 500
        responses, and to salaries in the range [100000, 250000].
    """
    df = pd.read_csv('survey_results_public.csv')
    df = df[['Employment', 'Country', 'EdLevel', 'YearsCodePro', 'ConvertedCompYearly']]
    df = df.rename({'ConvertedCompYearly': 'Salary'}, axis=1)
    # dropna() removes rows missing any selected column, Salary included, so a
    # separate not-null filter on Salary is unnecessary.
    df = df.dropna()
    df = df.drop('Employment', axis=1)

    country_map = shorten_categories(df.Country.value_counts(), 500)
    df['Country'] = df['Country'].map(country_map)

    # NOTE(review): a lower bound of 100000 is an unusually high salary floor;
    # confirm it is intended and not a typo for a smaller value.
    keep = (
        (df['Salary'] >= 100000)
        & (df['Salary'] <= 250000)
        & (df['Country'] != 'Other')
    )
    # Copy so the column assignments below write to an independent frame
    # rather than to a slice of the unfiltered one.
    df = df[keep].copy()

    df['YearsCodePro'] = df['YearsCodePro'].apply(cleanexperience)
    df['EdLevel'] = df['EdLevel'].apply(clean_education)
    return df


# Loaded once at import time; show_explore_page reads this module-level frame.
df = load_data()


def show_explore_page():
    """Render the explore page: title, heading, and a pie chart of rows per country."""
    st.title("Explore Software Engineer Salaries")
    st.write(''' ### Stack Overflow Survey 2020 ''')

    data = df["Country"].value_counts()

    fig1, ax1 = plt.subplots()
    # '%%' emits a literal percent sign; a lone trailing '%' is an incomplete
    # printf-style format and raises ValueError when the labels are rendered.
    ax1.pie(data, labels=data.index, autopct='%1.1f%%', shadow=True, startangle=90)
    ax1.axis("equal")

    st.write('''#### Number of Data from different countries''')
    st.pyplot(fig1)