# Setup: load the survey CSV into `df` and import the plotting/numeric libraries
# used by the rest of the script.
import pandas as pd
# skiprows=1 discards the first line of the file, so the header row is read
# from the file's second line.
df = pd.read_csv('County CAO Salary Survey Results.csv', skiprows=1)
import matplotlib.pyplot as plt
# IPython/Jupyter magic that renders figures inline in the notebook.
# NOTE(review): this line is not valid syntax in a plain .py script.
%matplotlib inline
import numpy as np
# Clean the survey rows and convert the money/population columns to floats.
#
# 1. Drop rows whose salary cell is the literal placeholder 'No CAO', then drop
#    any remaining row that has a missing value in any column.
# 2. Strip '$' and ',' from the salary column and ',' from the population
#    column, then cast both to float.
_no_cao_rows = df[' W-2, Box 5 '] == 'No CAO'
# .copy() gives an independent frame so the column assignments below do not
# write into a slice of the originally loaded data.
df = df[~_no_cao_rows].dropna().copy()

# regex=False: '$' is a regex end-of-string anchor, so request a literal
# replacement explicitly instead of relying on the pandas-version default.
df[' W-2, Box 5 '] = (
    df[' W-2, Box 5 ']
    .str.replace('$', '', regex=False)
    .str.replace(',', '', regex=False)
    .astype(float)
)
df['2009 Population'] = (
    df['2009 Population'].str.replace(',', '', regex=False).astype(float)
)
from gender import gender
def _gender_for(full_name):
    """Return the ``gender`` table entry for the first word of *full_name*.

    The first whitespace-separated word is upper-cased and looked up in the
    imported ``gender`` mapping. Returns '' when the word is not in the
    mapping or when *full_name* contains no words at all.
    """
    words = full_name.split()
    if not words:
        # Blank name: nothing to look up (previously an IndexError).
        return ''
    return gender.get(words[0].upper(), '')


# One gender label per row, in row order; '' marks names with no match.
gender_match = [_gender_for(name) for name in df['Name']]
df['gender'] = gender_match
# Scale the reported pay to a 12-month figure: pay per month worked, times 12.
df['annual'] = df[' W-2, Box 5 '] / df[' Months Worked '].astype(float) * 12
# Per-gender salary totals, head counts, and truncated-to-int averages.
is_male = df['gender'] == "male"
is_female = df['gender'] == "female"

# Sum of annualized salaries for each gender.
total_male_sal = df.loc[is_male, 'annual'].sum()
total_fem_sal = df.loc[is_female, 'annual'].sum()

# Number of rows carrying each gender label.
gender_cts = df['gender'].value_counts()
male_cts, female_cts = gender_cts['male'], gender_cts['female']

# Average = total / count, truncated to a whole number by int().
male_avg = int(total_male_sal / male_cts)
female_avg = int(total_fem_sal / female_cts)
import matplotlib.pyplot as plt
# Horizontal bar chart comparing the average annualized salary of women and men.
objects = ('Women', 'Men')
y_pos = np.arange(len(objects))  # one bar position per label
salarys = [female_avg, male_avg]  # same order as `objects`
plt.barh(y_pos, salarys, align='center', alpha=0.5)
plt.yticks(y_pos, objects)
plt.xlabel('Salary')
plt.title('County Executives Salaries by Gender')
# Call show() (the original referenced `plt.show` without calling it) so this
# figure is displayed on its own before any later plotting commands run.
plt.show()
# Pie chart of how many executive positions are held by women versus men.
labels = ('Women', 'Men')
sizes = [female_cts, male_cts]  # wedge sizes, same order as `labels`
colors = ['yellowgreen', 'lightskyblue']

plt.pie(
    sizes,
    labels=labels,
    colors=colors,
    autopct='%1.0f%%',  # print each wedge's share as a whole-number percent
)
plt.title('Percentage of Women and Men Holding Executive Positions')
plt.axis('equal')  # equal axis scaling
plt.show()