In [7]:
import pandas as pd
In [104]:
df = pd.read_csv('County CAO Salary Survey Results.csv', skiprows=1)
In [105]:
import matplotlib.pyplot as plt
%matplotlib inline
In [20]:
import numpy as np
In [ ]:
df[df[' W-2, Box 5 '] == 'No CAO'] = np.nan
df = df.dropna()
df[' W-2, Box 5 '] = df[' W-2, Box 5 '].str.replace('$', '')
df[' W-2, Box 5 '] = df[' W-2, Box 5 '].str.replace(',', '').astype(float)
In [26]:
df['2009 Population'] = df['2009 Population'].str.replace(',', '').astype(float)
In [31]:
from gender import gender
In [34]:
gender_match = [gender[name.split()[0].upper()] if name.split()[0].upper() in gender.keys() else '' for name in df['Name']]
In [36]:
df['gender'] = gender_match
In [39]:
df['annual'] = df[' W-2, Box 5 ']/df[' Months Worked '].astype(float)*12
In [101]:
total_male_sal = df.loc[df['gender'] == "male", 'annual'].sum()
In [60]:
total_fem_sal = df.loc[df['gender'] == "female", 'annual'].sum()
In [54]:
#df.groupby('gender', 'female').count()
gender_cts = df['gender'].value_counts()
In [102]:
male_cts = gender_cts['male']
In [56]:
female_cts = gender_cts['female']
In [68]:
female_avg = int(total_fem_sal/female_cts)
In [103]:
male_avg = int(total_male_sal/male_cts)
In [72]:
import matplotlib.pyplot as plt
In [106]:
 
objects = ('Women', 'Men')
y_pos = np.arange(len(objects))
salarys = [female_avg, male_avg]
 
plt.barh(y_pos, salarys, align='center', alpha=0.5)
plt.yticks(y_pos, objects)
plt.xlabel('Salary')
plt.title('County Executives Salaries by Gender')
 
plt.show
Out[106]:
<function matplotlib.pyplot.show>
In [99]:
# Data to plot
labels = 'Women', 'Men'
sizes = [female_cts, male_cts]
colors = ['yellowgreen','lightskyblue']
 
# Plot
plt.pie(sizes, labels=labels, colors=colors, autopct='%1.0f%%')
plt.title('Percentage of Women and Men Holding Executive Positions')
 
plt.axis('equal')
plt.show()