{ "cells": [
{ "cell_type": "markdown", "metadata": {}, "source": [ "### This notebook demonstrates the ability of the DisparateImpactRemover algorithm.\n", "The algorithm corrects for imbalanced selection rates between unprivileged and privileged groups at various levels of repair. It follows the guidelines set forth by [1] for training the algorithm and classifier and uses the AdultDataset as an example." ] },
{ "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "from __future__ import absolute_import\n", "from __future__ import division\n", "from __future__ import print_function\n", "from __future__ import unicode_literals" ] },
{ "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "from matplotlib import pyplot as plt\n", "\n", "import sys\n", "sys.path.append(\"../\")\n", "import warnings\n", "\n", "import numpy as np\n", "import pandas as pd\n", "from tqdm import tqdm\n", "\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.svm import SVC as SVM\n", "from sklearn.preprocessing import MinMaxScaler\n", "\n", "from aif360.algorithms.preprocessing import DisparateImpactRemover\n", "from aif360.datasets import AdultDataset\n", "from aif360.metrics import BinaryLabelDatasetMetric\n", "\n", "from IPython.display import Markdown, display" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "#### Dataset\n", "https://archive.ics.uci.edu/ml/datasets/adult" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "This data was extracted from the census bureau database.\n", "\n", "48842 instances, mix of continuous and discrete (train=32561, test=16281)\n", "\n", "45222 if instances with unknown values are removed (train=30162, test=15060)" ] },
{ "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# Read every line up front; the context manager closes the handle even if the read fails.\n", "with open(\"adult.data\", \"r\") as f:\n", "    fl = f.readlines()" ] },
{ "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# Blank lines (e.g. a trailing empty line in the file) would otherwise be parsed into a\n", "# one-field record that pandas pads with NaN, so discard them before parsing.\n", "fl = [line for line in fl if line.strip()]" ] },
{ "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "# Normalise each record: remove spaces, split on commas, and binarise the label\n", "# ('>50K' -> 1, anything else -> 0).\n", "final_list = []\n", "for line in fl:\n", "    fields = line.replace(\" \", \"\").rstrip().split(\",\")\n", "    fields[-1] = 1 if fields[-1] == '>50K' else 0\n", "    final_list.append(fields)" ] },
{ "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "# Sanity check: every parsed record must carry the 15 fields named in the DataFrame below.\n", "assert all(len(row) == 15 for row in final_list)" ] },
{ "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "adult_df = pd.DataFrame(final_list, columns=['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status',\n", " 'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week',\n", " 'native-country', 'income-per-year'])" ] },
{ "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(32561, 15)" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "adult_df.shape" ] },
{ "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ageworkclassfnlwgteducationeducation-nummarital-statusoccupationrelationshipracesexcapital-gaincapital-losshours-per-weeknative-countryincome-per-year
039State-gov77516Bachelors13Never-marriedAdm-clericalNot-in-familyWhiteMale2174040United-States0.0
150Self-emp-not-inc83311Bachelors13Married-civ-spouseExec-managerialHusbandWhiteMale0013United-States0.0
238Private215646HS-grad9DivorcedHandlers-cleanersNot-in-familyWhiteMale0040United-States0.0
353Private23472111th7Married-civ-spouseHandlers-cleanersHusbandBlackMale0040United-States0.0
428Private338409Bachelors13Married-civ-spouseProf-specialtyWifeBlackFemale0040Cuba0.0
\n", "
" ], "text/plain": [ " age workclass fnlwgt education education-num marital-status \\\n", "0 39 State-gov 77516 Bachelors 13 Never-married \n", "1 50 Self-emp-not-inc 83311 Bachelors 13 Married-civ-spouse \n", "2 38 Private 215646 HS-grad 9 Divorced \n", "3 53 Private 234721 11th 7 Married-civ-spouse \n", "4 28 Private 338409 Bachelors 13 Married-civ-spouse \n", "\n", " occupation relationship race sex capital-gain capital-loss \\\n", "0 Adm-clerical Not-in-family White Male 2174 0 \n", "1 Exec-managerial Husband White Male 0 0 \n", "2 Handlers-cleaners Not-in-family White Male 0 0 \n", "3 Handlers-cleaners Husband Black Male 0 0 \n", "4 Prof-specialty Wife Black Female 0 0 \n", "\n", " hours-per-week native-country income-per-year \n", "0 40 United-States 0.0 \n", "1 13 United-States 0.0 \n", "2 40 United-States 0.0 \n", "3 40 United-States 0.0 \n", "4 40 Cuba 0.0 " ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "adult_df.head()" ] },
{ "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "age object\n", "workclass object\n", "fnlwgt object\n", "education object\n", "education-num object\n", "marital-status object\n", "occupation object\n", "relationship object\n", "race object\n", "sex object\n", "capital-gain object\n", "capital-loss object\n", "hours-per-week object\n", "native-country object\n", "income-per-year float64\n", "dtype: object" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "adult_df.dtypes" ] },
{ "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "# errors='ignore' keeps this cell re-runnable once 'fnlwgt' has already been removed.\n", "adult_df = adult_df.drop(columns=['fnlwgt'], errors='ignore')" ] },
{ "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "adult_df = adult_df.dropna()" ] },
{ "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(32561, 14)" ] }, "execution_count": 13, "metadata":
{}, "output_type": "execute_result" } ], "source": [ "adult_df.shape" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['39' '50' '38' '53' '28' '37' '49' '52' '31' '42' '30' '23' '32' '40'\n", " '34' '25' '43' '54' '35' '59' '56' '19' '20' '45' '22' '48' '21' '24'\n", " '57' '44' '41' '29' '18' '47' '46' '36' '79' '27' '67' '33' '76' '17'\n", " '55' '61' '70' '64' '71' '68' '66' '51' '58' '26' '60' '90' '75' '65'\n", " '77' '62' '63' '80' '72' '74' '69' '73' '81' '78' '88' '82' '83' '84'\n", " '85' '86' '87']\n", "['State-gov' 'Self-emp-not-inc' 'Private' 'Federal-gov' 'Local-gov' '?'\n", " 'Self-emp-inc' 'Without-pay' 'Never-worked']\n", "['Bachelors' 'HS-grad' '11th' 'Masters' '9th' 'Some-college' 'Assoc-acdm'\n", " 'Assoc-voc' '7th-8th' 'Doctorate' 'Prof-school' '5th-6th' '10th'\n", " '1st-4th' 'Preschool' '12th']\n", "['13' '9' '7' '14' '5' '10' '12' '11' '4' '16' '15' '3' '6' '2' '1' '8']\n", "['Never-married' 'Married-civ-spouse' 'Divorced' 'Married-spouse-absent'\n", " 'Separated' 'Married-AF-spouse' 'Widowed']\n", "['Adm-clerical' 'Exec-managerial' 'Handlers-cleaners' 'Prof-specialty'\n", " 'Other-service' 'Sales' 'Craft-repair' 'Transport-moving'\n", " 'Farming-fishing' 'Machine-op-inspct' 'Tech-support' '?'\n", " 'Protective-serv' 'Armed-Forces' 'Priv-house-serv']\n", "['Not-in-family' 'Husband' 'Wife' 'Own-child' 'Unmarried' 'Other-relative']\n", "['White' 'Black' 'Asian-Pac-Islander' 'Amer-Indian-Eskimo' 'Other']\n", "['Male' 'Female']\n", "['2174' '0' '14084' '5178' '5013' '2407' '14344' '15024' '7688' '34095'\n", " '4064' '4386' '7298' '1409' '3674' '1055' '3464' '2050' '2176' '594'\n", " '20051' '6849' '4101' '1111' '8614' '3411' '2597' '25236' '4650' '9386'\n", " '2463' '3103' '10605' '2964' '3325' '2580' '3471' '4865' '99999' '6514'\n", " '1471' '2329' '2105' '2885' '25124' '10520' '2202' '2961' '27828' '6767'\n", " '2228' '1506' '13550' '2635' '5556' '4787' 
'3781' '3137' '3818' '3942'\n", " '914' '401' '2829' '2977' '4934' '2062' '2354' '5455' '15020' '1424'\n", " '3273' '22040' '4416' '3908' '10566' '991' '4931' '1086' '7430' '6497'\n", " '114' '7896' '2346' '3418' '3432' '2907' '1151' '2414' '2290' '15831'\n", " '41310' '4508' '2538' '3456' '6418' '1848' '3887' '5721' '9562' '1455'\n", " '2036' '1831' '11678' '2936' '2993' '7443' '6360' '1797' '1173' '4687'\n", " '6723' '2009' '6097' '2653' '1639' '18481' '7978' '2387' '5060']\n", "['0' '2042' '1408' '1902' '1573' '1887' '1719' '1762' '1564' '2179' '1816'\n", " '1980' '1977' '1876' '1340' '2206' '1741' '1485' '2339' '2415' '1380'\n", " '1721' '2051' '2377' '1669' '2352' '1672' '653' '2392' '1504' '2001'\n", " '1590' '1651' '1628' '1848' '1740' '2002' '1579' '2258' '1602' '419'\n", " '2547' '2174' '2205' '1726' '2444' '1138' '2238' '625' '213' '1539' '880'\n", " '1668' '1092' '1594' '3004' '2231' '1844' '810' '2824' '2559' '2057'\n", " '1974' '974' '2149' '1825' '1735' '1258' '2129' '2603' '2282' '323'\n", " '4356' '2246' '1617' '1648' '2489' '3770' '1755' '3683' '2267' '2080'\n", " '2457' '155' '3900' '2201' '1944' '2467' '2163' '2754' '2472' '1411']\n", "['40' '13' '16' '45' '50' '80' '30' '35' '60' '20' '52' '44' '15' '25'\n", " '38' '43' '55' '48' '58' '32' '70' '2' '22' '56' '41' '28' '36' '24' '46'\n", " '42' '12' '65' '1' '10' '34' '75' '98' '33' '54' '8' '6' '64' '19' '18'\n", " '72' '5' '9' '47' '37' '21' '26' '14' '4' '59' '7' '99' '53' '39' '62'\n", " '57' '78' '90' '66' '11' '49' '84' '3' '17' '68' '27' '85' '31' '51' '77'\n", " '63' '23' '87' '88' '73' '89' '97' '94' '29' '96' '67' '82' '86' '91'\n", " '81' '76' '92' '61' '74' '95']\n", "['United-States' 'Cuba' 'Jamaica' 'India' '?' 
'Mexico' 'South'\n", " 'Puerto-Rico' 'Honduras' 'England' 'Canada' 'Germany' 'Iran'\n", " 'Philippines' 'Italy' 'Poland' 'Columbia' 'Cambodia' 'Thailand' 'Ecuador'\n", " 'Laos' 'Taiwan' 'Haiti' 'Portugal' 'Dominican-Republic' 'El-Salvador'\n", " 'France' 'Guatemala' 'China' 'Japan' 'Yugoslavia' 'Peru'\n", " 'Outlying-US(Guam-USVI-etc)' 'Scotland' 'Trinadad&Tobago' 'Greece'\n", " 'Nicaragua' 'Vietnam' 'Hong' 'Ireland' 'Hungary' 'Holand-Netherlands']\n", "[0. 1.]\n" ] } ], "source": [ "for col in adult_df.columns:\n", " print(adult_df[col].unique())" ] },
{ "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "# '?' marks an unknown value in this data set; treat it as missing and drop those rows.\n", "adult_df = adult_df.replace('?', np.nan).dropna()" ] },
{ "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(30162, 14)" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "adult_df.shape" ] },
{ "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "# Columns that hold whole numbers but were not parsed as ints; cast them in one pass.\n", "int_cols = ['age', 'education-num','capital-gain', 'capital-loss', 'hours-per-week', 'income-per-year']\n", "adult_df = adult_df.astype({column: int for column in int_cols})" ] },
{ "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "age int64\n", "workclass object\n", "education object\n", "education-num int64\n", "marital-status object\n", "occupation object\n", "relationship object\n", "race object\n", "sex object\n", "capital-gain int64\n", "capital-loss int64\n", "hours-per-week int64\n", "native-country object\n", "income-per-year int64\n", "dtype: object" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "adult_df.dtypes" ] },
{ "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "count 30162.000000\n", "mean 38.437902\n", "std 13.134665\n", "min 17.000000\n", "25% 28.000000\n", "50% 37.000000\n", "75% 
47.000000\n", "max 90.000000\n", "Name: age, dtype: float64" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "adult_df['age'].describe()" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "count 30162.000000\n", "mean 1092.007858\n", "std 7406.346497\n", "min 0.000000\n", "25% 0.000000\n", "50% 0.000000\n", "75% 0.000000\n", "max 99999.000000\n", "Name: capital-gain, dtype: float64" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "adult_df['capital-gain'].describe()" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "count 30162.000000\n", "mean 88.372489\n", "std 404.298370\n", "min 0.000000\n", "25% 0.000000\n", "50% 0.000000\n", "75% 0.000000\n", "max 4356.000000\n", "Name: capital-loss, dtype: float64" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "adult_df['capital-loss'].describe()" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYcAAAEWCAYAAACNJFuYAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAFcZJREFUeJzt3W+QZXV95/H3J0N0yR/CKA01mcHMaLXWImtGnUU2lpYJCwyYdXBr3YWtkllD7agFMVbyQHQfgLrUmkRjFZYhO4ZZhy0FiegyMRgcp1zd1IpOo4Q/IjsNorQzxbSOUXbJYoZ898H9tXuZc3u66dv0bXPfr6pb95zv+Z17v5fqmg/nd869J1WFJEn9fmbUDUiSVh/DQZLUYThIkjoMB0lSh+EgSeowHCRJHYaDJKnDcJAkdRgOkqSOE0bdwFKdcsoptXHjxlG3IUk/Ve68887vVdXEQuN+asNh48aNTE1NjboNSfqpkuTbixnntJIkqcNwkCR1GA6SpA7DQZLUYThIkjoMB0lSh+EgSeowHCRJHT+1X4L7abHxyr8YdQv/YDz8vteOugVpbCx45JDk9CRfSHJ/kvuS/E6rPyfJ3iQH2vPaVk+Sa5NMJ7k7ycv6Xmt7G38gyfa++suT3NP2uTZJnokPK0lanMVMKx0Ffq+q/jFwNnB5kjOAK4F9VTUJ7GvrABcAk+2xA7gOemECXAW8AjgLuGouUNqYHX37bR3+o0mSlmrBcKiqQ1X1tbb8GHA/sB7YBuxuw3YDF7XlbcAN1XMHcHKSdcD5wN6qOlJVPwD2AlvbtpOq6stVVcANfa8lSRqBp3VCOslG4KXAV4DTquoQ9AIEOLUNWw880rfbTKsdrz4zoD7o/XckmUoyNTs7+3RalyQ9DYsOhyS/ANwCvL2qfnS8oQNqtYR6t1i1s6q2VNWWiYkFf3FWkrREiwqHJD9LLxg+VlWfauVH25QQ7flwq88Ap/ftvgE4uEB9w4C6JGlEFnO1UoDrgfur6o/6Nu0B5q442g7c2le/tF21dDbwwzbtdDtwXpK17UT0ecDtbdtjSc5u73Vp32tJkkZgMd9zeCXwRuCeJHe12ruA9wE3J7kM+A7whrbtNuBCYBp4HHgTQFUdSfJeYH8b956qOtKW3wp8FDgR+Gx7SJJGZMFwqKq/YvB5AYBzBowv4PJ5XmsXsGtAfQo4c6FeJEkrw5/PkCR1GA6SpA7DQZLUYThIkjoMB0lSh+EgSeowHCRJHYaDJKnDcJAkdRgOkqQOw0GS1GE4SJI6DAdJUofhIEnqMBwkSR2GgySpYzG3Cd2V5HCSe/tqn0hyV3s8PHeHuCQbk/xt37Y/6dvn5UnuSTKd5Np2S1CSPCfJ3iQH2vPaZ+KDSpIWbzFHDh8FtvYXqurfVNXmqtoM3AJ8qm/zg3PbquotffXrgB3AZHvMveaVwL6qmgT2tXVJ0ggtGA5V9SXgyKBt7f/+/zVw4/FeI8k64KSq+nK7jegNwEVt8zZgd1ve3VeXJI3IsOccXgU8WlUH+mqbknw9yReTvKrV1gMzfWNmWg3gtKo6BNCeT53vzZLsSDKVZGp2dnbI1iVJ8xk2HC7hqUcNh4DnVdVLgd8FPp7kJCAD9q2n+2ZVtbOqtlTVlomJiSU1LEla2AlL3THJCcC/BF4+V6uqJ4An2vKdSR4EXkjvSGFD3+4bgINt+dEk66rqUJt+OrzUniRJy2OYI4d/Dnyzqn4yXZRkIsmatvx8eieeH2rTRY8lObudp7gUuLXttgfY3pa399UlSSOymEtZbwS+DLwoyUySy9qmi+meiH41cHeSvwY+CbylquZOZr8V+FNgGngQ+Gyrvw84N8kB4Ny2LkkaoQWnlarqknnq/25A7RZ6l7YOGj8FnDmg/n3gnIX6kCStHL8hLUnqMBwkSR2GgySpw3CQJHUYDpKkDsNBktRhOEiSOgwHSVKH4SBJ6jAcJEkdhoMkqcNwkCR
1GA6SpA7DQZLUYThIkjoMB0lSh+EgSepYzG1CdyU5nOTevtrVSb6b5K72uLBv2zuTTCd5IMn5ffWtrTad5Mq++qYkX0lyIMknkjxrOT+gJOnpW8yRw0eBrQPqH6yqze1xG0CSM+jdW/rFbZ8/TrImyRrgw8AFwBnAJW0swO+315oEfgBcduwbSZJW1oLhUFVfAo4s8vW2ATdV1RNV9S1gGjirPaar6qGq+jFwE7AtSYDfAD7Z9t8NXPQ0P4MkaZkNc87hiiR3t2mnta22Hnikb8xMq81Xfy7wN1V19Jj6QEl2JJlKMjU7OztE65Kk41lqOFwHvADYDBwCPtDqGTC2llAfqKp2VtWWqtoyMTHx9DqWJC3aCUvZqaoenVtO8hHgM211Bji9b+gG4GBbHlT/HnBykhPa0UP/eEnSiCzpyCHJur7V1wNzVzLtAS5O8uwkm4BJ4KvAfmCyXZn0LHonrfdUVQFfAP5V2387cOtSepIkLZ8FjxyS3Ai8BjglyQxwFfCaJJvpTQE9DLwZoKruS3Iz8A3gKHB5VT3ZXucK4HZgDbCrqu5rb/EO4KYk/xH4OnD9sn06SdKSLBgOVXXJgPK8/4BX1TXANQPqtwG3Dag/RO9qJknSKuE3pCVJHYaDJKnDcJAkdRgOkqQOw0GS1GE4SJI6DAdJUofhIEnqMBwkSR2GgySpw3CQJHUYDpKkDsNBktRhOEiSOgwHSVKH4SBJ6lgwHJLsSnI4yb19tT9M8s0kdyf5dJKTW31jkr9Ncld7/EnfPi9Pck+S6STXJkmrPyfJ3iQH2vPaZ+KDSpIWbzFHDh8Fth5T2wucWVUvAf4X8M6+bQ9W1eb2eEtf/TpgB737Sk/2veaVwL6qmgT2tXVJ0ggtGA5V9SXgyDG1z1XV0bZ6B7DheK+RZB1wUlV9uaoKuAG4qG3eBuxuy7v76pKkEVmOcw6/BXy2b31Tkq8n+WKSV7XaemCmb8xMqwGcVlWHANrzqfO9UZIdSaaSTM3Ozi5D65KkQYYKhyT/ATgKfKyVDgHPq6qXAr8LfDzJSUAG7F5P9/2qamdVbamqLRMTE0ttW5K0gBOWumOS7cBvAue0qSKq6gngibZ8Z5IHgRfSO1Lon3raABxsy48mWVdVh9r00+Gl9iRJWh5LOnJIshV4B/C6qnq8rz6RZE1bfj69E88Ptemix5Kc3a5SuhS4te22B9jelrf31SVJI7LgkUOSG4HXAKckmQGuond10rOBve2K1DvalUmvBt6T5CjwJPCWqpo7mf1Welc+nUjvHMXceYr3ATcnuQz4DvCGZflkkqQlWzAcquqSAeXr5xl7C3DLPNumgDMH1L8PnLNQH5KkleM3pCVJHYaDJKnDcJAkdRgOkqQOw0GS1GE4SJI6DAdJUofhIEnqMBwkSR2GgySpw3CQJHUYDpKkDsNBktRhOEiSOgwHSVKH4SBJ6jAcJEkdiwqHJLuSHE5yb1/tOUn2JjnQnte2epJcm2Q6yd1JXta3z/Y2/kCS7X31lye5p+1zbbvPtCRpRBZ75PBRYOsxtSuBfVU1Cexr6wAXAJPtsQO4DnphQu/+068AzgKumguUNmZH337HvpckaQUtKhyq6kvAkWPK24DdbXk3cFFf/YbquQM4Ock64Hxgb1UdqaofAHuBrW3bSVX15aoq4Ia+15IkjcAw5xxOq6pDAO351FZfDzzSN26m1Y5XnxlQ70iyI8lUkqnZ2dkhWpckHc8zcUJ60PmCWkK9W6zaWVVbqmrLxMTEEC1Kko7nhCH2fTTJuqo61KaGDrf6DHB637gNwMFWf80x9f/e6hsGjJf0TLr6l0bdwT8sV/9w1B0sq2GOHPYAc1ccbQdu7atf2q5aOhv4YZt2uh04L8nadiL6POD2tu2xJGe3q5Qu7XstSdIILOrIIcmN9P6v/5QkM/SuOnofcHOSy4DvAG9ow28DLgSmgceBNwFU1ZEk7wX2t3Hvqaq5k9xvpXdF1InAZ9tDkjQiiwq
Hqrpknk3nDBhbwOXzvM4uYNeA+hRw5mJ6kSQ98/yGtCSpw3CQJHUYDpKkDsNBktRhOEiSOgwHSVKH4SBJ6jAcJEkdhoMkqcNwkCR1GA6SpA7DQZLUYThIkjoMB0lSh+EgSeowHCRJHUsOhyQvSnJX3+NHSd6e5Ook3+2rX9i3zzuTTCd5IMn5ffWtrTad5MphP5QkaTiLuhPcIFX1ALAZIMka4LvAp+ndFvSDVfX+/vFJzgAuBl4M/DLw+SQvbJs/DJwLzAD7k+ypqm8stTdJ0nCWHA7HOAd4sKq+nWS+MduAm6rqCeBbSaaBs9q26ap6CCDJTW2s4SBJI7Jc5xwuBm7sW78iyd1JdiVZ22rrgUf6xsy02nz1jiQ7kkwlmZqdnV2m1iVJxxo6HJI8C3gd8GetdB3wAnpTToeAD8wNHbB7HafeLVbtrKotVbVlYmJiqL4lSfNbjmmlC4CvVdWjAHPPAEk+Anymrc4Ap/fttwE42Jbnq0uSRmA5ppUuoW9KKcm6vm2vB+5ty3uAi5M8O8kmYBL4KrAfmEyyqR2FXNzGSpJGZKgjhyQ/R+8qozf3lf8gyWZ6U0MPz22rqvuS3EzvRPNR4PKqerK9zhXA7cAaYFdV3TdMX5Kk4QwVDlX1OPDcY2pvPM74a4BrBtRvA24bphdJ0vLxG9KSpA7DQZLUYThIkjoMB0lSh+EgSeowHCRJHYaDJKnDcJAkdRgOkqQOw0GS1GE4SJI6DAdJUofhIEnqMBwkSR2GgySpw3CQJHUMHQ5JHk5yT5K7kky12nOS7E1yoD2vbfUkuTbJdJK7k7ys73W2t/EHkmwfti9J0tIt15HDr1fV5qra0tavBPZV1SSwr60DXEDv3tGTwA7gOuiFCXAV8ArgLOCquUCRJK28Z2paaRuwuy3vBi7qq99QPXcAJydZB5wP7K2qI1X1A2AvsPUZ6k2StIDlCIcCPpfkziQ7Wu20qjoE0J5PbfX1wCN9+8602nz1p0iyI8lUkqnZ2dllaF2SNMgJy/Aar6yqg0lOBfYm+eZxxmZArY5Tf2qhaiewE2DLli2d7ZKk5TH0kUNVHWzPh4FP0ztn8GibLqI9H27DZ4DT+3bfABw8Tl2SNAJDhUOSn0/yi3PLwHnAvcAeYO6Ko+3ArW15D3Bpu2rpbOCHbdrpduC8JGvbiejzWk2SNALDTiudBnw6ydxrfbyq/jLJfuDmJJcB3wHe0MbfBlwITAOPA28CqKojSd4L7G/j3lNVR4bsTZK0REOFQ1U9BPzqgPr3gXMG1Au4fJ7X2gXsGqYfSdLy8BvSkqQOw0GS1GE4SJI6DAdJUofhIEnqMBwkSR2GgySpw3CQJHUYDpKkDsNBktRhOEiSOgwHSVKH4SBJ6jAcJEkdhoMkqcNwkCR1GA6SpI4lh0OS05N8Icn9Se5L8jutfnWS7ya5qz0u7NvnnUmmkzyQ5Py++tZWm05y5XAfSZI0rGFuE3oU+L2q+lqSXwTuTLK3bftgVb2/f3CSM4CLgRcDvwx8PskL2+YPA+cCM8D+JHuq6htD9CZJGsKSw6GqDgGH2vJjSe4H1h9nl23ATVX1BPCtJNPAWW3bdLsfNUluamMNB0kakWU555BkI/BS4CutdEWSu5PsSrK21dYDj/TtNtNq89UHvc+OJFNJpmZnZ5ejdUnSAEOHQ5JfAG4B3l5VPwKuA14AbKZ3ZPGBuaEDdq/j1LvFqp1VtaWqtkxMTAzbuiRpHsOccyDJz9ILho9V1acAqurRvu0fAT7TVmeA0/t23wAcbMvz1SVJIzDM1UoBrgfur6o/6quv6xv2euDetrwHuDjJs5NsAiaBrwL7gckkm5I8i95J6z1L7UuSNLxhjhxeCbwRuCfJXa32LuCSJJvpTQ09DLwZoKruS3IzvRPNR4HLq+pJgCRXALcDa4BdVXXfEH1JkoY0zNVKf8Xg8wW3HWefa4BrBtRvO95+kqSV5TekJUkdhoMkqcNwkCR1GA6SpA7DQZL
UYThIkjoMB0lSh+EgSeowHCRJHYaDJKnDcJAkdRgOkqQOw0GS1GE4SJI6DAdJUofhIEnqWDXhkGRrkgeSTCe5ctT9SNI4WxXhkGQN8GHgAuAMercaPWO0XUnS+FoV4QCcBUxX1UNV9WPgJmDbiHuSpLG15HtIL7P1wCN96zPAK44dlGQHsKOt/u8kD6xAb+PiFOB7o27iePL7o+5AI7Lq/zYBeHdG3cFi/cpiBq2WcBj0X7U6haqdwM5nvp3xk2SqqraMug/pWP5tjsZqmVaaAU7vW98AHBxRL5I09lZLOOwHJpNsSvIs4GJgz4h7kqSxtSqmlarqaJIrgNuBNcCuqrpvxG2NG6frtFr5tzkCqepM7UuSxtxqmVaSJK0ihoMkqcNwkCR1GA6SpA7DYcwlOTHJi0bdhzRIkp8fdQ/jynAYY0n+BXAX8JdtfXMSv1+ikUvya0m+Adzf1n81yR+PuK2xYjiMt6vp/ejh3wBU1V3AxhH2I835IHA+8H2Aqvpr4NUj7WjMGA7j7WhV/XDUTUiDVNUjx5SeHEkjY2pVfENaI3Nvkn8LrEkyCbwN+J8j7kkCeCTJrwHVflLnbbQpJq0MjxzG228DLwaeAG4EfgS8faQdST1vAS6n93P+M8Dmtq4V4s9nSJI6nFYaQ0n+nAH3y5hTVa9bwXakn0jyIY7/t/m2FWxnrBkO4+n9o25AmsfUqBtQj9NKkqQOjxzGWLtC6T8BZwD/aK5eVc8fWVMSkGQCeAfdv83fGFlTY8arlcbbfwGuA44Cvw7cAPzXkXYk9XyM3qWrm4B3Aw/Tu2OkVojhMN5OrKp99KYXv11VVwP+n5lWg+dW1fXA31XVF6vqt4CzR93UOHFaabz93yQ/Axxot2n9LnDqiHuSAP6uPR9K8lrgILBhhP2MHU9Ij7Ek/5TeofvJwHuBXwL+oKruGGljGntJfhP4H8DpwIeAk4B3V5U/DLlCDAdJUofTSmNooZ/l9ktwGrUkm+j9vMtG+v6d8m9z5RgO4+mfAY/Q+z2lrwAZbTtSx38Drgf+HPj7EfcylpxWGkNJ1gDnApcALwH+Arixqu4baWNSk+QrVfWKUfcxzgyHMZfk2fRC4g+B91TVh0bckkT7KflJ4HP0fjUYgKr62siaGjNOK42pFgqvpRcMG4FrgU+Nsiepzz8B3kjvezdz00qF38NZMR45jKEku4Ezgc8CN1XVvSNuSXqKJN8EXlJVPx51L+PKcBhDSf4e+D9ttf8PIEBV1Ukr35X0/yX5BPDbVXV41L2MK6eVxlBV+bMpWu1OA76ZZD9PPefgpawrxHCQtBpdNeoGxp3TSpJWpSS/AkxW1eeT/BywpqoeG3Vf48LpBUmrTpJ/D3wS+M+ttJ7eF+O0QgwHSavR5cArgR8BVNUB/MXgFWU4SFqNnui/jDXJCTz1yjo9wwwHSavRF5O8CzgxybnAn9H7nSWtEE9IS1p12k2oLgPOo/f9m9uBPy3/wVoxhoOkVSPJ86rqO6PuQ04rSVpdfnJFUpJbRtnIuDMcJK0m/fcWef7IupDhIGlVqXmWtcI85yBp1UjyJL0fhQxwIvD43Cb8UcgVZThIkjqcVpIkdRgOkqQOw0GS1GE4SJI6/h+BVjy/i9wFJAAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "adult_df['sex'].value_counts().plot(kind='bar')" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYcAAAD4CAYAAAAHHSreAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAADLRJREFUeJzt3V+M3XVax/H3Z6mYjetKsQPBttiNNlF2E5FtoMneoCSl4EUxkQQupCEkYzaQuIkXW70pC27CXqgJyUpSs5WSKEjUDc3KWptmzca47HZQwh8RO0GEsYQWi4ghcWX38WJ+E0/6nHamM6Vnyrxfyck55zm/c+Z7SOGd358pqSokSRr1sUkvQJK0+hgHSVJjHCRJjXGQJDXGQZLUGAdJUmMcJEmNcZAkNcZBktSsm/QClmvDhg21ZcuWSS9Dki4qzz777NtVNbXYdhdtHLZs2cLMzMyklyFJF5Uk/7aU7TysJElqjIMkqTEOkqTGOEiSGuMgSWqMgySpMQ6SpMY4SJKai/aX4C4WW/b81aSX8JHx2kO/MuklSGuGew6SpMY4SJIa4yBJaoyDJKkxDpKkxjhIkhrjIElqjIMkqTEOkqTGOEiSGuMgSWqMgySpMQ6SpMY4SJIa4yBJaoyDJKkxDpKkxjhIkhrjIElqjIMkqVk0Dkk2J/lWkpeTvJTkN4f55UkOJzk23K8f5knycJLZJM8nuW7ks3YP2x9Lsntk/tkkLwzveThJPowvK0lamqXsOXwA/FZV/TywHbg3yTXAHuBIVW0FjgzPAW4Btg63aeARmI8JsBe4Abge2LsQlGGb6ZH37Vz5V5MkLdeicaiqN6vqH4bH7wEvAxuBXcCBYbMDwG3D413AYzXvGeCyJFcBNwOHq+pUVb0DHAZ2Dq99sqq+U1UFPDbyWZKkCTincw5JtgC/CHwXuLKq3oT5gABXDJttBN4YedvcMDvbfG7MXJI0IUuOQ5JPAH8BfKGq/utsm46Z1TLm49YwnWQmyczJkycXW7IkaZmWFIckP8J8GP6kqv5yGL81HBJiuD8xzOeAzSNv3wQcX2S+acy8qap9VbWtqrZNTU0tZemSpGVYytVKAb4GvFxVvz/y0kFg4Yqj3cBTI/O7hquWtgPvDoedDgE7kqwfTkTvAA4Nr72XZPvws+4a+SxJ0gSsW8I2nwN+HXghyXPD7HeAh4Ank9wDvA7cPrz2NHArMAu8D9wNUFWnkjwIHB22e6CqTg2PPw88Cnwc+OZwkyRNyKJxqKq/Y/x5AYCbxmxfwL1n+Kz9wP4x8xngM4utRZJ0Yfgb0pKkxjhIkhrjIElqjIMkqTEOkqTGOEiSGuMgSWqMgySpMQ6SpMY4SJIa4yBJaoyDJKkxDpKkxjhIkhrjIElqjIMkqTEOkqTGOEiSGuMgSWqMgySpMQ6SpMY4SJIa4yBJaoyDJKkxDpKkxjhIkhrjIElqjIMkqTEOkqTGOEiSGuMgSWqMgySpMQ6SpMY4SJIa4yBJaoyDJKkxDpKkZtE4JNmf5ESSF0dm9yf59yTPDbdbR1777SSzSV5JcvPIfOcwm02yZ2T+qSTfTXIsyZ8lufR8fkFJ0rlbyp7Do8DOMfM/qKprh9vTAEm
uAe4APj285w+TXJLkEuCrwC3ANcCdw7YAXxk+ayvwDnDPSr6QJGnlFo1DVX0bOLXEz9sFPFFV/1NV/wrMAtcPt9mqerWqvg88AexKEuCXgT8f3n8AuO0cv4Mk6TxbyTmH+5I8Pxx2Wj/MNgJvjGwzN8zONP9J4D+r6oPT5mMlmU4yk2Tm5MmTK1i6JOlslhuHR4CfAa4F3gR+b5hnzLa1jPlYVbWvqrZV1bapqalzW7EkacnWLedNVfXWwuMkfwR8Y3g6B2we2XQTcHx4PG7+NnBZknXD3sPo9pKkCVnWnkOSq0ae/iqwcCXTQeCOJD+a5FPAVuB7wFFg63Bl0qXMn7Q+WFUFfAv4teH9u4GnlrMmSdL5s+ieQ5LHgRuBDUnmgL3AjUmuZf4Q0GvAbwBU1UtJngT+CfgAuLeqfjB8zn3AIeASYH9VvTT8iC8CTyT5XeAfga+dt28nSVqWReNQVXeOGZ/xP+BV9WXgy2PmTwNPj5m/yvzVTJKkVcLfkJYkNcZBktQYB0lSYxwkSY1xkCQ1xkGS1BgHSVJjHCRJjXGQJDXGQZLUGAdJUmMcJEmNcZAkNcZBktQYB0lSYxwkSY1xkCQ1xkGS1BgHSVJjHCRJjXGQJDXGQZLUGAdJUmMcJEmNcZAkNcZBktQYB0lSYxwkSY1xkCQ1xkGS1BgHSVJjHCRJjXGQJDXGQZLUGAdJUmMcJEmNcZAkNYvGIcn+JCeSvDgyuzzJ4STHhvv1wzxJHk4ym+T5JNeNvGf3sP2xJLtH5p9N8sLwnoeT5Hx/SUnSuVnKnsOjwM7TZnuAI1W1FTgyPAe4Bdg63KaBR2A+JsBe4AbgemDvQlCGbaZH3nf6z5IkXWCLxqGqvg2cOm28CzgwPD4A3DYyf6zmPQNcluQq4GbgcFWdqqp3gMPAzuG1T1bVd6qqgMdGPkuSNCHLPedwZVW9CTDcXzHMNwJvjGw3N8zONp8bMx8ryXSSmSQzJ0+eXObSJUmLOd8npMedL6hlzMeqqn1Vta2qtk1NTS1ziZKkxSw3Dm8Nh4QY7k8M8zlg88h2m4Dji8w3jZlLkiZouXE4CCxccbQbeGpkftdw1dJ24N3hsNMhYEeS9cOJ6B3AoeG195JsH65SumvksyRJE7JusQ2SPA7cCGxIMsf8VUcPAU8muQd4Hbh92Pxp4FZgFngfuBugqk4leRA4Omz3QFUtnOT+PPNXRH0c+OZwkyRN0KJxqKo7z/DSTWO2LeDeM3zOfmD/mPkM8JnF1iFJunD8DWlJUmMcJEmNcZAkNcZBktQYB0lSYxwkSY1xkCQ1xkGS1BgHSVJjHCRJjXGQJDXGQZLUGAdJUmMcJEmNcZAkNcZBktQYB0lSs+j/CU7SR9T9PzHpFXy03P/upFdwXrnnIElqjIMkqTEOkqTGOEiSGuMgSWqMgySpMQ6SpMY4SJIa4yBJaoyDJKkxDpKkxjhIkhrjIElqjIMkqTEOkqTGOEiSGuMgSWqMgySpWVEckryW5IUkzyWZGWaXJzmc5Nhwv36YJ8nDSWaTPJ/kupHP2T1sfyzJ7pV9JUnSSp2PPYdfqqprq2rb8HwPcKSqtgJHhucAtwBbh9s08AjMxwTYC9wAXA/sXQiKJGkyPozDSruAA8PjA8BtI/PHat4zwGVJrgJuBg5X1amqegc4DOz8ENYlSVqilcahgL9J8myS6WF2ZVW9CTDcXzHMNwJvjLx3bpidaS5JmpB1K3z/56rqeJIrgMNJ/vks22bMrM4y7x8wH6BpgKuvvvpc1ypJWqIV7TlU1fHh/gTwdebPGbw1HC5iuD8xbD4HbB55+ybg+Fnm437evqraVlXbpqamVrJ0SdJZLDsOSX4syY8vPAZ2AC8CB4GFK452A08Njw8Cdw1XLW0H3h0OOx0CdiRZP5yI3jHMJEkTspLDSlcCX0+y8Dl/WlV/neQo8GSSe4DXgduH7Z8GbgVmgfeBuwGq6lSSB4Gjw3YPVNWpFaxLkrRCy45DVb0
K/MKY+X8AN42ZF3DvGT5rP7B/uWuRJJ1f/oa0JKkxDpKkxjhIkhrjIElqjIMkqTEOkqTGOEiSGuMgSWqMgySpMQ6SpMY4SJIa4yBJaoyDJKkxDpKkxjhIkhrjIElqjIMkqTEOkqTGOEiSGuMgSWqMgySpMQ6SpMY4SJIa4yBJaoyDJKkxDpKkxjhIkhrjIElqjIMkqTEOkqTGOEiSGuMgSWqMgySpMQ6SpMY4SJIa4yBJaoyDJKlZNXFIsjPJK0lmk+yZ9HokaS1bFXFIcgnwVeAW4BrgziTXTHZVkrR2rYo4ANcDs1X1alV9H3gC2DXhNUnSmrVu0gsYbATeGHk+B9xw+kZJpoHp4el/J3nlAqxtLdgAvD3pRSwmX5n0CjQhF8WfT76USa9gqX56KRutljiM+6dabVC1D9j34S9nbUkyU1XbJr0OaRz/fE7GajmsNAdsHnm+CTg+obVI0pq3WuJwFNia5FNJLgXuAA5OeE2StGatisNKVfVBkvuAQ8AlwP6qemnCy1pLPFSn1cw/nxOQqnZoX5K0xq2Ww0qSpFXEOEiSGuMgSWpWxQlpXVhJfo7530DfyPzvkxwHDlbVyxNdmKRVwz2HNSbJF5n/60kCfI/5y4gDPO5feChpgVcrrTFJ/gX4dFX972nzS4GXqmrrZFYmnV2Su6vqjye9jrXCPYe154fAT42ZXzW8Jq1WX5r0AtYSzzmsPV8AjiQ5xv//ZYdXAz8L3DexVUlAkufP9BJw5YVcy1rnYaU1KMnHmP9r0jcy/y/dHHC0qn4w0YVpzUvyFnAz8M7pLwF/X1Xj9nr1IXDPYQ2qqh8Cz0x6HdIY3wA+UVXPnf5Ckr+98MtZu9xzkCQ1npCWJDXGQZLUGAdJUmMcJEnN/wGBEAVINvPkfAAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "adult_df['income-per-year'].value_counts().plot(kind='bar')" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYAAAAD4CAYAAADlwTGnAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAADKFJREFUeJzt3W2o3vV9x/H3p2a2a0u9qadik2zJMKzTwagEdSvsQR3edSw+qJAx1iCBPLFbOwar7om9EyqMuRVWIVRLWkqtuIKhLZPgzYMxvIlV3DRzCbqZM52ekuhuSm+i3z04P/UYTnKuo8dzab7vF4Rz/X//3/86v7/EvM//f67rnFQVkqR+3jXtBUiSpsMASFJTBkCSmjIAktSUAZCkpgyAJDVlACSpKQMgSU0ZAElqas20F3A8Z5xxRm3YsGHay5Ckd5SHHnrox1U1s9S8t3UANmzYwN69e6e9DEl6R0nyH5PM8xaQJDVlACSpKQMgSU0ZAElqygBIUlMGQJKaMgCS1JQBkKSm3tZvBHun2HDND6a9hBPKv3/lE9NegtSCVwCS1JQBkKSmDIAkNWUAJKkpAyBJTRkASWrKAEhSUwZAkpoyAJLUlAGQpKYMgCQ1ZQAkqamJApDkz5I8luRfknwnyXuSbExyf5L9Sb6b5OQx991j+8DYv2HB81w7xp9Icslbc0qSpEksGYAka4E/BTZX1W8CJwFbgRuAG6tqE3AY2D4O2Q4crqqzgRvHPJKcM447F7gU+FqSk1b2dCRJk5r0FtAa4JeTrAHeCzwLfBy4fezfBVwxHm8Z24z9FyXJGL+1qn5WVU8BB4Dz3/wpSJLeiCUDUFX/CfwV8DTz//C/CDwEvFBVR8a0WWDteLwWODiOPTLmf3Dh+CLHvCrJjiR7k+ydm5t7I+ckSZrAJLeATmP+q/eNwIeB9wGXLTK1XjnkGPuONf76gaqdVbW5qjbPzMwstTxJ0hs0yS2g3wOeqqq5qvoF8D3gd4BTxy0hgHXAM+PxLLAeYOw/BTi0cHyRYyRJq2ySADwNXJjkveNe/kXA48A9wCfHnG3AHePx7rHN2H93VdUY3zpeJbQR2AQ8sDKnIUlariV/J3BV3Z/kduBHwBHgYWAn8APg1iRfHmM3j0NuBr6V5ADzX/lvHc/zWJLbmI/HEeDqqnpphc9HkjShiX4pfFVdB1x31PCTLPIqnqr6KXDlMZ7neuD6Za5RkvQW8J3AktSUAZCkpgyAJDVlACSpKQMgSU0ZAElqygBIUlMGQJKaMgCS1JQBkKSmDIAkNWUAJKkpAyBJTRkASWrKAEhSUwZAkpoyAJLUlAGQpKYMgCQ1ZQAkqSkDIElNGQBJasoASFJTBkCSmjIAktSUAZCkpgyAJDVlACSpKQMgSU0ZAElqygBIUlMGQJKaMgCS1JQBkKSmDIAkNWUAJKkpAyBJTRkASWrKAEhSUxMFIMmpSW5P8q9J9iX57SSnJ9mTZP/4eNqYmyRfTXIgyaNJzlvwPNvG/P1Jtr1VJyVJWtqkVwB/C/xDVX0E+C1gH3ANcFdVbQLuGtsAlwGbxp8dwE0ASU4HrgMuAM4HrnslGpKk1bdkAJJ8APhd4Ga
Aqvp5Vb0AbAF2jWm7gCvG4y3AN2vefcCpSc4CLgH2VNWhqjoM7AEuXdGzkSRNbJIrgF8D5oBvJHk4ydeTvA84s6qeBRgfPzTmrwUOLjh+dowda/x1kuxIsjfJ3rm5uWWfkCRpMpMEYA1wHnBTVX0U+D9eu92zmCwyVscZf/1A1c6q2lxVm2dmZiZYniTpjZgkALPAbFXdP7ZvZz4Iz41bO4yPzy+Yv37B8euAZ44zLkmagiUDUFX/BRxM8utj6CLgcWA38MorebYBd4zHu4FPjVcDXQi8OG4R3QlcnOS08c3fi8eYJGkK1kw470+Abyc5GXgSuIr5eNyWZDvwNHDlmPtD4HLgAPCTMZeqOpTkS8CDY94Xq+rQipyFJGnZJgpAVT0CbF5k10WLzC3g6mM8zy3ALctZoCTpreE7gSWpKQMgSU0ZAElqygBIUlMGQJKaMgCS1JQBkKSmDIAkNWUAJKkpAyBJTRkASWrKAEhSUwZAkpoyAJLUlAGQpKYMgCQ1ZQAkqSkDIElNGQBJasoASFJTBkCSmjIAktSUAZCkpgyAJDVlACSpKQMgSU0ZAElqygBIUlMGQJKaMgCS1JQBkKSmDIAkNWUAJKkpAyBJTRkASWrKAEhSUwZAkpoyAJLUlAGQpKYmDkCSk5I8nOT7Y3tjkvuT7E/y3SQnj/F3j+0DY/+GBc9x7Rh/IsklK30ykqTJLecK4DPAvgXbNwA3VtUm4DCwfYxvBw5X1dnAjWMeSc4BtgLnApcCX0ty0ptbviTpjZooAEnWAZ8Avj62A3wcuH1M2QVcMR5vGduM/ReN+VuAW6vqZ1X1FHAAOH8lTkKStHyTXgH8DfAXwMtj+4PAC1V1ZGzPAmvH47XAQYCx/8Ux/9XxRY55VZIdSfYm2Ts3N7eMU5EkLceSAUjy+8DzVfXQwuFFptYS+453zGsDVTuranNVbZ6ZmVlqeZKkN2jNBHM+BvxBksuB9wAfYP6K4NQka8ZX+euAZ8b8WWA9MJtkDXAKcGjB+CsWHiNJWmVLXgFU1bVVta6qNjD/Tdy7q+qPgHuAT45p24A7xuPdY5ux/+6qqjG+dbxKaCOwCXhgxc5EkrQsk1wBHMvngFuTfBl4GLh5jN8MfCvJAea/8t8KUFWPJbkNeBw4AlxdVS+9ic8vSXoTlhWAqroXuHc8fpJFXsVTVT8FrjzG8dcD1y93kZKklec7gSWpKQMgSU0ZAElqygBIUlMGQJKaMgCS1JQBkKSmDIAkNWUAJKkpAyBJTRkASWrKAEhSUwZAkpoyAJLUlAGQpKYMgCQ1ZQAkqSkDIElNGQBJasoASFJTBkCSmjIAktSUAZCkpgyAJDVlACSpKQMgSU0ZAElqygBIUlMGQJKaMgCS1JQBkKSmDIAkNWUAJKkpAyBJTRkASWrKAEhSUwZAkpoyAJLU1JIBSLI+yT1J9iV5LMlnxvjpSfYk2T8+njbGk+SrSQ4keTTJeQuea9uYvz/JtrfutCRJS5nkCuAI8OdV9RvAhcDVSc4BrgHuqqpNwF1jG+AyYNP4swO4CeaDAVwHXACcD1z3SjQkSatvyQBU1bNV9aPx+H+AfcBaYAuwa0zbBVwxHm8Bvlnz7gNOTXIWcAmwp6oOVdVhYA9w6YqejSRpYsv6HkCSDcBHgfuBM6vqWZiPBPChMW0tcHDBYbNj7FjjkqQpmDgASd4P/D3w2ar67+NNXWSsjjN+9OfZkWRvkr1zc3OTLk+StEwTBSDJLzH/j/+3q+p7Y/i5cWuH8fH5MT4LrF9w+DrgmeOMv05V7ayqzVW1eWZmZjnnIklahkleBRTgZmBfVf31gl27gVdeybMNuGPB+KfGq4EuBF4ct4juBC5Octr45u/FY0ySNAVrJpjzMeCPgX9O8sgY+0vgK8BtSbYDTwNXjn0/BC4HDgA/Aa4CqKpDSb4EPDjmfbGqDq3IWUiSlm3JAFTVP7L4/XuAixaZX8DVx3iuW4BblrNASdJbw3cCS1JTBkC
SmjIAktSUAZCkpgyAJDVlACSpKQMgSU0ZAElqygBIUlMGQJKaMgCS1JQBkKSmDIAkNWUAJKkpAyBJTRkASWpqkt8IJumd7POnTHsFJ47PvzjtFaworwAkqSkDIElNGQBJasoASFJTBkCSmjIAktSUAZCkpgyAJDVlACSpKQMgSU0ZAElqygBIUlMGQJKaMgCS1JQBkKSmDIAkNWUAJKkpAyBJTRkASWrKAEhSUwZAkpoyAJLU1KoHIMmlSZ5IciDJNav9+SVJ81Y1AElOAv4OuAw4B/jDJOes5hokSfNW+wrgfOBAVT1ZVT8HbgW2rPIaJEnAmlX+fGuBgwu2Z4ELFk5IsgPYMTb/N8kTq7S2Ds4AfjztRSwlN0x7BZqCd8TfTb6Qaa9gUr86yaTVDsBi//XqdRtVO4Gdq7OcXpLsrarN016HdDT/bk7Hat8CmgXWL9heBzyzymuQJLH6AXgQ2JRkY5KTga3A7lVegySJVb4FVFVHknwauBM4Cbilqh5bzTU05601vV35d3MKUlVLz5IknXB8J7AkNWUAJKkpAyBJTa32+wC0ipJ8hPl3Wq9l/v0WzwC7q2rfVBcm6W3BK4ATVJLPMf+jNgI8wPxLcAN8xx/CJwl8FdAJK8m/AedW1S+OGj8ZeKyqNk1nZdLxJbmqqr4x7XV04BXAietl4MOLjJ819klvV1+Y9gK68HsAJ67PAncl2c9rP4DvV4CzgU9PbVUSkOTRY+0CzlzNtXTmLaATWJJ3Mf8juNcy/z/WLPBgVb001YWpvSTPAZcAh4/eBfxTVS129aoV5hXACayqXgbum/Y6pEV8H3h/VT1y9I4k967+cnryCkCSmvKbwJLUlAGQpKYMgCQ1ZQAkqan/B5tR3ZmRvbX4AAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# compute score histograms by race and by gender \n", "adult_df_f = adult_df[(adult_df.sex == 'Female')]\n", "adult_df_f[\"income-per-year\"].value_counts().plot(kind='bar')" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYcAAAD4CAYAAAAHHSreAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAETFJREFUeJzt3X+s3Xddx/Hny9XxQ4R27IKjLbZKgw6icd5sUxNjmG4dGro/WNKFuAabNMGh4I/Iponl1xKIxumizFRW6QzZWCZmDQ5nMyDEyMbuAAdjzl6HrtdOdknLRIlg4e0f51M59HNub3vOpafzPh/Jyfl+35/P59z3We7uq98f595UFZIkDfuuaTcgSTr7GA6SpI7hIEnqGA6SpI7hIEnqGA6SpI7hIEnqGA6SpI7hIEnqrJl2A+M6//zza9OmTdNuQ5KeUR566KEvVdXMcvOeseGwadMm5ubmpt2GJD2jJPnXU5nnaSVJUsdwkCR1DAdJUsdwkCR1DAdJUmfZcEiyN8lTST43Yuw3k1SS89t+ktycZD7Jw0kuGpq7I8nB9tgxVP/xJJ9ta25OkpV6c5Kk8ZzKkcP7gK0nFpNsBH4OeGKofCWwpT12Abe0uecBu4FLgIuB3UnWtTW3tLnH13VfS5J0Zi0bDlX1ceDIiKGbgN8Chv/O6Dbgthq4H1ib5ALgCuBAVR2pqqPAAWBrG3t+VX2iBn+v9DbgqsnekiRpUmN9CC7Ja4B/q6p/OOEs0Hrg0ND+QqudrL4wor7U193F4CiDl770peO0fsZtuv6vp93C/xv/8q6fn3YL0qpx2hekkzwX+B3gd0cNj6jVGPWRqmpPVc1W1ezMzLKf/pYkjWmcu5V+ENgM/EOSfwE2AJ9K8n0M/uW/cWjuBuDwMvUNI+qSpCk67XCoqs9W1YuqalNVbWLwA/6iqvp3YD9wbbtr6VLg6ap6ErgXuDzJunYh+nLg3jb2lSSXtruUrgXuXqH3Jkka06ncyno78Ang5UkWkuw8yfR7gMeBeeDPgF8GqKojwDuAB9vj7a0G8AbgvW3NPwMfHu+tSJJWyrIXpKvqmmXGNw1tF3DdEvP2AntH1OeAVy7XhyTpzPET0pKkjuEgSeoYDpKkjuEgSeoYDpKkjuEgSeoYDpKkjuEgSeoYDpKkjuEgSeoYDpKkjuEgSeoYDpKkjuEgSeoYDpKkjuEgSeoYDpKkjuEgSeoYDpKkjuEgSeosGw5J9iZ5Ksnnhmq/l+Qfkzyc5K+SrB0auyHJfJLHklwxVN/aavNJrh+qb07yQJKDST6Q5NyVfIOSpNN3KkcO7wO2nlA7ALyyqn4E+CfgBoAkFwLbgVe0Ne9Jck6Sc4A/Aa4ELgSuaXMB3g3cVFVbgKPAzonekSRpYsuGQ1V9HDhyQu1vq+pY270f2NC2twF3VNXXquoLwDxwcXvMV9XjVfV14A5gW5IArwLuauv3AVdN+J4kSRNaiWsOvwR8uG2vBw4Nj
S202lL1FwJfHgqa43VJ0hRNFA5Jfgc4Brz/eGnEtBqjvtTX25VkLsnc4uLi6bYrSTpFY4dDkh3ALwCvq6rjP9AXgI1D0zYAh09S/xKwNsmaE+ojVdWeqpqtqtmZmZlxW5ckLWOscEiyFXgL8Jqq+urQ0H5ge5JnJdkMbAE+CTwIbGl3Jp3L4KL1/hYqHwVe29bvAO4e761IklbKqdzKejvwCeDlSRaS7AT+GPhe4ECSzyT5U4CqegS4E/g88DfAdVX1jXZN4Y3AvcCjwJ1tLgxC5teTzDO4BnHrir5DSdJpW7PchKq6ZkR5yR/gVXUjcOOI+j3APSPqjzO4m0mSdJbwE9KSpI7hIEnqGA6SpI7hIEnqGA6SpI7hIEnqGA6SpI7hIEnqGA6SpI7hIEnqGA6SpI7hIEnqGA6SpI7hIEnqGA6SpI7hIEnqGA6SpI7hIEnqGA6SpI7hIEnqGA6SpM6y4ZBkb5KnknxuqHZekgNJDrbnda2eJDcnmU/ycJKLhtbsaPMPJtkxVP/xJJ9ta25OkpV+k5Kk03MqRw7vA7aeULseuK+qtgD3tX2AK4Et7bELuAUGYQLsBi4BLgZ2Hw+UNmfX0LoTv5Yk6QxbNhyq6uPAkRPK24B9bXsfcNVQ/bYauB9Ym+QC4ArgQFUdqaqjwAFgaxt7flV9oqoKuG3otSRJUzLuNYcXV9WTAO35Ra2+Hjg0NG+h1U5WXxhRHynJriRzSeYWFxfHbF2StJyVviA96npBjVEfqar2VNVsVc3OzMyM2aIkaTnjhsMX2ykh2vNTrb4AbByatwE4vEx9w4i6JGmKxg2H/cDxO452AHcP1a9tdy1dCjzdTjvdC1yeZF27EH05cG8b+0qSS9tdStcOvZYkaUrWLDchye3AzwDnJ1lgcNfRu4A7k+wEngCubtPvAV4NzANfBV4PUFVHkrwDeLDNe3tVHb/I/QYGd0Q9B/hwe0iSpmjZcKiqa5YYumzE3AKuW+J19gJ7R9TngFcu14ck6czxE9KSpI7hIEnqGA6SpI7hIEnqGA6SpI7hIEnqGA6SpI7hIEnqGA6SpI7hIEnqGA6SpI7hIEnqGA6SpI7hIEnqGA6SpI7hIEnqGA6SpI7hIEnqGA6SpI7hIEnqGA6SpM5E4ZDk15I8kuRzSW5P8uwkm5M8kORgkg8kObfNfVbbn2/jm4Ze54ZWfyzJFZO9JUnSpMYOhyTrgV8FZqvqlcA5wHbg3cBNVbUFOArsbEt2Aker6mXATW0eSS5s614BbAXek+SccfuSJE1u0tNKa4DnJFkDPBd4EngVcFcb3wdc1ba3tX3a+GVJ0up3VNXXquoLwDxw8YR9SZImMHY4VNW/Ab8PPMEgFJ4GHgK+XFXH2rQFYH3bXg8camuPtfkvHK6PWPNtkuxKMpdkbnFxcdzWJUnLmOS00joG/+rfDLwE+B7gyhFT6/iSJcaWqvfFqj1VNVtVszMzM6fftCTplExyWulngS9U1WJV/Q/wQeAngbXtNBPABuBw214ANgK08RcAR4brI9ZIkqZgknB4Arg0yXPbtYPLgM8DHwVe2+bsAO5u2/vbPm38I1VVrb693c20GdgCfHKCviRJE1qz/JTRquqBJHcBnwKOAZ8G9gB/DdyR5J2tdmtbcivwF0nmGRwxbG+v80iSOxkEyzHguqr6xrh9SZImN3Y4AFTVbmD3CeXHGXG3UVX9N3D1Eq9zI3DjJL1IklaOn5CWJHUMB0lSx3CQJHUMB0lSx3CQJHUMB0lSx3CQJHUMB0lSx3CQJHUMB0lSx3CQJHUMB0lSx3CQJHUMB0lSx3CQJHUMB0lSx3CQJHUMB0lSZ6I/EyrpGeytL5h2B/+/vPXpaXewojxykCR1JgqHJGuT3JXkH5M8muQnkpyX5ECSg+15XZubJDcnmU/ycJKLhl5nR5t/MMmOSd+UJGkykx45/BHwN1X1Q8CPAo8C1wP3VdUW4L62D3AlsKU9dgG3ACQ5D9gNXAJcDOw+H
iiSpOkYOxySPB/4aeBWgKr6elV9GdgG7GvT9gFXte1twG01cD+wNskFwBXAgao6UlVHgQPA1nH7kiRNbpIjhx8AFoE/T/LpJO9N8j3Ai6vqSYD2/KI2fz1waGj9QqstVZckTckk4bAGuAi4pap+DPgvvnUKaZSMqNVJ6v0LJLuSzCWZW1xcPN1+JUmnaJJwWAAWquqBtn8Xg7D4YjtdRHt+amj+xqH1G4DDJ6l3qmpPVc1W1ezMzMwErUuSTmbscKiqfwcOJXl5K10GfB7YDxy/42gHcHfb3g9c2+5auhR4up12uhe4PMm6diH68laTJE3JpB+C+xXg/UnOBR4HXs8gcO5MshN4Ari6zb0HeDUwD3y1zaWqjiR5B/Bgm/f2qjoyYV+SpAlMFA5V9RlgdsTQZSPmFnDdEq+zF9g7SS+SpJXjJ6QlSR3DQZLUMRwkSR3DQZLUMRwkSR3DQZLUMRwkSR3DQZLUMRwkSR3DQZLUMRwkSR3DQZLUMRwkSR3DQZLUMRwkSR3DQZLUMRwkSR3DQZLUMRwkSR3DQZLUMRwkSZ2JwyHJOUk+neRDbX9zkgeSHEzygSTntvqz2v58G9809Bo3tPpjSa6YtCdJ0mRW4sjhTcCjQ/vvBm6qqi3AUWBnq+8EjlbVy4Cb2jySXAhsB14BbAXek+ScFehLkjSmicIhyQbg54H3tv0ArwLualP2AVe17W1tnzZ+WZu/Dbijqr5WVV8A5oGLJ+lLkjSZSY8c/hD4LeCbbf+FwJer6ljbXwDWt+31wCGANv50m/9/9RFrvk2SXUnmkswtLi5O2LokaSljh0OSXwCeqqqHhssjptYyYydb8+3Fqj1VNVtVszMzM6fVryTp1K2ZYO1PAa9J8mrg2cDzGRxJrE2yph0dbAAOt/kLwEZgIcka4AXAkaH6ccNrJElTMPaRQ1XdUFUbqmoTgwvKH6mq1wEfBV7bpu0A7m7b+9s+bfwjVVWtvr3dzbQZ2AJ8cty+JEmTm+TIYSlvAe5I8k7g08CtrX4r8BdJ5hkcMWwHqKpHktwJfB44BlxXVd/4DvQlSTpFKxIOVfUx4GNt+3FG3G1UVf8NXL3E+huBG1eiF0nS5PyEtCSpYzhIkjqGgySpYzhIkjqGgySpYzhIkjqGgySpYzhIkjqGgySpYzhIkjqGgySpYzhIkjqGgySpYzhIkjqGgySpYzhIkjqGgySpYzhIkjqGgySpYzhIkjqGgySpM3Y4JNmY5KNJHk3ySJI3tfp5SQ4kOdie17V6ktycZD7Jw0kuGnqtHW3+wSQ7Jn9bkqRJTHLkcAz4jar6YeBS4LokFwLXA/dV1RbgvrYPcCWwpT12AbfAIEyA3cAlwMXA7uOBIkmajrHDoaqerKpPte2vAI8C64FtwL42bR9wVdveBtxWA/cDa5NcAFwBHKiqI1V1FDgAbB23L0nS5FbkmkOSTcCPAQ8AL66qJ2EQIMCL2rT1wKGhZQuttlR91NfZlWQuydzi4uJKtC5JGmHicEjyPOAvgTdX1X+cbOqIWp2k3her9lTVbFXNzszMnH6zkqRTMlE4JPluBsHw/qr6YCt/sZ0uoj0/1eoLwMah5RuAwyepS5KmZJK7lQLcCjxaVX8wNLQfOH7H0Q7g7qH6te2upUuBp9tpp3uBy5OsaxeiL281SdKUrJlg7U8Bvwh8NslnWu23gXcBdybZCTwBXN3G7gFeDcwDXwVeD1BVR5K8A3iwzXt7VR2ZoC9J0oTGDoeq+jtGXy8AuGzE/AKuW+K19gJ7x+1FkrSy/IS0JKljOEiSOoaDJKljOEiSOoaDJKljOEiSOoaDJKljOEiSOoaDJKljOEiSOoaDJKljOEiSOoaDJKljOEiSOoaDJKljOEiSOoaDJKljOEiSOoaDJKljOEiSOmdNOCTZmuSxJPNJrp92P5K0mp0V4ZDkHOBPgCuBC4Frklw43a4kafU6K8IBuBiYr6rHq+rrwB3Atin3JEmr1pppN9CsBw4N7
S8Al5w4KckuYFfb/c8kj52B3laD84EvTbuJ5eTd0+5AU/KM+P7kbZl2B6fq+09l0tkSDqP+q1ZXqNoD7PnOt7O6JJmrqtlp9yGN4vfndJwtp5UWgI1D+xuAw1PqRZJWvbMlHB4EtiTZnORcYDuwf8o9SdKqdVacVqqqY0neCNwLnAPsrapHptzWauKpOp3N/P6cglR1p/YlSavc2XJaSZJ0FjEcJEkdw0GS1DkrLkjrzEryQww+gb6ewedJDgP7q+rRqTYm6azhkcMqk+QtDH49SYBPMriNOMDt/sJDScd5t9Iqk+SfgFdU1f+cUD8XeKSqtkynM+nkkry+qv582n2sFh45rD7fBF4yon5BG5POVm+bdgOridccVp83A/clOci3ftnhS4GXAW+cWlcSkOThpYaAF5/JXlY7TyutQkm+i8GvSV/P4H+6BeDBqvrGVBvTqpfki8AVwNETh4C/r6pRR736DvDIYRWqqm8C90+7D2mEDwHPq6rPnDiQ5GNnvp3VyyMHSVLHC9KSpI7hIEnqGA6SpI7hIEnq/C9W8LqT4nfxfAAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# compute score histograms by race and by gender \n", "adult_df_m = adult_df[(adult_df.sex == 'Male')]\n", "adult_df_m[\"income-per-year\"].value_counts().plot(kind='bar')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### To add your own dataset to aif360:\n", "Identify the following in the dataset:\n", "\n", "Favorable label\n", "\n", "Unfavorable label\n", "\n", "Privileged group\n", "\n", "Unprivileged group" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### For this dataset:\n", "Sex is the protected attribute. \n", "\n", "Label 0: Income is 50K or less\n", "\n", "Label 1: Income is more than 50K\n", "\n", "Sex 0: Female and Sex 1: Male\n", "\n", "privileged_groups = [{'sex': 1}]\n", "\n", "unprivileged_groups = [{'sex': 0}]\n", "\n", "favorable_label = 1 \n", "\n", "unfavorable_label = 0" ] }, { "cell_type": "raw", "metadata": {}, "source": [ "#Create binary label dataset that can be used by bias mitigation algorithms\n", "BM_dataset = BinaryLabelDataset(favorable_label=favorable_label,\n", " unfavorable_label=unfavorable_label,\n", " df=your_preprocessed_data,\n", " label_names=['income-per-year'],\n", " protected_attribute_names=['sex'],\n", " unprivileged_protected_attributes=unprivileged_groups)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Importing the same dataset from the aif360 toolkit" ] }, { "cell_type": "raw", "metadata": {}, "source": [ "AdultDataset (label_name='income-per-year', \n", " favorable_classes=['>50K', '>50K.'], \n", " protected_attribute_names=['race', 'sex'], \n", " privileged_classes=[['White'], ['Male']], \n", " instance_weights_name=None, \n", " categorical_features=['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'native-country'], \n", " features_to_keep=[], \n", " features_to_drop=['fnlwgt'], \n", " na_values=['?'], \n", " metadata={'label_maps': 
[{0.0: '<=50K', 1.0: '>50K'}], 'protected_attribute_maps': [{0.0: 'Non-white', 1.0: 'White'}, {0.0: 'Female', 1.0: 'Male'}]})" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [ "protected = 'sex'\n", "\n", "# AdultDataset(label_name='income-per-year', favorable_classes=['>50K', '>50K.'], protected_attribute_names=['race', 'sex'], privileged_classes=[['White'], ['Male']], instance_weights_name=None, categorical_features=['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'native-country'], features_to_keep=[], features_to_drop=['fnlwgt'], na_values=['?'], custom_preprocessing=None, metadata={'label_maps': [{0.0: '<=50K', 1.0: '>50K'}], 'protected_attribute_maps': [{0.0: 'Non-white', 1.0: 'White'}, {0.0: 'Female', 1.0: 'Male'}]})\n", "ad = AdultDataset(protected_attribute_names=[protected],\n", " privileged_classes=[['Male']], categorical_features=[],\n", " features_to_keep=['age', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week'])" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/markdown": [ "#### Dataset shape" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "(48842, 6)\n" ] }, { "data": { "text/markdown": [ "#### Favorable and unfavorable labels" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "1.0 , 0.0\n" ] }, { "data": { "text/markdown": [ "#### Protected attribute names" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "['sex']\n" ] }, { "data": { "text/markdown": [ "#### Privileged and unprivileged protected attribute values" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "[array([1.])] , 
[array([0.])]\n" ] }, { "data": { "text/markdown": [ "#### Dataset feature names" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "['age', 'education-num', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week']\n" ] } ], "source": [ "# print out some labels, names, etc.\n", "display(Markdown(\"#### Dataset shape\"))\n", "print(ad.features.shape)\n", "display(Markdown(\"#### Favorable and unfavorable labels\"))\n", "print(ad.favorable_label, \",\",ad.unfavorable_label)\n", "display(Markdown(\"#### Protected attribute names\"))\n", "print(ad.protected_attribute_names)\n", "display(Markdown(\"#### Privileged and unprivileged protected attribute values\"))\n", "print(ad.privileged_protected_attributes, \",\", ad.unprivileged_protected_attributes)\n", "display(Markdown(\"#### Dataset feature names\"))\n", "print(ad.feature_names)" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [], "source": [ "scaler = MinMaxScaler(copy=False)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "split(num_or_size_splits, shuffle=False)\n", "\n", "If num_or_size_splits is an int, k, the value is the number of equal-sized folds to make (if k does not evenly divide the dataset these folds are approximately equal-sized). \n", "\n", "If num_or_size_splits is an array of type int, the values are taken as the indices at which to split the dataset. \n", "\n", "If the values are floats (< 1.), they are considered to be fractional proportions of the dataset at which to split." 
] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [], "source": [ "train, test = ad.split([32561])" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(32561, 6)" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train.features.shape" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(16281, 6)" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "test.features.shape" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### We then fit the scaler on the training data and apply the same scaling to the test data" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [], "source": [ "# Learn the per-feature min/max from the training split only, then reuse that fitted scaler on the test split\n", "# so both splits share one scale and no test-set statistics leak into preprocessing.\n", "train.features = scaler.fit_transform(train.features)\n", "test.features = scaler.transform(test.features)\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### We will take the index of the protected attribute column and delete it while fitting the end classifier on the data" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2\n" ] } ], "source": [ "index = train.feature_names.index(protected)\n", "print(index)" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['age',\n", " 'education-num',\n", " 'sex',\n", " 'capital-gain',\n", " 'capital-loss',\n", " 'hours-per-week']" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train.feature_names" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Logistic Regression\n", "Like many other learning algorithms in scikit-learn, LogisticRegression comes with a built-in method of handling imbalanced classes. 
If we have highly imbalanced classes and have not addressed it during preprocessing, we have the option of using the class_weight parameter to weight the classes to make certain we have a balanced mix of each class. Specifically, the balanced argument will automatically weigh classes inversely proportional to their frequency:\n", "\n", "wj = n/(k*nj)\n", "\n", "where \n", "wj is the weight to class j,\n", "\n", "n is the number of observations, \n", "\n", "nj is the number of observations in class j, and \n", "\n", "k is the total number of classes." ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ " 0%| | 0/11 [00:00');\n", " this._root_extra_style(this.root)\n", " this.root.attr('style', 'display: inline-block');\n", "\n", " $(parent_element).append(this.root);\n", "\n", " this._init_header(this);\n", " this._init_canvas(this);\n", " this._init_toolbar(this);\n", "\n", " var fig = this;\n", "\n", " this.waiting = false;\n", "\n", " this.ws.onopen = function () {\n", " fig.send_message(\"supports_binary\", {value: fig.supports_binary});\n", " fig.send_message(\"send_image_mode\", {});\n", " if (mpl.ratio != 1) {\n", " fig.send_message(\"set_dpi_ratio\", {'dpi_ratio': mpl.ratio});\n", " }\n", " fig.send_message(\"refresh\", {});\n", " }\n", "\n", " this.imageObj.onload = function() {\n", " if (fig.image_mode == 'full') {\n", " // Full images could contain transparency (where diff images\n", " // almost always do), so we need to clear the canvas so that\n", " // there is no ghosting.\n", " fig.context.clearRect(0, 0, fig.canvas.width, fig.canvas.height);\n", " }\n", " fig.context.drawImage(fig.imageObj, 0, 0);\n", " };\n", "\n", " this.imageObj.onunload = function() {\n", " fig.ws.close();\n", " }\n", "\n", " this.ws.onmessage = this._make_on_message_function(this);\n", "\n", " this.ondownload = ondownload;\n", "}\n", "\n", "mpl.figure.prototype._init_header = function() {\n", " var 
titlebar = $(\n", " '
');\n", " var titletext = $(\n", " '
');\n", " titlebar.append(titletext)\n", " this.root.append(titlebar);\n", " this.header = titletext[0];\n", "}\n", "\n", "\n", "\n", "mpl.figure.prototype._canvas_extra_style = function(canvas_div) {\n", "\n", "}\n", "\n", "\n", "mpl.figure.prototype._root_extra_style = function(canvas_div) {\n", "\n", "}\n", "\n", "mpl.figure.prototype._init_canvas = function() {\n", " var fig = this;\n", "\n", " var canvas_div = $('
');\n", "\n", " canvas_div.attr('style', 'position: relative; clear: both; outline: 0');\n", "\n", " function canvas_keyboard_event(event) {\n", " return fig.key_event(event, event['data']);\n", " }\n", "\n", " canvas_div.keydown('key_press', canvas_keyboard_event);\n", " canvas_div.keyup('key_release', canvas_keyboard_event);\n", " this.canvas_div = canvas_div\n", " this._canvas_extra_style(canvas_div)\n", " this.root.append(canvas_div);\n", "\n", " var canvas = $('');\n", " canvas.addClass('mpl-canvas');\n", " canvas.attr('style', \"left: 0; top: 0; z-index: 0; outline: 0\")\n", "\n", " this.canvas = canvas[0];\n", " this.context = canvas[0].getContext(\"2d\");\n", "\n", " var backingStore = this.context.backingStorePixelRatio ||\n", "\tthis.context.webkitBackingStorePixelRatio ||\n", "\tthis.context.mozBackingStorePixelRatio ||\n", "\tthis.context.msBackingStorePixelRatio ||\n", "\tthis.context.oBackingStorePixelRatio ||\n", "\tthis.context.backingStorePixelRatio || 1;\n", "\n", " mpl.ratio = (window.devicePixelRatio || 1) / backingStore;\n", "\n", " var rubberband = $('');\n", " rubberband.attr('style', \"position: absolute; left: 0; top: 0; z-index: 1;\")\n", "\n", " var pass_mouse_events = true;\n", "\n", " canvas_div.resizable({\n", " start: function(event, ui) {\n", " pass_mouse_events = false;\n", " },\n", " resize: function(event, ui) {\n", " fig.request_resize(ui.size.width, ui.size.height);\n", " },\n", " stop: function(event, ui) {\n", " pass_mouse_events = true;\n", " fig.request_resize(ui.size.width, ui.size.height);\n", " },\n", " });\n", "\n", " function mouse_event_fn(event) {\n", " if (pass_mouse_events)\n", " return fig.mouse_event(event, event['data']);\n", " }\n", "\n", " rubberband.mousedown('button_press', mouse_event_fn);\n", " rubberband.mouseup('button_release', mouse_event_fn);\n", " // Throttle sequential mouse events to 1 every 20ms.\n", " rubberband.mousemove('motion_notify', mouse_event_fn);\n", "\n", " 
rubberband.mouseenter('figure_enter', mouse_event_fn);\n", " rubberband.mouseleave('figure_leave', mouse_event_fn);\n", "\n", " canvas_div.on(\"wheel\", function (event) {\n", " event = event.originalEvent;\n", " event['data'] = 'scroll'\n", " if (event.deltaY < 0) {\n", " event.step = 1;\n", " } else {\n", " event.step = -1;\n", " }\n", " mouse_event_fn(event);\n", " });\n", "\n", " canvas_div.append(canvas);\n", " canvas_div.append(rubberband);\n", "\n", " this.rubberband = rubberband;\n", " this.rubberband_canvas = rubberband[0];\n", " this.rubberband_context = rubberband[0].getContext(\"2d\");\n", " this.rubberband_context.strokeStyle = \"#000000\";\n", "\n", " this._resize_canvas = function(width, height) {\n", " // Keep the size of the canvas, canvas container, and rubber band\n", " // canvas in synch.\n", " canvas_div.css('width', width)\n", " canvas_div.css('height', height)\n", "\n", " canvas.attr('width', width * mpl.ratio);\n", " canvas.attr('height', height * mpl.ratio);\n", " canvas.attr('style', 'width: ' + width + 'px; height: ' + height + 'px;');\n", "\n", " rubberband.attr('width', width);\n", " rubberband.attr('height', height);\n", " }\n", "\n", " // Set the figure to an initial 600x600px, this will subsequently be updated\n", " // upon first draw.\n", " this._resize_canvas(600, 600);\n", "\n", " // Disable right mouse context menu.\n", " $(this.rubberband_canvas).bind(\"contextmenu\",function(e){\n", " return false;\n", " });\n", "\n", " function set_focus () {\n", " canvas.focus();\n", " canvas_div.focus();\n", " }\n", "\n", " window.setTimeout(set_focus, 100);\n", "}\n", "\n", "mpl.figure.prototype._init_toolbar = function() {\n", " var fig = this;\n", "\n", " var nav_element = $('
')\n", " nav_element.attr('style', 'width: 100%');\n", " this.root.append(nav_element);\n", "\n", " // Define a callback function for later on.\n", " function toolbar_event(event) {\n", " return fig.toolbar_button_onclick(event['data']);\n", " }\n", " function toolbar_mouse_event(event) {\n", " return fig.toolbar_button_onmouseover(event['data']);\n", " }\n", "\n", " for(var toolbar_ind in mpl.toolbar_items) {\n", " var name = mpl.toolbar_items[toolbar_ind][0];\n", " var tooltip = mpl.toolbar_items[toolbar_ind][1];\n", " var image = mpl.toolbar_items[toolbar_ind][2];\n", " var method_name = mpl.toolbar_items[toolbar_ind][3];\n", "\n", " if (!name) {\n", " // put a spacer in here.\n", " continue;\n", " }\n", " var button = $('