{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# LIME\n", "API ref: https://lime-ml.readthedocs.io/en/latest/index.html\n", "\n", "For more information on LIME, see [https://github.com/marcotcr/lime](https://github.com/marcotcr/lime)." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "from __future__ import print_function\n", "\n", "%matplotlib inline\n", "\n", "import sklearn.model_selection\n", "import sklearn.metrics\n", "import sklearn.datasets\n", "import sklearn.ensemble\n", "import sklearn.preprocessing\n", "import numpy as np\n", "import lime\n", "import lime.lime_tabular\n", "from IPython.display import Markdown, display\n", "import matplotlib.pyplot as plt\n", "import sys\n", "sys.path.append(\"../\")\n", "import numpy as np\n", "from aif360.datasets import BinaryLabelDataset\n", "from aif360.metrics.binary_label_dataset_metric import BinaryLabelDatasetMetric\n", "from aif360.metrics.classification_metric import ClassificationMetric\n", "from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions import load_preproc_data_adult\n", "from aif360.algorithms.preprocessing.reweighing import Reweighing\n", "from aif360.explainers import MetricTextExplainer, MetricJSONExplainer\n", "\n", "import json\n", "from collections import OrderedDict\n", "\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.preprocessing import StandardScaler\n", "from sklearn.metrics import accuracy_score\n", "\n", "from IPython.display import Markdown, display\n", "import matplotlib.pyplot as plt\n", "\n", "from aif360.datasets.lime_encoder import LimeEncoder \n", "\n", "\n", "from aif360.datasets.adult_dataset import AdultDataset\n", "\n", "import pandas as pd\n", "\n", "np.random.seed(1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Numerical and Categorical features in the same dataset" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We now turn to a dataset that has both numerical and categorical features. Here, the task is to predict whether a person makes over 50K dollars per year. Downloads the data [here](http://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data)." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Load dataset and display statistics**" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "feature_names = [\"Age\", \"Workclass\", \"fnlwgt\", \"Education\", \"Education-Num\", \"Marital Status\",\"Occupation\", \"Relationship\", \"Race\", \"Sex\", \"Capital Gain\", \"Capital Loss\",\"Hours per week\", \"Country\"]" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "data_df = pd.read_csv('http://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data',names=[\"Age\", \"Workclass\", \"fnlwgt\", \"Education\", \"Education-Num\", \"Marital-Status\",\"Occupation\", \"Relationship\", \"Race\", \"Sex\", \"Capital-Gain\", \"Capital-Loss\",\"Hours-per-week\", \"Country\", \"income\"])\n" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AgeWorkclassfnlwgtEducationEducation-NumMarital-StatusOccupationRelationshipRaceSexCapital-GainCapital-LossHours-per-weekCountryincome
039State-gov77516Bachelors13Never-marriedAdm-clericalNot-in-familyWhiteMale2174040United-States<=50K
150Self-emp-not-inc83311Bachelors13Married-civ-spouseExec-managerialHusbandWhiteMale0013United-States<=50K
238Private215646HS-grad9DivorcedHandlers-cleanersNot-in-familyWhiteMale0040United-States<=50K
353Private23472111th7Married-civ-spouseHandlers-cleanersHusbandBlackMale0040United-States<=50K
428Private338409Bachelors13Married-civ-spouseProf-specialtyWifeBlackFemale0040Cuba<=50K
\n", "
" ], "text/plain": [ " Age Workclass fnlwgt Education Education-Num \\\n", "0 39 State-gov 77516 Bachelors 13 \n", "1 50 Self-emp-not-inc 83311 Bachelors 13 \n", "2 38 Private 215646 HS-grad 9 \n", "3 53 Private 234721 11th 7 \n", "4 28 Private 338409 Bachelors 13 \n", "\n", " Marital-Status Occupation Relationship Race Sex \\\n", "0 Never-married Adm-clerical Not-in-family White Male \n", "1 Married-civ-spouse Exec-managerial Husband White Male \n", "2 Divorced Handlers-cleaners Not-in-family White Male \n", "3 Married-civ-spouse Handlers-cleaners Husband Black Male \n", "4 Married-civ-spouse Prof-specialty Wife Black Female \n", "\n", " Capital-Gain Capital-Loss Hours-per-week Country income \n", "0 2174 0 40 United-States <=50K \n", "1 0 0 13 United-States <=50K \n", "2 0 0 40 United-States <=50K \n", "3 0 0 40 United-States <=50K \n", "4 0 0 40 Cuba <=50K " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data_df.head()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(32561, 15)" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data_df.shape" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "labels = data_df.iloc[:,-1]\n", "le= sklearn.preprocessing.LabelEncoder()\n", "le.fit(labels)\n", "labels = le.transform(labels)\n", "class_names = le.classes_\n", "data = data_df.iloc[:,:-1]\n", "le_label_mapping = dict(zip(le.classes_, le.transform(le.classes_)))" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([' <=50K', ' >50K'], dtype=object)" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "class_names" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([0, 0, 0, ..., 0, 0, 1])" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "labels" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(32561,)" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "labels.shape" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{' <=50K': 0, ' >50K': 1}" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "le_label_mapping" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "categorical_features = [1,3,5,6,7,8,9,13]" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Feature: 1\n", "{' ?': 0, ' Federal-gov': 1, ' Local-gov': 2, ' Never-worked': 3, ' Private': 4, ' Self-emp-inc': 5, ' Self-emp-not-inc': 6, ' State-gov': 7, ' Without-pay': 8}\n", "==================================================\n", "Feature: 3\n", "{' 10th': 0, ' 11th': 1, ' 12th': 2, ' 1st-4th': 3, ' 5th-6th': 4, ' 7th-8th': 5, ' 9th': 6, ' Assoc-acdm': 7, ' Assoc-voc': 8, ' Bachelors': 9, ' Doctorate': 10, ' HS-grad': 11, ' Masters': 12, ' Preschool': 13, ' Prof-school': 14, ' Some-college': 15}\n", "==================================================\n", "Feature: 5\n", "{' Divorced': 0, ' Married-AF-spouse': 1, ' Married-civ-spouse': 2, ' Married-spouse-absent': 3, ' Never-married': 4, ' Separated': 5, 
' Widowed': 6}\n", "==================================================\n", "Feature: 6\n", "{' ?': 0, ' Adm-clerical': 1, ' Armed-Forces': 2, ' Craft-repair': 3, ' Exec-managerial': 4, ' Farming-fishing': 5, ' Handlers-cleaners': 6, ' Machine-op-inspct': 7, ' Other-service': 8, ' Priv-house-serv': 9, ' Prof-specialty': 10, ' Protective-serv': 11, ' Sales': 12, ' Tech-support': 13, ' Transport-moving': 14}\n", "==================================================\n", "Feature: 7\n", "{' Husband': 0, ' Not-in-family': 1, ' Other-relative': 2, ' Own-child': 3, ' Unmarried': 4, ' Wife': 5}\n", "==================================================\n", "Feature: 8\n", "{' Amer-Indian-Eskimo': 0, ' Asian-Pac-Islander': 1, ' Black': 2, ' Other': 3, ' White': 4}\n", "==================================================\n", "Feature: 9\n", "{' Female': 0, ' Male': 1}\n", "==================================================\n", "Feature: 13\n", "{' ?': 0, ' Cambodia': 1, ' Canada': 2, ' China': 3, ' Columbia': 4, ' Cuba': 5, ' Dominican-Republic': 6, ' Ecuador': 7, ' El-Salvador': 8, ' England': 9, ' France': 10, ' Germany': 11, ' Greece': 12, ' Guatemala': 13, ' Haiti': 14, ' Holand-Netherlands': 15, ' Honduras': 16, ' Hong': 17, ' Hungary': 18, ' India': 19, ' Iran': 20, ' Ireland': 21, ' Italy': 22, ' Jamaica': 23, ' Japan': 24, ' Laos': 25, ' Mexico': 26, ' Nicaragua': 27, ' Outlying-US(Guam-USVI-etc)': 28, ' Peru': 29, ' Philippines': 30, ' Poland': 31, ' Portugal': 32, ' Puerto-Rico': 33, ' Scotland': 34, ' South': 35, ' Taiwan': 36, ' Thailand': 37, ' Trinadad&Tobago': 38, ' United-States': 39, ' Vietnam': 40, ' Yugoslavia': 41}\n", "==================================================\n" ] } ], "source": [ "categorical_names = {}\n", "for feature in categorical_features:\n", " print(\"Feature: \", feature)\n", " le = sklearn.preprocessing.LabelEncoder()\n", " le.fit(data.iloc[:, feature])\n", " data.iloc[:, feature] = le.transform(data.iloc[:, feature])\n", " categorical_names[feature] = le.classes_\n", " le_label_mapping = dict(zip(le.classes_, le.transform(le.classes_)))\n", " print(le_label_mapping)\n", " print(\"==================================================\")" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "{1: array([' ?', ' Federal-gov', ' Local-gov', ' Never-worked', ' Private',\n", " ' Self-emp-inc', ' Self-emp-not-inc', ' State-gov', ' Without-pay'],\n", " dtype=object),\n", " 3: array([' 10th', ' 11th', ' 12th', ' 1st-4th', ' 5th-6th', ' 7th-8th',\n", " ' 9th', ' Assoc-acdm', ' Assoc-voc', ' Bachelors', ' Doctorate',\n", " ' HS-grad', ' Masters', ' Preschool', ' Prof-school',\n", " ' Some-college'], dtype=object),\n", " 5: array([' Divorced', ' Married-AF-spouse', ' Married-civ-spouse',\n", " ' Married-spouse-absent', ' Never-married', ' Separated',\n", " ' Widowed'], dtype=object),\n", " 6: array([' ?', ' Adm-clerical', ' Armed-Forces', ' Craft-repair',\n", " ' Exec-managerial', ' Farming-fishing', ' Handlers-cleaners',\n", " ' Machine-op-inspct', ' Other-service', ' Priv-house-serv',\n", " ' Prof-specialty', ' Protective-serv', ' Sales', ' Tech-support',\n", " ' Transport-moving'], dtype=object),\n", " 7: array([' Husband', ' Not-in-family', ' Other-relative', ' Own-child',\n", " ' Unmarried', ' Wife'], dtype=object),\n", " 8: array([' Amer-Indian-Eskimo', ' Asian-Pac-Islander', ' Black', ' Other',\n", " ' White'], dtype=object),\n", " 9: array([' Female', ' Male'], dtype=object),\n", " 13: array([' ?', ' 
Cambodia', ' Canada', ' China', ' Columbia', ' Cuba',\n", " ' Dominican-Republic', ' Ecuador', ' El-Salvador', ' England',\n", " ' France', ' Germany', ' Greece', ' Guatemala', ' Haiti',\n", " ' Holand-Netherlands', ' Honduras', ' Hong', ' Hungary', ' India',\n", " ' Iran', ' Ireland', ' Italy', ' Jamaica', ' Japan', ' Laos',\n", " ' Mexico', ' Nicaragua', ' Outlying-US(Guam-USVI-etc)', ' Peru',\n", " ' Philippines', ' Poland', ' Portugal', ' Puerto-Rico',\n", " ' Scotland', ' South', ' Taiwan', ' Thailand', ' Trinadad&Tobago',\n", " ' United-States', ' Vietnam', ' Yugoslavia'], dtype=object)}" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "categorical_names" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AgeWorkclassfnlwgtEducationEducation-NumMarital-StatusOccupationRelationshipRaceSexCapital-GainCapital-LossHours-per-weekCountry
03977751691341141217404039
15068331191324041001339
238421564611906141004039
35342347211726021004039
428433840991321052000405
5374284582121424540004039
64941601876538120001623
752620964211924041004539
83144578112144101401408405039
942415944991324041517804039
10374280464151024021008039
11307141297913210011004019
1223412227291341340003039
13324205019712412121005039
144041217728112301100400
1534424548754214001004526
1625617675611945341003539
1732418682411947441004039
183842888717212041005039
19436292175121404440004539
204041935241016210041006039
2154430214611958420002039
22351768456525021004039
2343411703717214041020424039
24594109015119013440004039
25562216851913213041004039
2619416829411943341004039
27540180211151020011006035
2839436726011904141008039
2949419336611923041004039
.............................................
325313003381191340110009939
325323442044611016210041006039
3253354433799291324011005024
32534374179137151001440003939
3253522432503328411321003539
3253634416021691344140005539
3253730434589811943121004639
325383841391809130104201502004539
32539710287372101620041001039
3254045725220811951340004039
3254141020282211950120003239
3254272012991211920041002539
32543452119199712010440004839
32544314199655121408130003039
3254539211149971221540002039
32546374198216712013140004039
3254743426076111927041004026
32548656993591415410141108606039
32549437255835151001240004039
3255043627242151023041005039
32551324340660626001004039
3255243484661811212041004539
325533241161381214413111001136
32554534321865121424041004039
325552243101521510411141004039
32556274257302712213540003839
3255740415437411927041004039
3255858415191011961440004039
3255922420149011941341002039
32560525287927119245401502404039
\n", "

32561 rows × 14 columns

\n", "
" ], "text/plain": [ " Age Workclass fnlwgt Education Education-Num Marital-Status \\\n", "0 39 7 77516 9 13 4 \n", "1 50 6 83311 9 13 2 \n", "2 38 4 215646 11 9 0 \n", "3 53 4 234721 1 7 2 \n", "4 28 4 338409 9 13 2 \n", "5 37 4 284582 12 14 2 \n", "6 49 4 160187 6 5 3 \n", "7 52 6 209642 11 9 2 \n", "8 31 4 45781 12 14 4 \n", "9 42 4 159449 9 13 2 \n", "10 37 4 280464 15 10 2 \n", "11 30 7 141297 9 13 2 \n", "12 23 4 122272 9 13 4 \n", "13 32 4 205019 7 12 4 \n", "14 40 4 121772 8 11 2 \n", "15 34 4 245487 5 4 2 \n", "16 25 6 176756 11 9 4 \n", "17 32 4 186824 11 9 4 \n", "18 38 4 28887 1 7 2 \n", "19 43 6 292175 12 14 0 \n", "20 40 4 193524 10 16 2 \n", "21 54 4 302146 11 9 5 \n", "22 35 1 76845 6 5 2 \n", "23 43 4 117037 1 7 2 \n", "24 59 4 109015 11 9 0 \n", "25 56 2 216851 9 13 2 \n", "26 19 4 168294 11 9 4 \n", "27 54 0 180211 15 10 2 \n", "28 39 4 367260 11 9 0 \n", "29 49 4 193366 11 9 2 \n", "... ... ... ... ... ... ... \n", "32531 30 0 33811 9 13 4 \n", "32532 34 4 204461 10 16 2 \n", "32533 54 4 337992 9 13 2 \n", "32534 37 4 179137 15 10 0 \n", "32535 22 4 325033 2 8 4 \n", "32536 34 4 160216 9 13 4 \n", "32537 30 4 345898 11 9 4 \n", "32538 38 4 139180 9 13 0 \n", "32539 71 0 287372 10 16 2 \n", "32540 45 7 252208 11 9 5 \n", "32541 41 0 202822 11 9 5 \n", "32542 72 0 129912 11 9 2 \n", "32543 45 2 119199 7 12 0 \n", "32544 31 4 199655 12 14 0 \n", "32545 39 2 111499 7 12 2 \n", "32546 37 4 198216 7 12 0 \n", "32547 43 4 260761 11 9 2 \n", "32548 65 6 99359 14 15 4 \n", "32549 43 7 255835 15 10 0 \n", "32550 43 6 27242 15 10 2 \n", "32551 32 4 34066 0 6 2 \n", "32552 43 4 84661 8 11 2 \n", "32553 32 4 116138 12 14 4 \n", "32554 53 4 321865 12 14 2 \n", "32555 22 4 310152 15 10 4 \n", "32556 27 4 257302 7 12 2 \n", "32557 40 4 154374 11 9 2 \n", "32558 58 4 151910 11 9 6 \n", "32559 22 4 201490 11 9 4 \n", "32560 52 5 287927 11 9 2 \n", "\n", " Occupation Relationship Race Sex Capital-Gain Capital-Loss \\\n", "0 1 1 4 1 2174 0 \n", "1 4 0 4 1 0 0 \n", "2 6 1 4 1 0 0 \n", "3 6 0 2 1 0 0 \n", "4 10 5 2 0 0 0 \n", "5 4 5 4 0 0 0 \n", "6 8 1 2 0 0 0 \n", "7 4 0 4 1 0 0 \n", "8 10 1 4 0 14084 0 \n", "9 4 0 4 1 5178 0 \n", "10 4 0 2 1 0 0 \n", "11 10 0 1 1 0 0 \n", "12 1 3 4 0 0 0 \n", "13 12 1 2 1 0 0 \n", "14 3 0 1 1 0 0 \n", "15 14 0 0 1 0 0 \n", "16 5 3 4 1 0 0 \n", "17 7 4 4 1 0 0 \n", "18 12 0 4 1 0 0 \n", "19 4 4 4 0 0 0 \n", "20 10 0 4 1 0 0 \n", "21 8 4 2 0 0 0 \n", "22 5 0 2 1 0 0 \n", "23 14 0 4 1 0 2042 \n", "24 13 4 4 0 0 0 \n", "25 13 0 4 1 0 0 \n", "26 3 3 4 1 0 0 \n", "27 0 0 1 1 0 0 \n", "28 4 1 4 1 0 0 \n", "29 3 0 4 1 0 0 \n", "... ... ... ... ... ... ... 
\n", "32531 0 1 1 0 0 0 \n", "32532 10 0 4 1 0 0 \n", "32533 4 0 1 1 0 0 \n", "32534 1 4 4 0 0 0 \n", "32535 11 3 2 1 0 0 \n", "32536 4 1 4 0 0 0 \n", "32537 3 1 2 1 0 0 \n", "32538 10 4 2 0 15020 0 \n", "32539 0 0 4 1 0 0 \n", "32540 1 3 4 0 0 0 \n", "32541 0 1 2 0 0 0 \n", "32542 0 0 4 1 0 0 \n", "32543 10 4 4 0 0 0 \n", "32544 8 1 3 0 0 0 \n", "32545 1 5 4 0 0 0 \n", "32546 13 1 4 0 0 0 \n", "32547 7 0 4 1 0 0 \n", "32548 10 1 4 1 1086 0 \n", "32549 1 2 4 0 0 0 \n", "32550 3 0 4 1 0 0 \n", "32551 6 0 0 1 0 0 \n", "32552 12 0 4 1 0 0 \n", "32553 13 1 1 1 0 0 \n", "32554 4 0 4 1 0 0 \n", "32555 11 1 4 1 0 0 \n", "32556 13 5 4 0 0 0 \n", "32557 7 0 4 1 0 0 \n", "32558 1 4 4 0 0 0 \n", "32559 1 3 4 1 0 0 \n", "32560 4 5 4 0 15024 0 \n", "\n", " Hours-per-week Country \n", "0 40 39 \n", "1 13 39 \n", "2 40 39 \n", "3 40 39 \n", "4 40 5 \n", "5 40 39 \n", "6 16 23 \n", "7 45 39 \n", "8 50 39 \n", "9 40 39 \n", "10 80 39 \n", "11 40 19 \n", "12 30 39 \n", "13 50 39 \n", "14 40 0 \n", "15 45 26 \n", "16 35 39 \n", "17 40 39 \n", "18 50 39 \n", "19 45 39 \n", "20 60 39 \n", "21 20 39 \n", "22 40 39 \n", "23 40 39 \n", "24 40 39 \n", "25 40 39 \n", "26 40 39 \n", "27 60 35 \n", "28 80 39 \n", "29 40 39 \n", "... ... ... \n", "32531 99 39 \n", "32532 60 39 \n", "32533 50 24 \n", "32534 39 39 \n", "32535 35 39 \n", "32536 55 39 \n", "32537 46 39 \n", "32538 45 39 \n", "32539 10 39 \n", "32540 40 39 \n", "32541 32 39 \n", "32542 25 39 \n", "32543 48 39 \n", "32544 30 39 \n", "32545 20 39 \n", "32546 40 39 \n", "32547 40 26 \n", "32548 60 39 \n", "32549 40 39 \n", "32550 50 39 \n", "32551 40 39 \n", "32552 45 39 \n", "32553 11 36 \n", "32554 40 39 \n", "32555 40 39 \n", "32556 38 39 \n", "32557 40 39 \n", "32558 40 39 \n", "32559 20 39 \n", "32560 40 39 \n", "\n", "[32561 rows x 14 columns]" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Age int64\n", "Workclass int64\n", "fnlwgt int64\n", "Education int64\n", "Education-Num int64\n", "Marital-Status int64\n", "Occupation int64\n", "Relationship int64\n", "Race int64\n", "Sex int64\n", "Capital-Gain int64\n", "Capital-Loss int64\n", "Hours-per-week int64\n", "Country int64\n", "dtype: object" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.dtypes" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "data = data.astype(float)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "encoder = sklearn.preprocessing.OneHotEncoder(categorical_features=categorical_features)" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Users/uditagupta/anaconda3/lib/python3.7/site-packages/sklearn/model_selection/_split.py:2179: FutureWarning: From version 0.21, test_size will always complement train_size unless both are specified.\n", " FutureWarning)\n" ] } ], "source": [ "np.random.seed(1)\n", "train, test, labels_train, labels_test = sklearn.model_selection.train_test_split(data, labels, train_size=0.80, random_state=0)\n" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AgeWorkclassfnlwgtEducationEducation-NumMarital-StatusOccupationRelationshipRaceSexCapital-GainCapital-LossHours-per-weekCountry
2227827.04.0177119.015.010.00.01.04.04.00.00.00.044.039.0
895027.04.0216481.09.013.04.010.01.04.00.00.00.040.039.0
783825.04.0256263.07.012.02.012.00.04.01.00.00.040.039.0
1650546.04.0147640.04.03.02.014.00.00.01.00.01902.040.039.0
1914045.04.0172822.01.07.00.014.01.04.01.00.02824.076.039.0
1231929.04.0203697.09.013.02.010.03.04.01.00.00.075.039.0
2858942.02.0226020.011.09.05.08.01.02.00.00.00.060.00.0
1000034.04.0120461.015.010.00.01.01.04.00.00.00.050.039.0
2853060.02.0101110.08.011.00.010.04.04.01.00.00.040.039.0
2423719.04.0102723.015.010.04.01.03.04.00.00.00.040.039.0
1222721.04.0204596.07.012.04.01.03.04.00.00.00.08.039.0
82131.04.0171871.012.014.04.010.01.04.00.00.00.046.039.0
2501253.04.0123011.012.014.02.04.00.04.01.00.00.045.039.0
2457935.00.0226379.011.09.02.00.02.04.00.00.00.025.039.0
2929551.02.0153908.012.014.00.04.01.04.00.00.00.040.039.0
476331.01.0206823.09.013.00.011.01.04.01.00.00.050.039.0
1037137.04.0164898.015.010.02.03.00.04.01.00.00.052.039.0
696429.04.087507.010.016.02.010.00.01.01.00.00.060.019.0
990768.00.0117542.011.09.02.00.00.04.01.01409.00.015.039.0
3059424.04.0176389.011.09.02.03.00.04.01.00.00.040.039.0
2863546.04.0268358.01.07.05.07.01.04.00.00.00.040.039.0
2881953.04.0126592.015.010.02.03.00.02.01.07688.00.040.039.0
2698054.04.0421561.015.010.02.010.00.04.01.00.00.040.039.0
2214821.04.0322931.015.010.04.03.01.04.01.00.00.055.039.0
2184056.04.0245215.010.016.02.010.00.04.01.00.00.050.039.0
1533435.06.097277.014.015.02.010.00.04.01.00.00.065.039.0
2040946.04.0219611.015.010.02.01.00.04.01.00.00.040.039.0
369536.06.036425.015.010.00.012.04.04.00.00.00.035.039.0
152320.04.0170800.015.010.04.05.03.04.00.00.00.040.039.0
1483329.04.0193152.011.09.04.06.01.04.01.00.01408.040.039.0
.............................................
2028250.01.065160.09.013.02.04.00.04.01.00.00.040.039.0
2198748.04.0395368.015.010.00.06.02.02.01.00.00.040.039.0
2807246.02.0175428.012.014.02.010.00.04.01.00.00.040.039.0
921133.00.0193172.08.011.02.00.03.04.00.07688.00.050.039.0
1179741.04.0204410.011.09.02.03.00.04.01.00.01485.044.039.0
116533.04.0178506.011.09.00.01.01.02.00.00.00.040.039.0
3172642.04.0147251.015.010.02.010.05.04.00.00.00.036.039.0
2664926.04.0190650.09.013.04.01.03.01.01.00.00.040.036.0
1691029.05.0241431.010.016.02.010.00.04.01.00.00.035.039.0
2856817.02.0195262.01.07.04.03.03.04.01.00.00.035.039.0
1666241.04.0156566.015.010.02.03.00.04.01.00.00.040.039.0
615620.04.0308239.015.010.04.08.03.04.01.00.00.016.039.0
216217.04.0195262.01.07.04.03.03.04.01.00.00.017.039.0
1108349.02.0193249.07.012.02.01.00.04.01.00.00.040.039.0
1297440.04.0167919.011.09.02.03.00.04.01.00.00.040.039.0
2521546.04.0197332.012.014.02.010.00.04.01.07688.00.046.039.0
489736.04.0398931.09.013.02.010.00.04.01.00.01485.050.039.0
855320.04.0211968.08.011.04.08.03.04.00.00.00.020.039.0
281741.04.0237321.03.02.02.07.00.04.01.00.00.040.026.0
1271447.07.054887.010.016.02.010.00.04.01.00.00.040.039.0
2108459.02.0105866.09.013.02.011.00.02.01.00.00.030.039.0
1092754.06.0114520.015.010.02.012.00.04.01.00.00.040.039.0
1145223.04.0188409.01.07.02.014.00.04.01.04508.00.025.039.0
2860933.04.0348152.011.09.02.03.00.04.01.00.00.040.039.0
2129739.04.090646.08.011.00.01.04.04.00.00.00.040.039.0
769437.05.039089.015.010.02.04.00.04.01.03103.00.050.039.0
1041028.06.0211032.01.07.04.03.01.04.01.00.00.040.026.0
104344.04.0167005.09.013.02.04.00.04.01.07688.00.060.039.0
3086053.04.0257940.011.09.02.07.00.04.01.02829.00.040.039.0
1246747.04.0120131.011.09.02.03.00.04.01.00.00.050.039.0
\n", "

6513 rows × 14 columns

\n", "
" ], "text/plain": [ " Age Workclass fnlwgt Education Education-Num Marital-Status \\\n", "22278 27.0 4.0 177119.0 15.0 10.0 0.0 \n", "8950 27.0 4.0 216481.0 9.0 13.0 4.0 \n", "7838 25.0 4.0 256263.0 7.0 12.0 2.0 \n", "16505 46.0 4.0 147640.0 4.0 3.0 2.0 \n", "19140 45.0 4.0 172822.0 1.0 7.0 0.0 \n", "12319 29.0 4.0 203697.0 9.0 13.0 2.0 \n", "28589 42.0 2.0 226020.0 11.0 9.0 5.0 \n", "10000 34.0 4.0 120461.0 15.0 10.0 0.0 \n", "28530 60.0 2.0 101110.0 8.0 11.0 0.0 \n", "24237 19.0 4.0 102723.0 15.0 10.0 4.0 \n", "12227 21.0 4.0 204596.0 7.0 12.0 4.0 \n", "821 31.0 4.0 171871.0 12.0 14.0 4.0 \n", "25012 53.0 4.0 123011.0 12.0 14.0 2.0 \n", "24579 35.0 0.0 226379.0 11.0 9.0 2.0 \n", "29295 51.0 2.0 153908.0 12.0 14.0 0.0 \n", "4763 31.0 1.0 206823.0 9.0 13.0 0.0 \n", "10371 37.0 4.0 164898.0 15.0 10.0 2.0 \n", "6964 29.0 4.0 87507.0 10.0 16.0 2.0 \n", "9907 68.0 0.0 117542.0 11.0 9.0 2.0 \n", "30594 24.0 4.0 176389.0 11.0 9.0 2.0 \n", "28635 46.0 4.0 268358.0 1.0 7.0 5.0 \n", "28819 53.0 4.0 126592.0 15.0 10.0 2.0 \n", "26980 54.0 4.0 421561.0 15.0 10.0 2.0 \n", "22148 21.0 4.0 322931.0 15.0 10.0 4.0 \n", "21840 56.0 4.0 245215.0 10.0 16.0 2.0 \n", "15334 35.0 6.0 97277.0 14.0 15.0 2.0 \n", "20409 46.0 4.0 219611.0 15.0 10.0 2.0 \n", "3695 36.0 6.0 36425.0 15.0 10.0 0.0 \n", "1523 20.0 4.0 170800.0 15.0 10.0 4.0 \n", "14833 29.0 4.0 193152.0 11.0 9.0 4.0 \n", "... ... ... ... ... ... ... \n", "20282 50.0 1.0 65160.0 9.0 13.0 2.0 \n", "21987 48.0 4.0 395368.0 15.0 10.0 0.0 \n", "28072 46.0 2.0 175428.0 12.0 14.0 2.0 \n", "9211 33.0 0.0 193172.0 8.0 11.0 2.0 \n", "11797 41.0 4.0 204410.0 11.0 9.0 2.0 \n", "1165 33.0 4.0 178506.0 11.0 9.0 0.0 \n", "31726 42.0 4.0 147251.0 15.0 10.0 2.0 \n", "26649 26.0 4.0 190650.0 9.0 13.0 4.0 \n", "16910 29.0 5.0 241431.0 10.0 16.0 2.0 \n", "28568 17.0 2.0 195262.0 1.0 7.0 4.0 \n", "16662 41.0 4.0 156566.0 15.0 10.0 2.0 \n", "6156 20.0 4.0 308239.0 15.0 10.0 4.0 \n", "2162 17.0 4.0 195262.0 1.0 7.0 4.0 \n", "11083 49.0 2.0 193249.0 7.0 12.0 2.0 \n", "12974 40.0 4.0 167919.0 11.0 9.0 2.0 \n", "25215 46.0 4.0 197332.0 12.0 14.0 2.0 \n", "4897 36.0 4.0 398931.0 9.0 13.0 2.0 \n", "8553 20.0 4.0 211968.0 8.0 11.0 4.0 \n", "2817 41.0 4.0 237321.0 3.0 2.0 2.0 \n", "12714 47.0 7.0 54887.0 10.0 16.0 2.0 \n", "21084 59.0 2.0 105866.0 9.0 13.0 2.0 \n", "10927 54.0 6.0 114520.0 15.0 10.0 2.0 \n", "11452 23.0 4.0 188409.0 1.0 7.0 2.0 \n", "28609 33.0 4.0 348152.0 11.0 9.0 2.0 \n", "21297 39.0 4.0 90646.0 8.0 11.0 0.0 \n", "7694 37.0 5.0 39089.0 15.0 10.0 2.0 \n", "10410 28.0 6.0 211032.0 1.0 7.0 4.0 \n", "1043 44.0 4.0 167005.0 9.0 13.0 2.0 \n", "30860 53.0 4.0 257940.0 11.0 9.0 2.0 \n", "12467 47.0 4.0 120131.0 11.0 9.0 2.0 \n", "\n", " Occupation Relationship Race Sex Capital-Gain Capital-Loss \\\n", "22278 1.0 4.0 4.0 0.0 0.0 0.0 \n", "8950 10.0 1.0 4.0 0.0 0.0 0.0 \n", "7838 12.0 0.0 4.0 1.0 0.0 0.0 \n", "16505 14.0 0.0 0.0 1.0 0.0 1902.0 \n", "19140 14.0 1.0 4.0 1.0 0.0 2824.0 \n", "12319 10.0 3.0 4.0 1.0 0.0 0.0 \n", "28589 8.0 1.0 2.0 0.0 0.0 0.0 \n", "10000 1.0 1.0 4.0 0.0 0.0 0.0 \n", "28530 10.0 4.0 4.0 1.0 0.0 0.0 \n", "24237 1.0 3.0 4.0 0.0 0.0 0.0 \n", "12227 1.0 3.0 4.0 0.0 0.0 0.0 \n", "821 10.0 1.0 4.0 0.0 0.0 0.0 \n", "25012 4.0 0.0 4.0 1.0 0.0 0.0 \n", "24579 0.0 2.0 4.0 0.0 0.0 0.0 \n", "29295 4.0 1.0 4.0 0.0 0.0 0.0 \n", "4763 11.0 1.0 4.0 1.0 0.0 0.0 \n", "10371 3.0 0.0 4.0 1.0 0.0 0.0 \n", "6964 10.0 0.0 1.0 1.0 0.0 0.0 \n", "9907 0.0 0.0 4.0 1.0 1409.0 0.0 \n", "30594 3.0 0.0 4.0 1.0 0.0 0.0 \n", "28635 7.0 1.0 4.0 0.0 0.0 0.0 \n", "28819 3.0 0.0 
2.0 1.0 7688.0 0.0 \n", "26980 10.0 0.0 4.0 1.0 0.0 0.0 \n", "22148 3.0 1.0 4.0 1.0 0.0 0.0 \n", "21840 10.0 0.0 4.0 1.0 0.0 0.0 \n", "15334 10.0 0.0 4.0 1.0 0.0 0.0 \n", "20409 1.0 0.0 4.0 1.0 0.0 0.0 \n", "3695 12.0 4.0 4.0 0.0 0.0 0.0 \n", "1523 5.0 3.0 4.0 0.0 0.0 0.0 \n", "14833 6.0 1.0 4.0 1.0 0.0 1408.0 \n", "... ... ... ... ... ... ... \n", "20282 4.0 0.0 4.0 1.0 0.0 0.0 \n", "21987 6.0 2.0 2.0 1.0 0.0 0.0 \n", "28072 10.0 0.0 4.0 1.0 0.0 0.0 \n", "9211 0.0 3.0 4.0 0.0 7688.0 0.0 \n", "11797 3.0 0.0 4.0 1.0 0.0 1485.0 \n", "1165 1.0 1.0 2.0 0.0 0.0 0.0 \n", "31726 10.0 5.0 4.0 0.0 0.0 0.0 \n", "26649 1.0 3.0 1.0 1.0 0.0 0.0 \n", "16910 10.0 0.0 4.0 1.0 0.0 0.0 \n", "28568 3.0 3.0 4.0 1.0 0.0 0.0 \n", "16662 3.0 0.0 4.0 1.0 0.0 0.0 \n", "6156 8.0 3.0 4.0 1.0 0.0 0.0 \n", "2162 3.0 3.0 4.0 1.0 0.0 0.0 \n", "11083 1.0 0.0 4.0 1.0 0.0 0.0 \n", "12974 3.0 0.0 4.0 1.0 0.0 0.0 \n", "25215 10.0 0.0 4.0 1.0 7688.0 0.0 \n", "4897 10.0 0.0 4.0 1.0 0.0 1485.0 \n", "8553 8.0 3.0 4.0 0.0 0.0 0.0 \n", "2817 7.0 0.0 4.0 1.0 0.0 0.0 \n", "12714 10.0 0.0 4.0 1.0 0.0 0.0 \n", "21084 11.0 0.0 2.0 1.0 0.0 0.0 \n", "10927 12.0 0.0 4.0 1.0 0.0 0.0 \n", "11452 14.0 0.0 4.0 1.0 4508.0 0.0 \n", "28609 3.0 0.0 4.0 1.0 0.0 0.0 \n", "21297 1.0 4.0 4.0 0.0 0.0 0.0 \n", "7694 4.0 0.0 4.0 1.0 3103.0 0.0 \n", "10410 3.0 1.0 4.0 1.0 0.0 0.0 \n", "1043 4.0 0.0 4.0 1.0 7688.0 0.0 \n", "30860 7.0 0.0 4.0 1.0 2829.0 0.0 \n", "12467 3.0 0.0 4.0 1.0 0.0 0.0 \n", "\n", " Hours-per-week Country \n", "22278 44.0 39.0 \n", "8950 40.0 39.0 \n", "7838 40.0 39.0 \n", "16505 40.0 39.0 \n", "19140 76.0 39.0 \n", "12319 75.0 39.0 \n", "28589 60.0 0.0 \n", "10000 50.0 39.0 \n", "28530 40.0 39.0 \n", "24237 40.0 39.0 \n", "12227 8.0 39.0 \n", "821 46.0 39.0 \n", "25012 45.0 39.0 \n", "24579 25.0 39.0 \n", "29295 40.0 39.0 \n", "4763 50.0 39.0 \n", "10371 52.0 39.0 \n", "6964 60.0 19.0 \n", "9907 15.0 39.0 \n", "30594 40.0 39.0 \n", "28635 40.0 39.0 \n", "28819 40.0 39.0 \n", "26980 40.0 39.0 \n", "22148 55.0 39.0 \n", "21840 50.0 39.0 \n", "15334 65.0 39.0 \n", "20409 40.0 39.0 \n", "3695 35.0 39.0 \n", "1523 40.0 39.0 \n", "14833 40.0 39.0 \n", "... ... ... 
\n", "20282 40.0 39.0 \n", "21987 40.0 39.0 \n", "28072 40.0 39.0 \n", "9211 50.0 39.0 \n", "11797 44.0 39.0 \n", "1165 40.0 39.0 \n", "31726 36.0 39.0 \n", "26649 40.0 36.0 \n", "16910 35.0 39.0 \n", "28568 35.0 39.0 \n", "16662 40.0 39.0 \n", "6156 16.0 39.0 \n", "2162 17.0 39.0 \n", "11083 40.0 39.0 \n", "12974 40.0 39.0 \n", "25215 46.0 39.0 \n", "4897 50.0 39.0 \n", "8553 20.0 39.0 \n", "2817 40.0 26.0 \n", "12714 40.0 39.0 \n", "21084 30.0 39.0 \n", "10927 40.0 39.0 \n", "11452 25.0 39.0 \n", "28609 40.0 39.0 \n", "21297 40.0 39.0 \n", "7694 50.0 39.0 \n", "10410 40.0 26.0 \n", "1043 60.0 39.0 \n", "30860 40.0 39.0 \n", "12467 50.0 39.0 \n", "\n", "[6513 rows x 14 columns]" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "test" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([0, 0, 0, ..., 1, 0, 1])" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "labels_test" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(6513,)" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "labels_test.shape" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(26048, 14)" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train.shape" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Users/uditagupta/anaconda3/lib/python3.7/site-packages/sklearn/preprocessing/_encoders.py:368: FutureWarning: The handling of integer data will change in version 0.22. Currently, the categories are determined based on the range [0, max(values)], while in the future they will be determined based on the unique values.\n", "If you want the future behaviour and silence this warning, you can specify \"categories='auto'\".\n", "In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.\n", " warnings.warn(msg, FutureWarning)\n", "/Users/uditagupta/anaconda3/lib/python3.7/site-packages/sklearn/preprocessing/_encoders.py:390: DeprecationWarning: The 'categorical_features' keyword is deprecated in version 0.20 and will be removed in 0.22. You can use the ColumnTransformer instead.\n", " \"use the ColumnTransformer instead.\", DeprecationWarning)\n" ] } ], "source": [ "encoder.fit(data)\n", "encoded_train = encoder.transform(train)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This time, we use gradient boosted trees as the model, using the [xgboost](https://github.com/dmlc/xgboost) package." 
] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,\n", " colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,\n", " max_depth=5, min_child_weight=1, missing=None, n_estimators=300,\n", " n_jobs=1, nthread=None, objective='binary:logistic', random_state=0,\n", " reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,\n", " silent=True, subsample=1)" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import xgboost\n", "gbtree = xgboost.XGBClassifier(n_estimators=300, max_depth=5)\n", "gbtree.fit(encoded_train, labels_train)\n" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.869031168432366" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sklearn.metrics.accuracy_score(labels_test, gbtree.predict(encoder.transform(test)))" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [], "source": [ "predict_fn = lambda x: gbtree.predict_proba(encoder.transform(x)).astype(float)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Explaining predictions" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Users/uditagupta/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:1: FutureWarning: Method .as_matrix will be removed in a future version. Use .values instead.\n", " \"\"\"Entry point for launching an IPython kernel.\n" ] } ], "source": [ "explainer = lime.lime_tabular.LimeTabularExplainer(train.as_matrix() ,feature_names = feature_names,class_names=class_names,\n", " categorical_features=categorical_features, \n", " categorical_names=categorical_names)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "classifier_fn – classifier prediction probability function, which takes a numpy array and outputs prediction probabilities. For ScikitClassifiers , this is classifier.predict_proba." ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Users/uditagupta/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:3: FutureWarning: Method .as_matrix will be removed in a future version. 
Use .values instead.\n", " This is separate from the ipykernel package so we can avoid doing imports until\n" ] } ], "source": [ "np.random.seed(1)\n", "i = 1653\n", "exp = explainer.explain_instance((test.as_matrix())[i], predict_fn, num_features=5)\n" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Actual class 0\n" ] } ], "source": [ "print('Actual class', labels_test[i])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Visualizing explanations" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAegAAAEICAYAAACUFGeOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzt3XmYHVW97vHvC2EUAsgkgxBkkBsQo2kZVBQQMXqOIooHcqIGRVGRi6ioiCiK6AFHDkePGhmVKEMUjIgCAhEEIulACAQZIoZLBCFKZJAZ3vtHrZadzd7dO50eKun38zz7Se21Vq36repO/2qtqt4t20RERES9rDTcAURERMTzJUFHRETUUBJ0REREDSVBR0RE1FASdERERA0lQUdERNRQEnTEckqSJW0zxMc8Q9LxQ3nMhmPPk7THIPS7saQrJT0s6ZsD3X/TsRZI2nswjxErjiToiAGSH74Dp9WFgO0dbM8YhMMdAvwNGG37k4PQ/3JF0kGSnpH0SMNrj4b6MZKukPSopFsbv+clfVHSWQ3vNyttTpakIR7Kci8JOiJGui2BW9yPT22SNGoQ4hkUkl4oaZUOm19re62G14yGup8CNwDrA58DpknasMXxtgSuBKbbPrw/53ekS4KOGAKSPihpvqQHJE2XtGlD3Q6SLi1190k6upTvLOlaSf+QdK+k70hatcPjrSPp1LLfXyQdL2nlUvc9SdMa2p4o6TJV9pC0UNLRkv5WVgUmtTnGepIulLRI0uKyvXlD/QxJX5Z0dVk+vkTSBg3150n6q6QHyxLzDqX8EGAS8Okye/tlKf/XCoWk1SSdJOme8jpJ0mqlrmcMn5R0fzkH72szhjOAyQ3H2rvDvj8j6a/A6W36/aCkP5Zx3yLplS3atP36lq/Ft0v8D0qaK2nHUveW0ufD5Wt7ZOvvgud5I7BQ0jd7+lpakrYDXgkca/sx2z8DbgLe2dRua6rk/BPbn+7PsSIJOmLQSdoL+C/gP4BNgLuAs0vd2sBvgd8AmwLbAJeVXZ8BPg5sAOwGvAE4tMPDngk8Xfp7BbAP8IFS90lgJ1VLmbsDBwOTG2Y4LyrH3IwqeU2R9NIWx1iJKkFtCWwBPAZ8p6nNfwLvAzYCVgUak8mvgW1L3fXAVADbU8r218rs7a0tjv05YFdgHPByYGfgmIb6FwHrlDEcDHxX0nrNndg+qOlYv+2w7xeWcR/S3KekdwFfBN4LjAbeBvy9xRh6+/ruA7wO2A5YFzigoY9TgQ/ZXhvYEbi8Rd/PY/uccoxngUskzZJ0aKvzAryiXKDdLunzem6lYAfgTtsPN7S9sZT3eAlVcv6B7c93Elu0YTuvvPIagBewANi7RfmpVAmg5/1awFPAGGAicEOH/R8BnN/w3sA2LdptDDwBrNFQNhG4ouH9zsADVBcLExvK96BK7C9oKDsX+HzZPgM4vk1844DFDe9nAMc0vD8U+E2bfdct41mn3XEazy/wJ+AtDXVvAhY0jOExYFRD/f3Arm2OvcSxOuj7SWD1Xr5OFwMfW5rvkeavL7AXcDvVhcJKTe3+H/Ahqnvm/f1eXRl4S/na/oPqgnF0qXsJsBXVBdjLgFuAz5a69wAzm/r6CnBG2f4i8FDpc+vB/P82El6ZQUcMvk2pEiEAth+hmg1tBryYKiE8j6TtyrLxXyU9BHyVarbVly2BVYB7y/LpP4AfUM1Ue2K4DrgTENUP6UaLbf+z4f1dZQzN8a0p6QeS7irxXQms27OUXvy1YftRqosTJK0s6QRJfyr7LihtOhkfNJ3TFjH+3fbTrY49AH0vsv14L/u3/Zo26u3ra/tyqtWI7wL3SZoiaXTZ9Z1UyfUuSb+TtFuH4/oX288AN1PNfh+gmomvUurutP1n28/avgk4Dti/7PoI1apAo9FA44x6OnAacLmq+9DRT0nQEYPvHqqkCYCkF1A9YPMX4G5g6zb7fQ+4FdjW9mjgaKqE2pe7qWbQG9het7xG2/7XMqSkjwKrldia7xGuV2LssUVp1+yTwEuBXUp8r+vpvoMY/xPYF9ibail6TNO+fT1QtMQ57SXG/uir775i6+1r2qjXr6/tk22Pp1o+3g74VCmfZXtfqguuC3j+BVZbktYqtzYup7qtsBlwgO0dbbdahodqvD1xzQNeUm7N9Hh5KX9uB/sTwIVUSXqzTuOLJSVBRwysVSSt3vAaBfwEeJ+kceVho68Cf7C9gOqH2IskHVEeTlpb0i6lr7WplgsfkbQ98JFOArB9L3AJ8E1JoyWtJGlrSa+Hfz3oczzwbqoly09LGtfUzZckrVruUf87cF6LQ61NtZT8D0kvBI7t7BT9a98nqFYS1qQ6J43uo1pqbeenwDGSNlT14NkXgLN6ab80lrXvU4AjJY0vD3tt02Ym2fbrK+lVknZR9dT1P4HHgWfK12SSpHVsP1X2f6aToCRNoLrQOIBqRWUz24fantXU7s2SNi7b2wOfB34BYPt2YA5wbPn+3g/YCfhZi0MeRnV//LKe/mLpJEFHDKyLqJJWz+uLti+j+iH3M+BeqtnVgQCuHrZ5I/BWquXgO4A9S19HUs00HwZ+CJyzFHG8l+qhrFuAxcA0YJNywXAWcKLtG23fQTVz+3G5eKDEsZjqh/lU4MO2b21xjJOANah+h3gm1YNunfoR1dLxX0qMM5vqTwXGliX6C1rsfzzQDcyleor4+lI2EJapb9vnUd2X/QnV1+4CqofKmvX29R1dyhZTnae/A98ode8BFpRl8Q9TXWh14jZge9tvtn2O7SfatHsDMFfSP6m+n3/OkhdQBwJdJbYTgP1tL2ruxNVN6Q8B1wG/VcMT/NEZlRv7ERGo+kCKs2xv3lfbiBhcmUFHRETUUBJ0RER
REQNJUFHRETU0P8H+8BtRjg3uzEAAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "%matplotlib inline\n", "fig = exp.as_pyplot_figure()\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The explanations can also be exported as an html page (which we can render here in this notebook), using D3.js to render graphs. \n" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", "
\n", " \n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "exp.show_in_notebook(show_all=False)" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", "
\n", " \n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "exp.show_in_notebook(show_all=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The explanation is presented below as a list of weighted features." ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[('Capital Gain <= 0.00', -0.46352867055426844),\n", " ('Capital Loss <= 0.00', -0.09288199266604677),\n", " ('Hours per week <= 40.00', -0.08777192883996468),\n", " ('Marital Status= Divorced', -0.0874646710131535),\n", " ('37.00 < Age <= 47.00', 0.06711907353384895)]" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "exp.as_list()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Alternatively, we can save the fully contained html page to a file:" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [], "source": [ "exp.save_to_file('/tmp/oi.html')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Note that capital gain has very high weight. This makes sense. Now let's see an example where the person has a capital gain below the mean:" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Users/uditagupta/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:2: FutureWarning: Method .as_matrix will be removed in a future version. Use .values instead.\n", " \n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Actual class 1\n" ] }, { "data": { "text/html": [ "\n", " \n", " \n", "
\n", " \n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "i = 6510\n", "exp = explainer.explain_instance((test.as_matrix())[i], predict_fn, num_features=5)\n", "print('Actual class', labels_test[i])\n", "exp.show_in_notebook(show_all=False)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## SP-LIME " ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [], "source": [ "import warnings\n", "from lime import submodular_pick" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Args to the function:\n", "\n", "- explainer\n", "- data \n", "- predict_fn,\n", "- method='sample',\n", "- sample_size=1000,\n", "- num_exps_desired=5\n", "- num_features=10,\n", "\n", "Args:\n", "data: a numpy array where each row is a single input into predict_fn\n", "\n", "predict_fn: prediction function. For classifiers, this should be a\n", " function that takes a numpy array and outputs prediction\n", " probabilities. For regressors, this takes a numpy array and\n", " returns the predictions. For ScikitClassifiers, this is\n", " `classifier.predict_proba()`. For ScikitRegressors, this\n", " is `regressor.predict()`. The prediction function needs to work\n", " on multiple feature vectors (the vectors randomly perturbed\n", " from the data_row).\n", " \n", "method: The method to use to generate candidate explanations\n", " method == 'sample' will sample the data uniformly at\n", " random. The sample size is given by sample_size. Otherwise\n", " if method == 'full' then explanations will be generated for the\n", " entire data. \n", " \n", "sample_size: The number of instances to explain if method == 'sample'\n", "\n", "num_exps_desired: The number of explanation objects returned\n", "\n", "num_features: maximum number of features present in explanation" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Users/uditagupta/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:1: FutureWarning: Method .as_matrix will be removed in a future version. Use .values instead.\n", " \"\"\"Entry point for launching an IPython kernel.\n" ] } ], "source": [ "sp_obj = submodular_pick.SubmodularPick(explainer, train.as_matrix(), predict_fn, sample_size=10, num_features=5, num_exps_desired=5)\n", "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "V has the best indices from the test set to explain the overall predictions of the classifier" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[4, 8, 3, 0, 6]" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sp_obj.V" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Users/uditagupta/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:2: FutureWarning: Method .as_matrix will be removed in a future version. Use .values instead.\n", " \n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Actual class: 1\n" ] }, { "data": { "text/html": [ "\n", " \n", " \n", "
\n", " \n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "==========================\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/Users/uditagupta/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:2: FutureWarning: Method .as_matrix will be removed in a future version. Use .values instead.\n", " \n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Actual class: 1\n" ] }, { "data": { "text/html": [ "\n", " \n", " \n", "
\n", " \n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "==========================\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/Users/uditagupta/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:2: FutureWarning: Method .as_matrix will be removed in a future version. Use .values instead.\n", " \n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Actual class: 0\n" ] }, { "data": { "text/html": [ "\n", " \n", " \n", "
\n", " \n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "==========================\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/Users/uditagupta/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:2: FutureWarning: Method .as_matrix will be removed in a future version. Use .values instead.\n", " \n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Actual class: 0\n" ] }, { "data": { "text/html": [ "\n", " \n", " \n", "
\n", " \n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "==========================\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/Users/uditagupta/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:2: FutureWarning: Method .as_matrix will be removed in a future version. Use .values instead.\n", " \n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Actual class: 0\n" ] }, { "data": { "text/html": [ "\n", " \n", " \n", "
\n", " \n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "==========================\n" ] } ], "source": [ "for ind in sp_obj.V:\n", " exp = explainer.explain_instance(test.as_matrix()[ind], predict_fn, num_features=5)\n", " print(\"Actual class: \", labels_test[ind])\n", " exp.show_in_notebook(show_all=False)\n", " print(\"==========================\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## AIF360 with LIME" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "WARNING:root:Missing Data: 3620 rows removed from AdultDataset.\n" ] } ], "source": [ "np.random.seed(1)\n", "\n", "dataset_orig = AdultDataset()\n", "dataset_orig_train, dataset_orig_test = dataset_orig.split([0.7], shuffle=True)\n" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [ { "data": { "text/markdown": [ "#### Original training dataset" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Difference in mean outcomes between privileged and unprivileged groups = -0.197435\n" ] } ], "source": [ "# Metric for the original dataset\n", "sens_attr = dataset_orig_train.protected_attribute_names[1]\n", "sens_idx = dataset_orig_train.protected_attribute_names.index(sens_attr)\n", "privileged_groups = [{sens_attr:dataset_orig_train.privileged_protected_attributes[sens_idx][0]}] \n", "unprivileged_groups = [{sens_attr:dataset_orig_train.unprivileged_protected_attributes[sens_idx][0]}] \n", "metric_orig_train = BinaryLabelDatasetMetric(dataset_orig_train, \n", " unprivileged_groups=unprivileged_groups,\n", " privileged_groups=privileged_groups)\n", "display(Markdown(\"#### Original training dataset\"))\n", "print(\"Difference in mean outcomes between privileged and unprivileged groups = %f\" % metric_orig_train.mean_difference())" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'sex'" ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sens_attr" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Priviledged group is sex: 1 which is Male\n", "\n", "Un-priviledged group is sex: 0 which is Female" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [ { "data": { "text/markdown": [ "#### Training Data Details" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "shape of the training dataset (31655, 98)\n", "Training data favorable label 1.0\n", "Training data unfavorable label 0.0\n", "Training data protected attribute ['race', 'sex']\n", "Training data privileged protected attribute [array([1.]), array([1.])]\n", "Training data unprivileged protected attribute [array([0.]), array([0.])]\n" ] } ], "source": [ "display(Markdown(\"#### Training Data Details\"))\n", "print(\"shape of the training dataset\", dataset_orig_train.features.shape)\n", "print(\"Training data favorable label\", dataset_orig_train.favorable_label)\n", "print(\"Training data unfavorable label\", dataset_orig_train.unfavorable_label)\n", "print(\"Training data protected attribute\", dataset_orig_train.protected_attribute_names)\n", "print(\"Training 
data privileged protected attribute \", \n", " dataset_orig_train.privileged_protected_attributes)\n", "print(\"Training data unprivileged protected attribute \",\n", " dataset_orig_train.unprivileged_protected_attributes)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Model learnt on original data" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Users/uditagupta/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n", " FutureWarning)\n" ] } ], "source": [ "#Train model on given dataset\n", "\n", "dataset = dataset_orig_train # data to train on\n", "\n", "scale = StandardScaler().fit(dataset.features) # remember the scale\n", "\n", "model = LogisticRegression() # model to learn\n", "\n", "X_train = scale.transform(dataset.features) #apply the scale\n", "y_train = dataset.labels.ravel()\n", "\n", "\n", "model.fit(X_train, y_train, sample_weight=dataset.instance_weights)\n", "\n", "#save model\n", "lr_orig = model\n", "lr_scale_orig = scale" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [], "source": [ "def format_json(json_str):\n", " return json.dumps(json.loads(json_str, object_pairs_hook=OrderedDict), indent=2)" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Accuracy: 0.8466868135918036\n", "Corresponding abs(1-disparate impact) value: 0.29231897850291744\n", "Corresponding average odds difference value: -0.08632279207120046\n" ] } ], "source": [ "scale = lr_scale_orig\n", "\n", "model = lr_orig #model to test\n", "dataset = dataset_orig_test #data to test on\n", "\n", "X_test = scale.transform(dataset.features) #apply the same scale as applied to the training data\n", "y_test = dataset.labels.ravel()\n", "y_test_pred = model.predict(X_test)\n", "\n", "dataset_pred = dataset.copy()\n", "dataset_pred.labels = y_test_pred\n", "\n", "classified_metric = ClassificationMetric(dataset, \n", " dataset_pred,\n", " unprivileged_groups=unprivileged_groups,\n", " privileged_groups=privileged_groups)\n", "metric_pred = BinaryLabelDatasetMetric(dataset_pred,\n", " unprivileged_groups=unprivileged_groups,\n", " privileged_groups=privileged_groups)\n", "\n", "acc = accuracy_score(y_true=dataset.labels,\n", " y_pred=dataset_pred.labels)\n", "\n", "avg_odds_diff = classified_metric.average_odds_difference()\n", "disp_imp = metric_pred.disparate_impact()\n", "\n", "\n", "\n", "print(\"Accuracy: \", acc)\n", "print(\"Corresponding abs(1-disparate impact) value: \", disp_imp)\n", "print(\"Corresponding average odds difference value: \" , avg_odds_diff)\n" ] }, { "cell_type": "code", "execution_count": 48, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\n", " \"metric\": \"Disparate Impact\",\n", " \"message\": \"Disparate impact (probability of favorable outcome for unprivileged instances / probability of favorable outcome for privileged instances): 0.29231897850291744\",\n", " \"numPositivePredictionsUnprivileged\": 341.0,\n", " \"numUnprivileged\": 4498.0,\n", " \"numPositivePredictionsPrivileged\": 2352.0,\n", " \"numPrivileged\": 9069.0,\n", " \"description\": \"Computed as the ratio of rate of favorable outcome for the unprivileged group to that of the privileged group.\",\n", " \"ideal\": 
\"The ideal value of this metric is 1.0 A value < 1 implies higher benefit for the privileged group and a value >1 implies a higher benefit for the unprivileged group.\"\n", "}\n" ] } ], "source": [ "json_expl = MetricJSONExplainer(metric_pred)\n", "print(format_json(json_expl.disparate_impact()))" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\n", " \"metric\": \"Average Odds Difference\",\n", " \"message\": \"Average odds difference (average of TPR difference and FPR difference, 0 = equality of odds): -0.08632279207120046\",\n", " \"numFalsePositivesUnprivileged\": 91.0,\n", " \"numNegativesUnprivileged\": 4012.0,\n", " \"numTruePositivesUnprivileged\": 250.0,\n", " \"numPositivesUnprivileged\": 486.0,\n", " \"numFalsePositivesPrivileged\": 646.0,\n", " \"numNegativesPrivileged\": 6256.0,\n", " \"numTruePositivesPrivileged\": 1706.0,\n", " \"numPositivesPrivileged\": 2813.0,\n", " \"description\": \"Computed as average difference of false positive rate (false positives / negatives) and true positive rate (true positives / positives) between unprivileged and privileged groups.\",\n", " \"ideal\": \"The ideal value of this metric is 0. A value of < 0 implies higher benefit for the privileged group and a value > 0 implies higher benefit for the unprivileged group.\"\n", "}\n" ] } ], "source": [ "json_expl_class = MetricJSONExplainer(classified_metric)\n", "print(format_json(json_expl_class.average_odds_difference()))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Use LIME to generate explanations for predictions made using the learnt Logistic Regression model" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", "
\n", " \n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ " Actual label: [0.]\n", "=========================================\n" ] }, { "data": { "text/html": [ "\n", " \n", " \n", "
\n", " \n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ " Actual label: [1.]\n" ] } ], "source": [ "limeData = LimeEncoder().fit(dataset_orig_train)\n", "s_train = limeData.transform(dataset_orig_train.features)\n", "s_test = limeData.transform(dataset_orig_test.features)\n", "\n", "scale = lr_scale_orig\n", "\n", "model = lr_orig #model to test\n", "\n", "\n", "\n", "\n", "explainer = lime.lime_tabular.LimeTabularExplainer(s_train ,class_names=limeData.s_class_names, \n", " feature_names = limeData.s_feature_names,\n", " categorical_features=limeData.s_categorical_features, \n", " categorical_names=limeData.s_categorical_names, \n", " verbose=False)\n", "\n", "s_predict_fn = lambda x: model.predict_proba(scale.transform(limeData.inverse_transform(x)))\n", "\n", "import random\n", "\n", "i1 = 1\n", "exp = explainer.explain_instance(s_test[i1], s_predict_fn, num_features=5)\n", "exp.show_in_notebook(show_all=False)\n", "print(\" Actual label: \" + str(dataset_orig_test.labels[i1]))\n", "\n", "print(\"=========================================\")\n", "\n", "i2 = 100\n", "exp = explainer.explain_instance(s_test[i2], s_predict_fn, num_features=5)\n", "exp.show_in_notebook(show_all=False)\n", "print(\" Actual label: \" + str(dataset_orig_test.labels[i2]))\n", "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Transform the data using the Re-Weighing (pre-processing) algorithm" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [], "source": [ "RW = Reweighing(unprivileged_groups=unprivileged_groups,\n", " privileged_groups=privileged_groups)\n", "RW.fit(dataset_orig_train)\n", "dataset_transf_train = RW.transform(dataset_orig_train)" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Users/uditagupta/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. 
Specify a solver to silence this warning.\n", " FutureWarning)\n" ] } ], "source": [ "#Train model on given dataset\n", "\n", "dataset = dataset_transf_train # data to train on\n", "\n", "scale = StandardScaler().fit(dataset.features) # remember the scale\n", "\n", "model = LogisticRegression() # model to learn\n", "\n", "X_train = scale.transform(dataset.features) #apply the scale\n", "y_train = dataset.labels.ravel()\n", "\n", "\n", "model.fit(X_train, y_train, sample_weight=dataset.instance_weights)\n", "\n", "#save model\n", "lr_orig = model\n", "lr_scale_orig = scale" ] }, { "cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Accuracy: 0.8409375691014963\n", "Corresponding abs(1-disparate impact) value: 0.6039349106264875\n", "Corresponding average odds difference value: 0.07761123916896892\n" ] } ], "source": [ "scale = lr_scale_orig\n", "\n", "model = lr_orig #model to test\n", "dataset = dataset_orig_test #data to test on\n", "\n", "X_test = scale.transform(dataset.features) #apply the same scale as applied to the training data\n", "y_test = dataset.labels.ravel()\n", "y_test_pred = model.predict(X_test)\n", "\n", "dataset_pred = dataset.copy()\n", "dataset_pred.labels = y_test_pred\n", "\n", "classified_metric = ClassificationMetric(dataset, \n", " dataset_pred,\n", " unprivileged_groups=unprivileged_groups,\n", " privileged_groups=privileged_groups)\n", "metric_pred = BinaryLabelDatasetMetric(dataset_pred,\n", " unprivileged_groups=unprivileged_groups,\n", " privileged_groups=privileged_groups)\n", "\n", "acc = accuracy_score(y_true=dataset.labels,\n", " y_pred=dataset_pred.labels)\n", "\n", "avg_odds_diff = classified_metric.average_odds_difference()\n", "disp_imp = metric_pred.disparate_impact()\n", "\n", "\n", "\n", "print(\"Accuracy: \", acc)\n", "print(\"Corresponding abs(1-disparate impact) value: \", disp_imp)\n", "print(\"Corresponding average odds difference value: \" , avg_odds_diff)\n" ] }, { "cell_type": "code", "execution_count": 54, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\n", " \"metric\": \"Disparate Impact\",\n", " \"message\": \"Disparate impact (probability of favorable outcome for unprivileged instances / probability of favorable outcome for privileged instances): 0.6039349106264875\",\n", " \"numPositivePredictionsUnprivileged\": 582.0,\n", " \"numUnprivileged\": 4498.0,\n", " \"numPositivePredictionsPrivileged\": 1943.0,\n", " \"numPrivileged\": 9069.0,\n", " \"description\": \"Computed as the ratio of rate of favorable outcome for the unprivileged group to that of the privileged group.\",\n", " \"ideal\": \"The ideal value of this metric is 1.0 A value < 1 implies higher benefit for the privileged group and a value >1 implies a higher benefit for the unprivileged group.\"\n", "}\n" ] } ], "source": [ "json_expl = MetricJSONExplainer(metric_pred)\n", "print(format_json(json_expl.disparate_impact()))" ] }, { "cell_type": "code", "execution_count": 55, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\n", " \"metric\": \"Average Odds Difference\",\n", " \"message\": \"Average odds difference (average of TPR difference and FPR difference, 0 = equality of odds): 0.07761123916896892\",\n", " \"numFalsePositivesUnprivileged\": 243.0,\n", " \"numNegativesUnprivileged\": 4012.0,\n", " \"numTruePositivesUnprivileged\": 339.0,\n", " \"numPositivesUnprivileged\": 486.0,\n", " 
\"numFalsePositivesPrivileged\": 449.0,\n", " \"numNegativesPrivileged\": 6256.0,\n", " \"numTruePositivesPrivileged\": 1494.0,\n", " \"numPositivesPrivileged\": 2813.0,\n", " \"description\": \"Computed as average difference of false positive rate (false positives / negatives) and true positive rate (true positives / positives) between unprivileged and privileged groups.\",\n", " \"ideal\": \"The ideal value of this metric is 0. A value of < 0 implies higher benefit for the privileged group and a value > 0 implies higher benefit for the unprivileged group.\"\n", "}\n" ] } ], "source": [ "json_expl_class = MetricJSONExplainer(classified_metric)\n", "print(format_json(json_expl_class.average_odds_difference()))" ] }, { "cell_type": "code", "execution_count": 56, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", "
\n", " \n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ " Actual label: [0.]\n", "=========================================\n" ] }, { "data": { "text/html": [ "\n", " \n", " \n", "
\n", " \n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ " Actual label: [1.]\n" ] } ], "source": [ "limeData = LimeEncoder().fit(dataset_orig_train)\n", "s_train = limeData.transform(dataset_orig_train.features)\n", "s_test = limeData.transform(dataset_orig_test.features)\n", "\n", "scale = lr_scale_orig\n", "\n", "model = lr_orig #model to test\n", "\n", "\n", "\n", "\n", "explainer = lime.lime_tabular.LimeTabularExplainer(s_train ,class_names=limeData.s_class_names, \n", " feature_names = limeData.s_feature_names,\n", " categorical_features=limeData.s_categorical_features, \n", " categorical_names=limeData.s_categorical_names, \n", " verbose=False)\n", "\n", "s_predict_fn = lambda x: model.predict_proba(scale.transform(limeData.inverse_transform(x)))\n", "\n", "import random\n", "\n", "\n", "exp = explainer.explain_instance(s_test[i1], s_predict_fn, num_features=5)\n", "exp.show_in_notebook(show_all=False)\n", "print(\" Actual label: \" + str(dataset_orig_test.labels[i1]))\n", "\n", "print(\"=========================================\")\n", "\n", "exp = explainer.explain_instance(s_test[i2], s_predict_fn, num_features=5)\n", "exp.show_in_notebook(show_all=False)\n", "print(\" Actual label: \" + str(dataset_orig_test.labels[i2]))\n", "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We can see that Sex (protected_attribute) is no longer being used in the explanations of the classifier" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.1" } }, "nbformat": 4, "nbformat_minor": 2 }