Skip to content

Commit aea0342

Browse files
authored
Merge pull request #196 from AutoFairAthenaRC/master
Adding Global Actions in A Nutshell For Counterfactual Explainability (GLANCE) framework
2 parents ed7dc7e + 3516dc2 commit aea0342

35 files changed

+5986
-0
lines changed

.github/workflows/Build.yml

+54
Original file line numberDiff line numberDiff line change
@@ -448,3 +448,57 @@ jobs:
448448

449449
- name: Step 5 - Test GroupedCEExplainer
450450
run: python ./tests/gce/test_gce.py
451+
452+
build-glance-on-py310:
453+
# The type of runner that the job will run on
454+
runs-on: "${{ matrix.os }}"
455+
strategy:
456+
matrix:
457+
#os: [ubuntu-18.04, ubuntu-latest, macos-latest, windows-latest]
458+
os: [ubuntu-20.04, macos-latest, windows-latest]
459+
python-version: ["3.10"]
460+
461+
# Steps represent a sequence of tasks that will be executed as part of the job
462+
steps:
463+
- name: Step 1 - checkout aix360 repository
464+
uses: actions/checkout@v3
465+
466+
- name: Step 2 - set up python version
467+
uses: actions/setup-python@v4
468+
with:
469+
python-version: "${{ matrix.python-version }}"
470+
471+
- name: Step 3 - upgrade setuptools
472+
run: pip3 install pytest nbmake wheel --upgrade setuptools
473+
474+
- name: Step 4 - Install aix360 with glance algorithm related dependencies
475+
run: pip3 install .[glance]
476+
477+
- name: Step 5 - Test Base
478+
run: pytest ./tests/glance/test_base.py
479+
480+
- name: Step 6 - Test Counterfactual Costs
481+
run: pytest ./tests/glance/test_counterfactual_costs.py
482+
483+
- name: Step 7 - Test Counterfactual Tree
484+
run: pytest ./tests/glance/test_counterfactual_tree.py
485+
486+
- name: Step 8 - Test Iterative Merges
487+
run: pytest ./tests/glance/test_iterative_merges.py
488+
489+
- name: Step 9 - Test KMeans
490+
run: pytest ./tests/glance/test_KMeans.py
491+
492+
- name: Step 10 - Test Local Cfs
493+
run: pytest ./tests/glance/test_local_cfs.py
494+
495+
- name: Step 11 - Test Node
496+
run: pytest ./tests/glance/test_node.py
497+
498+
- name: Step 12 - Test Phase2
499+
run: pytest ./tests/glance/test_phase2.py
500+
501+
- name: Step 13 - Test Utils
502+
run: pytest ./tests/glance/test_utils.py
503+
504+

aix360/algorithms/glance/__init__.py

Whitespace-only changes.

aix360/algorithms/glance/base.py

+115
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
from abc import ABC, abstractmethod
2+
import pandas as pd
3+
import numpy as np
4+
5+
6+
class ClusteringMethod(ABC):
    """
    Interface that every clustering method has to implement.

    Concrete subclasses must provide both ``fit`` and ``predict``; the class
    cannot be instantiated until they do.
    """

    def __init__(self):
        """
        Set up the ClusteringMethod. The base class itself stores no state.
        """

    @abstractmethod
    def fit(self, data: pd.DataFrame):
        """
        Train the clustering model on the supplied data.

        Parameters:
        - data (pd.DataFrame): DataFrame of input data to fit the model.
        """

    @abstractmethod
    def predict(self, instances: pd.DataFrame) -> np.ndarray:
        """
        Assign a cluster label to each of the supplied instances.

        Parameters:
        - instances (pd.DataFrame): DataFrame of input instances.

        Returns:
        - cluster_labels (np.ndarray): Array of cluster labels for each instance.
        """
39+
40+
41+
class LocalCounterfactualMethod(ABC):
    """
    Interface that every local counterfactual method has to implement.

    Concrete subclasses must provide both ``fit`` and ``explain_instances``.
    """

    def __init__(self):
        """
        Set up the LocalCounterfactualMethod. The base class stores no state.
        """

    @abstractmethod
    def fit(self, **kwargs):
        """
        Prepare the counterfactual method for use.

        Parameters:
        - **kwargs: Additional keyword arguments for fitting.
        """

    @abstractmethod
    def explain_instances(
        self,
        instances: pd.DataFrame,
        num_counterfactuals: int,
    ) -> pd.DataFrame:
        """
        Produce local counterfactuals for the supplied instances.

        Parameters:
        - instances (pd.DataFrame): DataFrame of input instances for which counterfactuals are desired.
        - num_counterfactuals (int): Number of counterfactuals to generate for each instance.

        Returns:
        - counterfactuals (pd.DataFrame): DataFrame of counterfactual instances.
        """
77+
78+
79+
class GlobalCounterfactualMethod(ABC):
    """
    Abstract base class for global counterfactual methods.

    Concrete subclasses must implement ``fit`` and ``explain_group``.
    """

    def __init__(self, **kwargs):
        """
        Initialize the GlobalCounterfactualMethod.

        Parameters:
        - **kwargs: Additional keyword arguments for init. The base class
          accepts and ignores them.
        """
        pass

    @abstractmethod
    def fit(self, X, y, **kwargs):
        """
        Fit the counterfactual method.

        Parameters:
        - X: Input data to fit on. The base class does not constrain its type
          (presumably a feature DataFrame; confirm against the concrete subclass).
        - y: Second positional fitting input (presumably the targets for X;
          confirm against the concrete subclass).
        - **kwargs: Additional keyword arguments for fitting.
        """
        pass

    @abstractmethod
    def explain_group(
        self, instances: "pd.DataFrame | None" = None
    ) -> pd.DataFrame:
        """
        Find the global counterfactuals for the given group of instances.

        Parameters:
        - instances (pd.DataFrame, optional): DataFrame of input instances for which global counterfactuals are desired.
            If None, explain the whole group of affected instances.

        Returns:
        - counterfactuals (pd.DataFrame): DataFrame of counterfactual instances.
        """
        # The annotation is a string so it is never evaluated at runtime and
        # needs no extra import on any supported Python version.
        pass
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
from .kmeans import KMeansMethod
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
from ..base import ClusteringMethod
2+
from sklearn.cluster import KMeans
3+
4+
5+
class KMeansMethod(ClusteringMethod):
    """
    Implementation of a clustering method using KMeans.

    This class provides an interface to apply KMeans clustering to a dataset.
    """

    def __init__(self, num_clusters, random_seed):
        """
        Initializes the KMeansMethod class.

        Parameters:
        ----------
        num_clusters : int
            The number of clusters to form as well as the number of centroids to generate.
        random_seed : int
            A seed for the random number generator to ensure reproducibility.
        """
        super().__init__()
        self.num_clusters = num_clusters
        self.random_seed = random_seed
        # Build the estimator from the requested settings right away, so
        # `self.model` never holds a default-configured KMeans that ignores
        # `num_clusters` and `random_seed`.
        self.model = self._build_model()

    def _build_model(self):
        """
        Creates an unfitted KMeans estimator from the current settings.

        Returns:
        -------
        sklearn.cluster.KMeans
            Estimator configured with `self.num_clusters` clusters, 10
            initialisations and `self.random_seed` as random state.
        """
        return KMeans(
            n_clusters=self.num_clusters, n_init=10, random_state=self.random_seed
        )

    def fit(self, data):
        """
        Fits the KMeans model on the provided dataset.

        A fresh estimator is created on every call, so changes made to
        `num_clusters` or `random_seed` after construction take effect.

        Parameters:
        ----------
        data : array-like or sparse matrix, shape (n_samples, n_features)
            Training instances to cluster.

        Returns:
        -------
        None
        """
        self.model = self._build_model()
        self.model.fit(data)

    def predict(self, instances):
        """
        Predicts the nearest cluster each sample in the provided data belongs to.

        `fit` must have been called first; otherwise the underlying estimator
        is still unfitted.

        Parameters:
        ----------
        instances : array-like or sparse matrix, shape (n_samples, n_features)
            New data to predict.

        Returns:
        -------
        labels : array, shape (n_samples,)
            Index of the cluster each sample belongs to.
        """
        return self.model.predict(instances)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
from typing import Callable, List, Dict
2+
import numpy as np
3+
import pandas as pd
4+
5+
6+
def build_dist_func_dataframe(
    X: pd.DataFrame,
    numerical_columns: List[str],
    categorical_columns: List[str],
    n_bins: int = 10,
) -> Callable[[pd.DataFrame, pd.DataFrame], pd.Series]:
    """
    Builds and returns a custom distance function for computing distances between rows of two DataFrames based on specified numerical and categorical columns.

    For every numerical column a bin width is derived from `X` as
    ``(max - min) / n_bins``. Values are not discretized; they are divided by
    that width, so numerical differences are expressed in "number of bins".
    A column whose width is not a positive number (constant column, empty or
    all-NaN reference data) is left unscaled (width 1), which avoids dividing
    by zero. For categorical columns, the distance is the number of
    mismatched values.

    Parameters:
    ----------
    X : pd.DataFrame
        The reference DataFrame used to determine the bin widths for numerical columns.
    numerical_columns : List[str]
        List of column names in `X` that contain numerical features.
    categorical_columns : List[str]
        List of column names in `X` that contain categorical features.
    n_bins : int, optional
        The number of bins to use when normalizing numerical columns, by default 10.
        Must be positive.

    Returns:
    -------
    Callable[[pd.DataFrame, pd.DataFrame], pd.Series]
        A distance function that takes two DataFrames as input (`X1` and `X2`) and returns a Series of distances between corresponding rows in `X1` and `X2`.

        The distance function works as follows:
        - For numerical columns: the absolute differences between scaled values are summed.
        - For categorical columns: the number of mismatches between values is counted.

        `X1` and `X2` are expected to share the same index and to contain all
        listed columns; the inputs are not modified.

    Raises:
    ------
    ValueError
        If `n_bins` is not positive.
    """
    if n_bins <= 0:
        raise ValueError(f"n_bins must be a positive integer, got {n_bins}")

    numerical_columns = list(numerical_columns)
    categorical_columns = list(categorical_columns)

    bin_widths = {}
    for col in numerical_columns:
        width = (X[col].max() - X[col].min()) / n_bins
        # A zero or NaN width would turn every scaled value into inf/NaN and
        # silently erase real differences; fall back to no scaling instead.
        bin_widths[col] = width if width > 0 else 1.0
    feat_intervals = pd.Series(bin_widths, dtype=float)

    def bin_numericals(instances: pd.DataFrame) -> pd.DataFrame:
        # Only the numerical columns are needed, and the division builds a new
        # frame, so the caller's data is never mutated.
        return instances[numerical_columns] / feat_intervals

    def dist_f(X1: pd.DataFrame, X2: pd.DataFrame) -> pd.Series:
        ret = (bin_numericals(X1) - bin_numericals(X2)).abs().sum(axis="columns")
        ret += (
            (X1[categorical_columns] != X2[categorical_columns])
            .astype(int)
            .sum(axis="columns")
        )

        return ret

    return dist_f
58+

aix360/algorithms/glance/counterfactual_tree/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)