Source code for XLEMOO.ruleset_interpreter

from typing import List, Dict, Tuple
from imodels import SlipperClassifier, SkopeRulesClassifier
import numpy as np

Rules = Dict[Tuple[str, str], str]


[docs]def extract_slipper_rules( classifier: SlipperClassifier, ) -> Tuple[List[Rules], List[float]]: """Given a trained SlipperClassifier, extracts the trained rules alongside the weight for each rule. The rules are returned in a list of dictionaries. Each rule is represented by one dictionary. Each dictionary is of the format: {("feature_name", "comparison_op"): "value"} where comparison_op can be "<", "<=", ">", or ">=". The weight represents the importance of each rule. The feature names are expected to be of the format "x_i" where 'i' is zero-indexed (first feature is 'x_0' etc.). """ weights = classifier.estimator_weights_ raw_rules = classifier.rules_ # TODO: Use mean prediction instead! classifier.estimator_mean_prediction_ (list) if weights == []: # The error of the classifier is so small that it does not even begin to fit. Just set the weights of all rules # to one weights = [1] * len(raw_rules) rules = [rule.agg_dict for rule in raw_rules] return rules, weights
def extract_skoped_rules( classifier: SkopeRulesClassifier, ) -> Tuple[List[Rules], List[float]]: precisions = [rule.args[0] for rule in classifier.rules_] rules = [rule.agg_dict for rule in classifier.rules_] return rules, precisions
[docs]def instantiate_rules( rules: Rules, n_features: int, feature_limits: List[Tuple[float, float]], n_samples: int, ) -> np.ndarray: """Takes Rules and instantiates them producing n_samples of new decision variable vectors corresponding to the rules. If there are no rules for a variable, a random value is generated for that variable between its limits. Notice that when rules define a range for a variable, then that variable's value will be generated between those ranges randomly. Args: rules (Rules): Should be a dict with the following structure: {("feature_name", "comparison_op"): "value"} where comparison_op can be "<", "<=", ">", or ">=". The feature names are expected to be formatted as "x_i" where 'i' is zero indexed (i.e., x_0, x_1, x_2, etc.). n_features (int): Number of features to instantiate based on the rules provided. feature_limits (List[Tuple[float, float]]): 2D array, each row corresponds to a decision variable. The first column has the lower limits for each variable and the second the upper limit. n_samples (int): How many samples to generate based on the rules provided. Returns: np.ndarray: The new samples generated based on the provided rules in a 2D array. """ # collect generated samples samples = [] # collect each rule in tuple of threes and put them in a list # cast the indices to int and limits to float index_op_value = list( map( lambda key: (int(key[0].split("_")[-1]), key[1], float(rules[key])), rules.keys(), ) ) # group each rule according to the feature index op_value_per_index = {} for i, op, val in index_op_value: if i in op_value_per_index: op_value_per_index[i].append((op, val)) else: op_value_per_index[i] = [(op, val)] new_samples = np.zeros((n_samples, n_features)) # go through each feature index and instantiate the rules # for feature_i in op_value_per_index: for feature_i in range(n_features): # keep track of the lower and upper bounds for each feature, by default, # the bounds should be the given feature limits current_min = feature_limits[feature_i][0] current_max = feature_limits[feature_i][1] if not feature_i in op_value_per_index: # no rules for feature, instantiate between min and max new_samples[:, feature_i] = np.random.uniform( current_min, current_max, n_samples ) continue for (rule_i, (op, value)) in enumerate(op_value_per_index[feature_i]): if op in ["<", "<="]: # less than if value < current_max and value > current_min: current_max = value elif op in [">", ">="]: # greater than if value > current_min and value < current_max: current_min = value elif op in ["=", "=="]: # equality current_min = value current_max = value else: # unkown operator print( f"When instantiating rule {rules} got unknown operator {op}. Skipping.." ) pass # instantiate features in the samples according to rules new_samples[:, feature_i] = np.random.uniform( current_min, current_max, n_samples ) return new_samples
def _instantiate_ruleset_rules( rules: List[Rules], weights: List[float], n_features: int, feature_limits: List[Tuple[float, float]], n_samples: int, ) -> List[np.ndarray]: """Helper to 'instantiate_ruleset_rules'. See its description. List[np.ndarray]: A list of samples per rule. """ # based on the weights, figure out how many of the samples should be generated based on # each rule in the rule set. # ignore rules with negative weight if len(weights) < len(rules): # weights have been stopped to be added since the error of the # classifier is so small, just use the rules thus far with the given weights. rules = rules[: len(weights)] w_arr = np.array(weights) # ignore negative weights fractions = w_arr[w_arr >= 0] / np.sum(w_arr[w_arr >= 0]) n_per_rule = np.round(fractions * n_samples) instantiated = [] rules_pos_w = np.array(rules)[w_arr >= 0] for (rule_i, rule) in enumerate(rules_pos_w): instantiated.append( instantiate_rules(rule, n_features, feature_limits, int(n_per_rule[rule_i])) ) return instantiated
[docs]def instantiate_ruleset_rules( rules: List[Rules], weights: List[float], n_features: int, feature_limits: List[Tuple[float, float]], n_samples: int, ) -> np.ndarray: """Instantiate samples according to a rule set. Instantiates in total approximately n_samples of new samples according to the rules and features limits. If for some feature there are no rules, then only the feature limits are used. The feature limits will override rules if there is a conflict. The given weights will dictate how large of a fraction of n_samples will be generated for each rule. It is assumed that the rules supplied (in a list) have a weight at the same index in the argument weights. Args: rules (List[Rules]): The rules contained in the rule set. See 'instantiate_rules'. weights (List[float]): The weights for each rule in the rule set. It is assumed tht the weight at index i corresponds to the weight of rules at index i in 'rules'. n_features (int): How many new samples to generate according to the rules. This is approximate, but the total of new samples should be relatively close to this number. feature_limits (List[Tuple[float, float]]): Pairs representing the lower and upper bounds of each feature. n_samples (int): Approximately how many new samples to generate in total. Returns: np.ndarray: A 2D array with all the new generated samples. If a list of samples per rule is desired, see '_instantiate_ruleset_rules'. """ instantiated = _instantiate_ruleset_rules( rules, weights, n_features, feature_limits, n_samples ) return np.vstack(instantiated)