-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmodeling_utils.py
125 lines (100 loc) · 3.84 KB
/
modeling_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# Evaluation metrics (recall, precision, F1) implemented for the NN
import tensorflow.compat.v1.keras.backend as K
import tensorflow as tf
# Disable TensorFlow eager execution. This call sits at module top level, so
# it takes effect as a side effect of importing this module.
tf.compat.v1.disable_eager_execution()
def recall_metric(y_true, y_pred):
    """Return recall: true positives divided by actual positives.

    Both counts are obtained by clipping values to [0, 1], rounding to
    0/1 and summing over every element of the tensor.

    Parameters
    ----------
    y_true : tensor
        Ground-truth labels.
    y_pred : tensor
        Predicted scores/labels.

    Returns
    -------
    tensor
        Scalar recall value.
    """
    # Elements where label and prediction both round to 1.
    hit_count = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    # Elements whose label rounds to 1.
    actual_count = K.sum(K.round(K.clip(y_true, 0, 1)))
    # epsilon keeps the division finite when there are no positives.
    return hit_count / (actual_count + K.epsilon())
def precision_metric(y_true, y_pred):
    """Return precision: true positives divided by predicted positives.

    Both counts are obtained by clipping values to [0, 1], rounding to
    0/1 and summing over every element of the tensor.

    Parameters
    ----------
    y_true : tensor
        Ground-truth labels.
    y_pred : tensor
        Predicted scores/labels.

    Returns
    -------
    tensor
        Scalar precision value.
    """
    # Elements where label and prediction both round to 1.
    hit_count = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    # Elements whose prediction rounds to 1.
    flagged_count = K.sum(K.round(K.clip(y_pred, 0, 1)))
    # epsilon keeps the division finite when nothing is predicted positive.
    return hit_count / (flagged_count + K.epsilon())
def f1_metric(y_true, y_pred):
    """Return the F1 score, the harmonic mean of precision and recall.

    Parameters
    ----------
    y_true : tensor
        Ground-truth labels.
    y_pred : tensor
        Predicted scores/labels.

    Returns
    -------
    tensor
        Scalar F1 value.
    """
    p = precision_metric(y_true, y_pred)
    r = recall_metric(y_true, y_pred)
    # epsilon guards against 0/0 when precision and recall are both zero.
    ratio = (p * r) / (p + r + K.epsilon())
    return 2 * ratio
from sklearn.base import BaseEstimator, TransformerMixin
import numpy as np
#Target encoder
class TargetEncoder(BaseEstimator, TransformerMixin):
    """Target encoder.

    Replaces categorical column(s) with the mean target value for
    each category.

    Attributes
    ----------
    cols : list or None
        Columns requested at construction time. ``None`` means the
        object-dtype columns are detected afresh on every ``fit``.
    cols_ : list
        Columns actually encoded by the most recent ``fit``.
    maps : dict
        ``{column: {category: mean target}}`` learned by ``fit``.
    """

    def __init__(self, cols=None):
        """Target encoder

        Parameters
        ----------
        cols : str or list of str
            Columns to target encode.  Default is to target
            encode all categorical columns in the DataFrame.
        """
        # A single column name is accepted as shorthand for a 1-item list.
        if isinstance(cols, str):
            self.cols = [cols]
        else:
            self.cols = cols

    def fit(self, X, y):
        """Fit target encoder to X and y

        Parameters
        ----------
        X : pandas DataFrame, shape [n_samples, n_columns]
            DataFrame containing columns to encode
        y : pandas Series, shape = [n_samples]
            Target values.

        Returns
        -------
        self : encoder
            Returns self.

        Raises
        ------
        ValueError
            If a requested column is not present in ``X``.
        """
        # Resolve the columns into a local instead of overwriting
        # ``self.cols``: the constructor parameter must survive fitting so
        # that a later fit on a different DataFrame re-detects its columns.
        if self.cols is None:
            cols = [col for col in X if str(X[col].dtype) == 'object']
        else:
            cols = list(self.cols)

        # Check columns are in X.  The f-string keeps the message valid
        # for non-string column labels as well.
        for col in cols:
            if col not in X:
                raise ValueError(f"Column '{col}' not in X")

        self.cols_ = cols

        # Per column: category value -> mean of y over rows in that category.
        self.maps = {}
        for col in cols:
            column = X[col]
            self.maps[col] = {
                value: y[column == value].mean()
                for value in column.unique()
            }
        return self

    def transform(self, X, y=None):
        """Perform the target encoding transformation.

        Parameters
        ----------
        X : pandas DataFrame, shape [n_samples, n_columns]
            DataFrame containing columns to encode
        y : ignored
            Not used by this method.

        Returns
        -------
        pandas DataFrame
            Copy of the input DataFrame with the encoded columns
            replaced; values not seen during ``fit`` become NaN.
        """
        Xo = X.copy()
        n_rows = X.shape[0]
        for col, tmap in self.maps.items():
            column = X[col]
            # Start from NaN so categories unseen during fit stay NaN.
            vals = np.full(n_rows, np.nan)
            for val, mean_target in tmap.items():
                vals[column == val] = mean_target
            Xo[col] = vals
        return Xo

    def fit_transform(self, X, y=None):
        """Fit and transform the data via target encoding.

        Parameters
        ----------
        X : pandas DataFrame, shape [n_samples, n_columns]
            DataFrame containing columns to encode
        y : pandas Series, shape = [n_samples]
            Target values (required!).

        Returns
        -------
        pandas DataFrame
            Input DataFrame with transformed columns
        """
        return self.fit(X, y).transform(X, y)