-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.py
30 lines (18 loc) · 930 Bytes
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import pandas as pd
def remove_if_duplicates(train_data):
    """Drop fully duplicated rows from ``train_data`` and report the counts.

    The frame is modified in place: only the first occurrence of each
    distinct row is kept, and the original index labels of the surviving
    rows are preserved. The duplicate count is printed before and after the
    removal, together with the resulting shape.

    Args:
        train_data: pandas DataFrame to deduplicate (mutated in place).

    Returns:
        The same DataFrame object that was passed in, now without duplicates.
    """
    n_before = train_data.duplicated().sum()
    print(f'Duplicates in train data: {n_before}')

    # In-place on purpose: callers holding a reference see the cleaned frame.
    train_data.drop_duplicates(keep='first', inplace=True)

    # Re-count after the drop as a sanity check on the result.
    n_after = train_data.duplicated().sum()
    print('Train data shape:', train_data.shape)
    print(f'Duplicates in train data: {n_after}')
    return train_data
def remove_if_duplicates_weights(train_data: pd.DataFrame):
    """Drop duplicated rows in place and return each kept row's multiplicity.

    ``weights[i]`` is the number of times the row now at position ``i`` of
    the deduplicated frame occurred in the original frame, so
    ``weights.sum()`` equals the original number of rows.

    Args:
        train_data: Frame to deduplicate. It is modified in place; only the
            first occurrence of every distinct row is kept.

    Returns:
        A ``(train_data, weights)`` tuple: the same (now deduplicated) frame
        object and a NumPy integer array with one weight per remaining row,
        in the same order as the rows.
    """
    duplicates_train = train_data.duplicated().sum()
    print('Duplicates in train data: {0}'.format(duplicates_train))

    # Do not use ``value_counts().values`` here: it is ordered by descending
    # frequency and silently omits rows containing NaN, so its entries do not
    # line up with the rows that ``drop_duplicates(keep='first')`` keeps.
    # Instead, attach the group size to every original row and pick the
    # first occurrences, which aligns the weights by construction.
    columns = list(train_data.columns)
    first_seen = ~train_data.duplicated(keep='first')
    occurrences = train_data.groupby(
        columns,
        sort=False,
        dropna=False,   # keep rows with NaN, as drop_duplicates does
        observed=True,  # ignore unused categories of categorical columns
    )[columns[0]].transform('size')
    weights = occurrences.to_numpy()[first_seen.to_numpy()]

    train_data.drop_duplicates(keep='first', inplace=True)

    # Re-count after the drop as a sanity check on the result.
    duplicates_train = train_data.duplicated().sum()
    print('Train data shape:', train_data.shape)
    print('Duplicates in train data: {0}'.format(duplicates_train))
    return train_data, weights