-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtransform.py
99 lines (78 loc) · 3.47 KB
/
transform.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import json
import os
from datetime import datetime
from dotenv import load_dotenv
from enum import Enum, auto
from utils.logs import log_type,generate_log
from utils.files import read_json_file, get_all_json_files_on_path,write_csv_file
# Populate the process environment before the settings below are read
# (presumably from a local .env file, per python-dotenv — confirm).
load_dotenv()
# Environment-driven settings. os.getenv returns None for any unset variable,
# and the os.path.join calls further down will fail on None.
# Root folder holding the raw JSON input files.
RAW_FOLDER = os.getenv('PARAMS_RAW_FOLDER')
# Root folder under which the transformed CSV datasets are written.
DATASETS_FOLDER = os.getenv('PARAMS_DATASETS_FOLDER')
# Sub-folder name used beneath both the raw and the datasets roots.
BUNDLE_APP = os.getenv("PARAMS_BUNDLE_APP")
class ResourceType(Enum):
    """Kinds of raw resources this script knows how to transform.

    The member *names* double as folder names: they are appended to the raw
    and dataset paths when locating inputs and writing outputs.
    """

    # Rows flattened by transform_metric_row.
    METRIC = auto()
    # Rows flattened by transform_report_row.
    REPORT = auto()
def transform_dimension(dimension):
    """Turn one dimension entry into a single-item mapping.

    Args:
        dimension: Mapping with a "dimension" key (the name) and a
            "stringValue" key (the value).

    Returns:
        A dict with exactly one item: ``{name: value}``.

    Raises:
        KeyError: If either expected key is missing.
    """
    name = dimension["dimension"]
    value = dimension["stringValue"]
    return {name: value}
def transform_metric_row(row):
    """Flatten one metric row into a single-level event dict.

    The event always contains a "date" key built from ``row["startTime"]``
    (its "year", "month" and "day" entries). If the row has "dimensions",
    each entry adds ``{entry["dimension"]: entry["stringValue"]}``; if it has
    "metrics", each entry adds ``{entry["metric"]: entry["decimalValue"]["value"]}``.
    Keys added later overwrite earlier ones with the same name.

    Args:
        row: Mapping with a "startTime" mapping and optional "dimensions"
            and "metrics" lists.

    Returns:
        The flattened event as a dict.

    Raises:
        KeyError: If "startTime" or any of the accessed nested keys is missing.
        ValueError: If a date component cannot be converted to an integer.
    """
    start = row["startTime"]
    # Zero-pad the components so the date is ISO 8601 (YYYY-MM-DD): an
    # unpadded "2023-1-5" sorts incorrectly as text and is rejected by strict
    # date parsers. int() also accepts components that arrive as numeric strings.
    year, month, day = int(start["year"]), int(start["month"]), int(start["day"])
    event = {"date": f"{year:04d}-{month:02d}-{day:02d}"}

    if "dimensions" in row:
        for dimension in row["dimensions"]:
            event[dimension["dimension"]] = dimension["stringValue"]

    if "metrics" in row:
        for metric in row["metrics"]:
            event[metric["metric"]] = metric["decimalValue"]["value"]

    return event
def transform_response_data_to_event_list(data, resource_type: ResourceType):
    """Flatten every row of a response into a list of event dicts.

    Args:
        data: Iterable of raw rows.
        resource_type: Selects the row transformer. ResourceType.METRIC uses
            transform_metric_row; any other value uses transform_report_row.

    Returns:
        A list with one flattened dict per input row, in input order.
    """
    # NOTE(review): this looks like leftover debug output — consider routing it
    # through generate_log. Kept because it is an observable side effect.
    print(resource_type)

    if resource_type == ResourceType.METRIC:
        flatten_row = transform_metric_row
    else:
        flatten_row = transform_report_row
    return list(map(flatten_row, data))
def transform_report_row(row):
    """Flatten one report row into a single-level dict.

    Scalar fields are copied as-is; nested OS-version, app-version and
    percentage objects are reduced to one inner value each. "location" is the
    only optional field and defaults to an empty string.

    Args:
        row: Mapping describing one report entry.

    Returns:
        A dict with the thirteen flattened fields, in a fixed key order.

    Raises:
        KeyError: If any field other than "location" (or a required nested
            key) is missing.
    """
    flat = {field: row[field] for field in ("name", "type", "cause")}
    flat["location"] = row.get("location", "")

    for field in ("errorReportCount", "distinctUsers", "lastErrorReportTime", "issueUri"):
        flat[field] = row[field]

    # Nested objects: keep only the one inner value that ends up in the dataset.
    for field in ("firstOsVersion", "lastOsVersion"):
        flat[field] = row[field]["apiLevel"]
    for field in ("firstAppVersion", "lastAppVersion"):
        flat[field] = row[field]["versionCode"]
    flat["distinctUsersPercent"] = row["distinctUsersPercent"]["value"]

    return flat
def get_resources_to_transform():
    """List the resources to process together with their raw input folders.

    Each raw folder is ``RAW_FOLDER/BUNDLE_APP/<resource type name>``.

    Returns:
        A list of dicts, one for METRIC and then one for REPORT, each with a
        "type" key (the ResourceType member) and a "raw_path" key (its folder).
    """
    app_raw_root = os.path.join(RAW_FOLDER, BUNDLE_APP)
    return [
        {"type": kind, "raw_path": os.path.join(app_raw_root, kind.name)}
        for kind in (ResourceType.METRIC, ResourceType.REPORT)
    ]
def main():
    """Transform every raw JSON file of every resource into a CSV dataset.

    For each resource returned by get_resources_to_transform, the JSON files
    found in its raw folder are read, flattened into event lists and handed to
    write_csv_file together with the target folder
    ``DATASETS_FOLDER/BUNDLE_APP/<resource type name>`` and the file name
    without its extension. A log entry is emitted per file read, plus one per
    resource stating either that processing finished or that no files were found.
    """
    for resource in get_resources_to_transform():
        kind = resource["type"]
        source_dir = resource["raw_path"]
        target_dir = os.path.join(DATASETS_FOLDER, BUNDLE_APP, kind.name)

        file_names = get_all_json_files_on_path(source_dir)
        if not file_names:
            generate_log(log_type.FILE_NOT_FOUND, "Compatible files for transformation not found.")
            continue

        for file_name in file_names:
            raw_rows = read_json_file(os.path.join(source_dir, file_name))
            generate_log(log_type.FILE_READ, f"File read {file_name}")
            events = transform_response_data_to_event_list(raw_rows, kind)
            # The output keeps the input's base name; only the extension is dropped.
            base_name = os.path.splitext(file_name)[0]
            write_csv_file(target_dir, base_name, events)

        generate_log(log_type.PROCESS_FINISHED, "Transform process finished!")
# Run the pipeline only when this file is executed as a script. Without the
# guard, merely importing the module (e.g. to reuse a transform function or
# to test it) would read and write files as a side effect.
if __name__ == "__main__":
    main()