-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathfunds_data.py
84 lines (46 loc) · 1.91 KB
/
funds_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import time
import pandas as pd
import datetime
def create_interval(start_date, end_date):
    """Return the 'YYYYMM' label of every month from start_date to end_date.

    Both arguments are date strings in 'YYYY-MM-DD' format; only their year
    and month are used. The result is in chronological order and includes
    both the start month and the end month.

    Args:
        start_date: first day of interest, e.g. '2023-06-15'.
        end_date: last day of interest, e.g. '2023-08-01'.

    Returns:
        A list of strings such as ['202306', '202307', '202308']. The list
        is empty when start_date falls in a later month than end_date.

    Raises:
        ValueError: if either argument does not match '%Y-%m-%d'.
    """
    first = datetime.datetime.strptime(start_date, '%Y-%m-%d')
    last = datetime.datetime.strptime(end_date, '%Y-%m-%d')
    # Place every month on one linear axis (year * 12 + zero-based month) so
    # that year rollovers and a December end date need no special handling.
    first_index = first.year * 12 + (first.month - 1)
    last_index = last.year * 12 + (last.month - 1)
    return [
        f'{index // 12:04d}{index % 12 + 1:02d}'
        for index in range(first_index, last_index + 1)
    ]
def extract_fund_data(dates_list, verbose = False):
    """Download one monthly fund report per label and combine them.

    Each label is substituted into the
    ``inf_diario_fi_<label>.zip`` URL on dados.cvm.gov.br and the zipped,
    semicolon-separated CSV is read directly with pandas.

    Args:
        dates_list: iterable of month labels in 'YYYYMM' format.
        verbose: when true, print a line after each month is read and the
            total elapsed time at the end.

    Returns:
        A single DataFrame holding the rows of every month in the order
        given, with a fresh 0-based index. An empty DataFrame is returned
        when dates_list contains no labels.
    """
    start_time = time.time()
    monthly_frames = []
    for label in dates_list:
        url = f'https://dados.cvm.gov.br/dados/FI/DOC/INF_DIARIO/DADOS/inf_diario_fi_{label}.zip'
        monthly_frames.append(pd.read_csv(url, sep=';', compression='zip'))
        if verbose:
            # The human-readable month name is only needed for the message.
            month_name = datetime.datetime.strptime(label, "%Y%m").strftime('%B %Y')
            print("Extraction of month", month_name, "finished")
    # pd.concat raises ValueError on an empty list, so "no months requested"
    # is handled explicitly instead of crashing.
    if monthly_frames:
        combined_df = pd.concat(monthly_frames, ignore_index=True)
    else:
        combined_df = pd.DataFrame()
    if verbose:
        print("Process took %s seconds" % (time.time() - start_time))
    return combined_df
def get_funds_data(start, end, cnpj:object = None, verbose = False):
    """Fetch the fund rows dated between start and end, optionally for one fund.

    The months covering the range are obtained from ``create_interval`` and
    downloaded with ``extract_fund_data``. The 'DT_COMPTC' column is then
    converted to datetime and used to keep only rows with
    ``start <= DT_COMPTC <= end``.

    Args:
        start: first date to keep, as a 'YYYY-MM-DD' string.
        end: last date to keep, as a 'YYYY-MM-DD' string.
        cnpj: when not None, keep only rows whose 'CNPJ_FUNDO' equals this
            value.
        verbose: forwarded to ``extract_fund_data`` to print progress.

    Returns:
        The filtered DataFrame with a fresh 0-based index. An empty
        DataFrame is returned when the range covers no months.
    """
    interval = create_interval(start, end)
    if not interval:
        # Nothing to download; returning here also avoids the column access
        # below, which would fail on a frame that has no columns.
        return pd.DataFrame()
    data_fund = extract_fund_data(interval, verbose)
    data_fund['DT_COMPTC'] = pd.to_datetime(data_fund['DT_COMPTC'])
    in_range = (data_fund['DT_COMPTC'] >= start) & (data_fund['DT_COMPTC'] <= end)
    data_fund = data_fund.loc[in_range]
    # Identity check: `!= None` would be evaluated element-wise (and then
    # fail in the `if`) for array-like values.
    if cnpj is not None:
        data_fund = data_fund[data_fund['CNPJ_FUNDO'] == cnpj]
    return data_fund.reset_index(drop=True)