-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsetup.py
317 lines (299 loc) · 22.2 KB
/
setup.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
import constants
from locations import Bus, School, Stop, Student
import numpy as np
from scipy import spatial
from utils import californiafy, timesecs
from fuzzywuzzy import fuzz
#Given a geocode string ("latitude;longitude") and a dictionary from
#geocodes to indices, finds the index corresponding with the nearest
#geocode in Euclidean space. (Therefore, if a geocode is in the
#database, its index will be returned, as the distance is 0.)
def fetch_ind(code_to_find, codes_inds_map):
    """Return the index associated with a "latitude;longitude" geocode.

    An exact entry in codes_inds_map is returned directly. Otherwise the
    index of the nearest point in constants.GEOCODE_KDTREE is returned and
    remembered in constants.GEOCODE_CACHE, so each unknown geocode is only
    queried against the tree once.
    """
    #Exact hit: the geocode is one of the known ones.
    if code_to_find in codes_inds_map:
        return codes_inds_map[code_to_find]
    #Approximate hit that was already resolved by an earlier call.
    resolved = constants.GEOCODE_CACHE
    if code_to_find in resolved:
        return resolved[code_to_find]
    #Otherwise parse the coordinates and ask the KD tree.
    lat_text, lon_text = code_to_find.split(";")
    query_point = [float(lat_text), float(lon_text)]
    #query() returns (distance, index); only the index is needed.
    query_result = constants.GEOCODE_KDTREE.query(query_point)
    nearest = query_result[1]
    resolved[code_to_find] = nearest
    return nearest
#students_filename: name of the students csv file (the header row is
#skipped). Column 1 is passed through to Student unchanged, columns 2
#and 3 are latitude and longitude, column 4 is grade level, column 5 is
#a ";"-separated list of special needs codes, column 6 is "SP" or "SE"
#for special ed students, and column 7 is the school's cost center.
#all_geocodes: filename for list of all geocodes. gives map from geocode to ind
#geocoded_schools: file name for map from school to geocode. Column 1 is
#cost center, column 2 is school name, columns 3 and 4 are start time and
#end time, columns 5 and 6 are latitude and longitude, column 7 is
#ridership probability in percent (blank means 100%), and columns 8-11
#are custom earliest/latest dropoff and earliest/latest pickup times.
#returns a list of all students, a dict from schools to sets of
#students, and a set of all School objects that were created.
#sped flags whether this run is for SP students or RG students.
#routing_type: 1 for all students, 2 for school by cost center number,
#3 for school by exact name, 4 for school by approximate name
#school_strings: list of strings defining the schools to route in the way
#defined by routing_type. This value is not used to filter students if
#routing_type is 1.
def setup_students(students_filename, all_geocodes,
                   geocoded_schools, sped,
                   routing_type, school_strings):
    """Read the school, geocode and student files and build the students.

    Parameters:
        students_filename: csv of students (header row skipped). The code
            reads latitude/longitude from columns 2-3, grade from column
            4, ";"-separated needs from column 5, the "SP"/"SE" flag from
            column 6 and the school's cost center from column 7.
        all_geocodes: file with one "lat;lon" geocode per line; the line
            number (0-based) is the geocode's index.
        geocoded_schools: csv of schools (header row skipped); rows with
            fewer than 6 columns are ignored.
        sped: only students whose special-ed flag equals this are kept.
        routing_type: 1 keeps all students, 2 filters by cost center
            number, 3 by exact school name, 4 by approximate school name.
        school_strings: the schools to keep, interpreted according to
            routing_type. The caller's list is not modified.

    Returns:
        (students, schools_students_map, all_schools): the list of
        Student objects, a dict from cost center to the set of its
        students, and the set of School objects that were created.

    Side effects: sets constants.GEOCODE_STRINGS and
    constants.GEOCODE_KDTREE, and prints the fuzzy-matching results and
    any unknown grades.
    """
    schools_codes_map = dict() #maps schools to geocodes
    schools_students_map = dict() #maps schools to sets of students
    schools_starttimes_map = dict() #maps schools to start times
    schools_endtimes_map = dict() #maps schools to end times
    schools_names_map = dict() #maps schools to their names
    schools_probs_map = dict() #maps schools to their ridership probabilities
    schools_customtimes_map = dict() #maps schools to custom pickup/dropoff intervals
    with open(geocoded_schools, 'r') as schools:
        schools.readline() #get rid of header
        for cost_center in schools:
            fields = cost_center.split(",")
            if len(fields) < 6:
                continue
            cost_cent = fields[0]
            schools_codes_map[cost_cent] = (fields[4].strip() + ";"
                                            + fields[5].strip())
            schools_students_map[cost_cent] = set()
            schools_names_map[cost_cent] = fields[1]
            schools_starttimes_map[cost_cent] = timesecs(fields[2])
            schools_endtimes_map[cost_cent] = timesecs(fields[3])
            #Ridership probability is a percentage; blank or missing
            #means every student rides.
            schools_probs_map[cost_cent] = 1.0
            if len(fields) > 6 and fields[6].strip() != "":
                schools_probs_map[cost_cent] = float(fields[6].strip())/100
            if len(fields) == 11:
                #-1 marks "no custom value given" for that slot
                customtimes = [-1, -1, -1, -1]
                for i in range(7, 11):
                    if fields[i].strip() != "":
                        customtimes[i - 7] = timesecs(fields[i])
                schools_customtimes_map[cost_cent] = customtimes

    #if we are doing fuzzy matching, figure out the actual school strings
    #to use
    if routing_type == 4:
        school_strings = _closest_school_names(school_strings,
                                               schools_names_map)
        routing_type = 3

    #Prepare strings for easy identification later. This builds a new
    #collection so that the caller's list is left untouched and the
    #function can be called repeatedly with the same argument.
    wanted_schools = set()
    for school_string in (school_strings or []):
        wanted = str(school_string).strip().upper()
        if routing_type == 2:
            wanted = int(wanted)
        wanted_schools.add(wanted)

    #Associate the geocodes with their indices in the travel time matrix
    codes_inds_map = _load_geocode_index(all_geocodes)

    schools_inds_map = dict()
    for cost_cent in schools_codes_map:
        schools_inds_map[cost_cent] = fetch_ind(schools_codes_map[cost_cent],
                                                codes_inds_map)

    students = []
    #Maintain a dictionary of school indices to schools so that
    #school objects can be tested for equality.
    ind_school_dict = dict()
    #Maintain a set of all School objects to return
    all_schools = set()
    with open(students_filename, 'r') as student_records:
        student_records.readline() #header
        #ind keeps track of the (1-based) row to associate with students
        for ind, student_record in enumerate(student_records, 1):
            student_record = student_record.strip()
            if student_record == "":
                continue
            fields = student_record.split(",")
            school_identifier = fields[6].strip()
            sped_flag = fields[5].strip().upper()
            stud_sped = (sped_flag == "SP" or sped_flag == "SE")
            #Not the type of student we are currently routing
            if stud_sped != sped:
                continue
            #Not in the school we are currently routing
            if routing_type == 2 and int(school_identifier) not in wanted_schools:
                continue
            if (routing_type == 3 and
                schools_names_map[school_identifier].strip().upper()
                not in wanted_schools):
                continue

            stop_ind = fetch_ind(fields[1].strip() + ";" + fields[2].strip(),
                                 codes_inds_map)
            #Raises KeyError for a cost center missing from the schools file
            school_ind = schools_inds_map[school_identifier]

            try:
                grade = int(fields[3].strip())
            except ValueError:
                grade = -1
            age_type = 'Other'
            if grade in constants.GRADES_TYPE_MAP:
                age_type = constants.GRADES_TYPE_MAP[grade]
            if age_type == 'Other':
                print("Unknown grade: " + str(grade))

            #Cost centers that share a location share one School object
            if school_ind not in ind_school_dict:
                starttime = 8*60*60 #default to 8AM start
                #NOTE(review): this value is 1PM although the original
                #comment said "3PM finish" - confirm which was intended.
                endtime = 13*60*60
                if school_identifier in schools_starttimes_map:
                    starttime = schools_starttimes_map[school_identifier]
                    endtime = schools_endtimes_map[school_identifier]
                name = schools_names_map[school_identifier]
                prob = schools_probs_map[school_identifier]
                new_school = School(school_identifier,
                                    school_ind,
                                    starttime,
                                    endtime,
                                    name,
                                    ridership_probability = prob)
                ind_school_dict[school_ind] = new_school
                if school_identifier in schools_customtimes_map:
                    customtimes = schools_customtimes_map[school_identifier]
                    if customtimes[0] != -1:
                        new_school.earliest_dropoff = customtimes[0]
                    if customtimes[1] != -1:
                        new_school.latest_dropoff = customtimes[1]
                    if customtimes[2] != -1:
                        new_school.earliest_pickup = customtimes[2]
                    if customtimes[3] != -1:
                        new_school.latest_pickup = customtimes[3]
                all_schools.add(new_school)

            this_student = Student(stop_ind, ind_school_dict[school_ind],
                                   age_type, fields, ind, fields[0], sped)
            students.append(this_student)
            schools_students_map[school_identifier].add(this_student)

            #Add special needs
            for need in fields[4].split(";"):
                #Splitting an empty string returns one empty string -
                #no needs in this case
                if len(need) == 0:
                    continue
                if len(need) == 1:
                    #Most types of needs do not require extra info
                    assert (need in ["M", "W", "L", "A", "I", "F"]), ("Unknown need type"+str(need))
                    this_student.add_need(need)
                else:
                    #Custom max travel time does require extra info
                    #Translate from minutes to seconds
                    #NOTE(review): the extra division by 1.5 is not
                    #explained anywhere in this file - confirm intent.
                    assert (need[0] == "T"), ("Unknown need type"+str(need))
                    this_student.add_need(need[0], value = int(need[1:])*60/1.5)
    return students, schools_students_map, all_schools


def _closest_school_names(school_strings, schools_names_map):
    #Replaces each approximate school name by the best-scoring exact name
    #from schools_names_map, scoring word by word with fuzzywuzzy.
    exact_school_strings = []
    for school_string in school_strings:
        best_fuzzy_score = 0
        best_name = ""
        school_string = school_string.strip().upper()
        words = school_string.split()
        for candidate_name in schools_names_map.values():
            match_school_string = candidate_name.strip().upper()
            this_score = 0
            for word in words:
                this_score += (5*fuzz.partial_ratio(word, match_school_string) +
                               fuzz.token_sort_ratio(word, match_school_string))
            #Strict comparison: the first of several equal scores wins
            if this_score > best_fuzzy_score:
                best_fuzzy_score = this_score
                best_name = candidate_name
        print("School name to match: " + school_string)
        print("Closest match: " + best_name)
        exact_school_strings.append(best_name)
    return exact_school_strings


def _load_geocode_index(all_geocodes):
    #Reads the geocode list into constants.GEOCODE_STRINGS, builds
    #constants.GEOCODE_KDTREE over it for quick nearest-neighbor lookup,
    #and returns the map from geocode string to its line index.
    constants.GEOCODE_STRINGS = []
    codes_inds_map = dict()
    with open(all_geocodes, 'r') as geocodes:
        for ind, code in enumerate(geocodes):
            code = code.strip()
            constants.GEOCODE_STRINGS.append(code)
            codes_inds_map[code] = ind
    geocodes_list = []
    for code in constants.GEOCODE_STRINGS:
        this_code = code.split(";")
        this_code[0] = float(this_code[0])
        this_code[1] = float(this_code[1])
        geocodes_list.append(this_code)
    constants.GEOCODE_KDTREE = spatial.KDTree(np.array(geocodes_list))
    return codes_inds_map
def setup_map_data(mapdata_filename):
    """Load the travel time matrix into constants.TRAVEL_TIMES.

    The array is read with np.load from constants.FILENAMES[3] and every
    entry is scaled by constants.TT_MULT.
    """
    #NOTE(review): mapdata_filename is accepted but never read; the path
    #always comes from constants.FILENAMES[3]. Confirm whether callers
    #expect their argument to be used.
    unscaled_times = np.load(constants.FILENAMES[3])
    constants.TRAVEL_TIMES = unscaled_times*constants.TT_MULT
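#bus_filename is an input csv file (with a header row) where the
#first column is bus ID, the second is capacity, and the third is
#'Y' for buses with a wheelchair lift. Optional fourth and fifth
#columns give the minimum and maximum wheelchair capacity.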
def setup_buses(bus_filename, sped):
    """Read the bus csv file and return the usable buses, smallest first.

    Parameters:
        bus_filename: csv file with a header row. Column 2 is the
            capacity and column 3 is 'Y' for lift buses. When a row has
            exactly five columns and the last two are non-empty, they are
            the minimum and maximum wheelchair capacity.
        sped: when false, lift buses are left out.

    Returns:
        A list of Bus objects sorted by increasing capacity.
    """
    buses = []
    with open(bus_filename, 'r') as bus_file:
        bus_file.readline() #header
        for bus_info in bus_file:
            #Strip every field so that the trailing newline (or stray
            #spaces) cannot hide a 'Y' flag or break int().
            fields = [field.strip() for field in bus_info.split(",")]
            #Ignore blank lines, e.g. at the end of the file
            if fields == [""]:
                continue
            cap = int(fields[1])
            lift = (fields[2] == 'Y')
            #Don't include wheelchair buses when routing non-special-ed
            if lift and not sped:
                continue
            #By default, assume no wheelchair capacity.
            min_wheel = 0
            max_wheel = 0
            if len(fields) == 5 and fields[3] != "" and fields[4] != "":
                min_wheel = int(fields[3])
                max_wheel = int(fields[4])
            buses.append(Bus(cap, min_wheel, max_wheel, lift))
    buses.sort(key = lambda bus: bus.capacity)
    return buses
#Sets up the stops based on the output of setup_students
#Populates unrouted_stops in the Schools
#Note: students with different cost centers may go to the same
#physical location. The loop variable "cost_cent" represents the cost
#center number, whereas student.school is the school object in memory.
#As a result, this function is the one that will associate different
#cost centers at the same location together.
def setup_stops(schools_students_map):
    """Group students into stops and return the set of all stops.

    One Stop is created per (school object, student.tt_ind) pair. Each
    new Stop is also added to its school's unrouted_stops, and every
    student is added to the Stop matching its school and tt_ind.
    """
    stops = set()
    #school object -> {tt_ind -> Stop}
    stops_by_school = dict()
    for enrolled in schools_students_map.values():
        for student in enrolled:
            location = student.tt_ind
            school = student.school
            if school not in stops_by_school:
                stops_by_school[school] = dict()
            school_stops = stops_by_school[school]
            if location not in school_stops:
                #First student seen at this location for this school
                created = Stop(school)
                school_stops[location] = created
                stops.add(created)
                school.unrouted_stops.add(created)
            school_stops[location].add_student(student)
    return stops
#Sets up the map from unmodified capacities to modified capacities.
def setup_mod_caps(mod_caps_filename):
    """Fill constants.CAPACITY_MODIFIED_MAP from a csv file.

    The file has a header row; in every other row column 1 is the
    original capacity and columns 2-4 are the three resulting modified
    capacities. Blank lines are ignored. An entry for capacity 10000 is
    always added.
    """
    modified_map = dict()
    constants.CAPACITY_MODIFIED_MAP = modified_map
    #The with-block closes the file even if a row fails to parse
    with open(mod_caps_filename, 'r') as modcaps_file:
        modcaps_file.readline() #header
        for modcap in modcaps_file:
            if modcap.strip() == "":
                continue
            fields = modcap.split(",")
            orig_cap = int(fields[0])
            modified_map[orig_cap] = [int(fields[1]), int(fields[2]),
                                      int(fields[3])]
    #If we ever want to not worry about capacity, use virtual
    #buses of capacity 10000.
    modified_map[10000] = [10000, 10000, 10000]
def setup_parameters(parameters_filename, sped):
    """Read routing parameters from a csv file into constants.

    The file has a header row. The first data row is used when sped is
    false and the second data row when sped is true. From the chosen row
    (0-based field numbers):
        MAX_TIME            = 60 * field 1 / field 2
        MSTT_WEIGHT         = field 3
        MINUTES_PER_SEGMENT = field 4 / 2
        SLACK               = field 5
        MAX_SCHOOL_DIST     = field 6 * 60
    """
    #Only the reading happens inside the with-block, so the file is
    #closed before any parsing error can be raised.
    with open(parameters_filename, 'r') as parameters_file:
        parameters_file.readline() #header
        row = parameters_file.readline()
        if sped:
            row = parameters_file.readline()
    fields = row.split(",")
    constants.MAX_TIME = 60*float(fields[1])/float(fields[2])
    constants.MSTT_WEIGHT = float(fields[3])
    constants.MINUTES_PER_SEGMENT = float(fields[4])/2
    constants.SLACK = float(fields[5])
    constants.MAX_SCHOOL_DIST = float(fields[6])*60
def setup_school_pairs(forbidden_pairs_filename, allowed_pairs_filename):
    """Load the forbidden and allowed school pairs into constants.

    Each file is a csv without a header whose first two columns name a
    pair of schools; lines with fewer than two columns are skipped. Every
    pair is stored in both orders. An empty filename ("") leaves the
    corresponding set empty.

    Sets constants.FORBIDDEN_SCHOOL_PAIRS and
    constants.ALLOWED_SCHOOL_PAIRS.
    """
    constants.ALLOWED_SCHOOL_PAIRS = set()
    constants.FORBIDDEN_SCHOOL_PAIRS = set()
    if forbidden_pairs_filename != "":
        constants.FORBIDDEN_SCHOOL_PAIRS.update(
            _read_school_pairs(forbidden_pairs_filename))
    if allowed_pairs_filename != "":
        constants.ALLOWED_SCHOOL_PAIRS.update(
            _read_school_pairs(allowed_pairs_filename))


def _read_school_pairs(pairs_filename):
    #Returns the set of (a, b) and (b, a) tuples for every line of the
    #file that has at least two comma-separated columns.
    pairs = set()
    with open(pairs_filename, 'r') as pairs_file:
        for line in pairs_file:
            fields = line.split(",")
            if len(fields) < 2:
                continue
            first = fields[0].strip()
            second = fields[1].strip()
            pairs.add((first, second))
            pairs.add((second, first))
    return pairs