-
Notifications
You must be signed in to change notification settings - Fork 206
/
Copy path01_chipotle_homework_solution.py
112 lines (84 loc) · 3.4 KB
/
01_chipotle_homework_solution.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
'''
SOLUTION FILE: Homework with Chipotle data
https://github.com/TheUpshot/chipotle
'''
'''
PART 1: read in the data, parse it, and store it in a list of lists called 'data'
Hint: this is a tsv file, and csv.reader() needs to be told how to handle it
'''
import csv
# specify that the delimiter is a tab character
with open('chipotle_orders.tsv', 'rU') as f:
data = [row for row in csv.reader(f, delimiter='\t')]
'''
PART 2: separate the header and data into two different lists
'''
header = data[0]
data = data[1:]
'''
PART 3: calculate the average price of an order
Hint: examine the data to see if the 'quantity' column is relevant to this calculation
Hint: work smarter, not harder! (this can be done in a few lines of code)
'''
# count the number of unique order_id's
# note: you could assume this is 1834 because that's the maximum order_id, but it's best to check
num_orders = len(set([row[0] for row in data])) # 1834
# create a list of prices
# note: ignore the 'quantity' column because the 'item_price' takes quantity into account
prices = [float(row[4][1:-1]) for row in data] # strip the dollar sign and trailing space
# calculate the average price of an order and round to 2 digits
round(sum(prices) / num_orders, 2) # $18.81
'''
PART 4: create a list (or set) of all unique sodas and soft drinks that they sell
Note: just look for 'Canned Soda' and 'Canned Soft Drink', and ignore other drinks like 'Izze'
'''
# if 'item_name' includes 'Canned', append 'choice_description' to 'sodas' list
sodas = []
for row in data:
if 'Canned' in row[2]:
sodas.append(row[3][1:-1]) # strip the brackets
# create a set of unique sodas
unique_sodas = set(sodas)
'''
PART 5: calculate the average number of toppings per burrito
Note: let's ignore the 'quantity' column to simplify this task
Hint: think carefully about the easiest way to count the number of toppings
Hint: 'hello there'.count('e')
'''
# keep a running total of burritos and toppings
burrito_count = 0
topping_count = 0
# calculate number of toppings by counting the commas and adding 1
# note: x += 1 is equivalent to x = x + 1
for row in data:
if 'Burrito' in row[2]:
burrito_count += 1
topping_count += (row[3].count(',') + 1)
# calculate the average topping count and round to 2 digits
round(topping_count / float(burrito_count), 2) # 5.40
'''
PART 6: create a dictionary in which the keys represent chip orders and
the values represent the total number of orders
Expected output: {'Chips and Roasted Chili-Corn Salsa': 18, ... }
Note: please take the 'quantity' column into account!
Advanced: learn how to use 'defaultdict' to simplify your code
'''
# start with an empty dictionary
chips = {}
# if chip order is not in dictionary, then add a new key/value pair
# if chip order is already in dictionary, then update the value for that key
for row in data:
if 'Chips' in row[2]:
if row[2] not in chips:
chips[row[2]] = int(row[1]) # this is a new key, so create key/value pair
else:
chips[row[2]] += int(row[1]) # this is an existing key, so add to the value
# defaultdict saves you the trouble of checking whether a key already exists
from collections import defaultdict
dchips = defaultdict(int)
for row in data:
if 'Chips' in row[2]:
dchips[row[2]] += int(row[1])
'''
BONUS: think of a question about this data that interests you, and then answer it!
'''