-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcreate_phrasefinder_prior.py
166 lines (152 loc) · 6.48 KB
/
create_phrasefinder_prior.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
import os
import sys
import json
import urllib
import requests
import numpy as np
sys.path.insert(0, '..')
from data import get_classes_ordered
# Project settings live in a JSON file that is looked up relative to the
# current working directory.
variables_file = 'variables.json'
# JSON text is UTF-8; decode it explicitly instead of relying on the
# platform's locale encoding, which would garble non-ASCII paths.
with open(variables_file, encoding='utf-8') as f:
    config = json.load(f)

# ============================================================
# VARIABLES TO MODIFY
# ============================================================
# Folder that receives the generated JSON files. The pieces are joined by
# plain string concatenation, so config['project_folder'] must already end
# with a path separator.
path = config['project_folder'] + 'phrasefinder_prior/'
# ============================================================
def transform_obj(obj):
    """Expand an object class name into the phrases used to query for it.

    Class names that have an entry in the table below are replaced by their
    hand-picked list of alternative phrases. Any other name is returned
    unchanged as a single-element list.

    Args:
        obj: Object class name, e.g. ``'bell_pepper'``.

    Returns:
        A new list of phrases for ``obj``.
    """
    # The table is built on every call so each caller receives fresh lists.
    alternatives = {
        'bell_pepper': ['bell pepper', 'green pepper', 'red pepper'],
        'cup': ['cup', 'mug'],
        'pot': ['pot', 'saucepan', 'pan'],
        'pan': ['pan', 'frying pan'],
        'eating_utensil': ['eating utensil', 'knife', 'spoon', 'fork'],
        'cooking_utensil': [
            'cooking utensil', 'knife', 'scissors', 'peeler', 'scale',
            'jug', 'colander', 'strainer', 'blender',
        ],
        'fridge_drawer': ['fridge drawer', 'refrigerator drawer'],
        'cutting_board': [
            'cutting board', 'cut board', 'chopping board', 'chop board',
        ],
        'cheese_container': [
            'cheese container', 'cheese recipient', 'cheese package',
        ],
        'oil_container': ['oil container', 'oil recipient', 'oil bottle'],
        'bread_container': [
            'bread container', 'bread recipient', 'bread package',
            'bread bag',
        ],
        'grocery_bag': ['grocery bag', 'groceries'],
        'seasoning_container': [
            'seasoning container', 'seasoning recipient',
            'seasoning bottle', 'seasoning package',
        ],
        'condiment_container': [
            'condiment container', 'condiment recipient',
            'condiment bottle',
        ],
        'tomato_container': [
            'tomato container', 'tomato recipient', 'tomato bottle',
        ],
        'fridge': ['fridge', 'refrigerator'],
        'paper_towel': [
            'paper towel', 'tissue', 'kitchen paper', 'kitchen towel',
        ],
        'cabinet': ['cabinet', 'locker', 'cupboard'],
    }
    return alternatives.get(obj, [obj])
def transform_verb(verb):
    """Expand a verb class name into the phrases used to query for it.

    Verbs that have an entry in the table below are replaced by their
    hand-picked list of alternative phrases. Any other verb is returned
    unchanged as a single-element list.

    Args:
        verb: Verb class name, e.g. ``'move_around'``.

    Returns:
        A new list of phrases for ``verb``.
    """
    # The table is built on every call so each caller receives fresh lists.
    alternatives = {
        'divide/pull apart': [
            'divide', 'pull apart', 'separate', 'split', 'shred',
        ],
        'move_around': ['move around', 'move', 'transfer'],
        'take': ['take', 'pick', 'pick up', 'grab'],
        'put': ['put', 'leave', 'place'],
        'cut': ['cut', 'slice', 'mince'],
        'wash': ['wash', 'clean'],
        'mix': ['mix', 'mingle', 'blend'],
    }
    return alternatives.get(verb, [verb])
def query_phrasefinder(v_option, o_option, timeout=30):
    """Run one '<verb> ? <object>' search and return the raw TSV body.

    Args:
        v_option: Verb phrase placed at the start of the query.
        o_option: Object phrase placed at the end of the query.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the whole run.

    Returns:
        The response body as text (may be empty).

    Raises:
        RuntimeError: If the server answers with a status other than 200.
        requests.RequestException: On connection problems or timeout.
    """
    # Imported explicitly: a bare ``import urllib`` does not guarantee that
    # the ``urllib.parse`` submodule is loaded.
    import urllib.parse

    # NOTE(review): '?' is presumably the service's placeholder for one
    # arbitrary word between verb and object - confirm against its docs.
    query = '{} ? {}'.format(v_option, o_option)
    encoded_query = urllib.parse.quote(query)
    params = {'corpus': 'eng-us', 'query': encoded_query, 'format': 'tsv'}
    url = 'https://api.phrasefinder.io/search?' + '&'.join(
        '{}={}'.format(name, value) for name, value in params.items()
    )
    response = requests.get(url, timeout=timeout)
    # An explicit check instead of ``assert``, which is removed under -O.
    if response.status_code != 200:
        raise RuntimeError(
            'PhraseFinder query {!r} failed with HTTP status {}'.format(
                query, response.status_code))
    return response.text


def sum_match_counts(tsv_text, o_option):
    """Add up the counts of all result rows that mention the object phrase.

    A row is kept when every space-separated word of ``o_option`` occurs
    somewhere in it as a substring (so 'pan' also matches 'company'). For
    kept rows the second tab-separated column is read as the number of
    results and accumulated.

    Args:
        tsv_text: Response body, one tab-separated row per line.
        o_option: Object phrase that was queried.

    Returns:
        The summed count as an int; 0 for an empty body or no matching row.

    Raises:
        ValueError: If the count column of a matching row is not an integer.
    """
    words = o_option.split(' ')
    total = 0
    for line in tsv_text.split('\n'):
        if not all(word in line for word in words):
            continue
        fields = line.split('\t')
        if len(fields) < 2:
            # No count column to read; ignore the malformed row.
            continue
        total += int(fields[1])
    return total


def average_frequencies(frequencies):
    """Collapse the per-synonym counts into one mean count per action.

    Args:
        frequencies: Mapping ``verb -> obj -> list of counts``.

    Returns:
        Dict keyed by ``'<verb> <obj>'`` holding the mean of the counts, or
        0.0 when the list is empty.
    """
    action_priors = dict()
    for verb, per_object in frequencies.items():
        for obj, counts in per_object.items():
            action = verb + ' ' + obj
            action_priors[action] = float(np.mean(counts)) if counts else 0.
    return action_priors


def normalize_priors(action_priors):
    """Scale the per-action values so that they sum to one.

    Args:
        action_priors: Mapping ``action -> non-normalised frequency``.

    Returns:
        A new dict with every value divided by the total; an empty input
        gives an empty dict.

    Raises:
        ValueError: If there are actions but their total is zero, because
            no probability distribution can be derived in that case.
    """
    if not action_priors:
        return {}
    total = sum(action_priors.values())
    if total == 0:
        raise ValueError(
            'all action frequencies are zero; cannot normalise them into '
            'a probability distribution')
    return {
        action: value / float(total)
        for action, value in action_priors.items()
    }


if __name__ == '__main__':
    os.makedirs(path, exist_ok=True)

    # Get the set of verbs and objects
    objects, _ = get_classes_ordered(config['objects_file'])
    verbs, _ = get_classes_ordered(config['verbs_file'])

    # frequencies[verb][obj] collects one summed count for every
    # (verb synonym, object synonym) query that returned matches.
    frequencies = dict()
    for verb in verbs:
        frequencies[verb] = dict()
        for v_option in transform_verb(verb):
            for obj in objects:
                counts = frequencies[verb].setdefault(obj, [])
                for o_option in transform_obj(obj):
                    tsv_text = query_phrasefinder(v_option, o_option)
                    matches = sum_match_counts(tsv_text, o_option)
                    # Queries without results are left out of the average.
                    if matches > 0:
                        counts.append(matches)

    # Save frequencies (number of instances) before averaging the results
    # per action
    with open(path + 'frequencies_raw.json', 'w', encoding='utf-8') as f:
        json.dump(frequencies, f, ensure_ascii=False, indent=4,
                  sort_keys=True)

    # Average the results of each action
    action_priors = average_frequencies(frequencies)

    # Save frequencies (number of instances)
    with open(path + 'frequencies.json', 'w', encoding='utf-8') as f:
        json.dump(action_priors, f, ensure_ascii=False,
                  indent=4, sort_keys=True)

    # Normalise the frequencies to obtain a probability distribution
    action_priors = normalize_priors(action_priors)

    # Save prior
    with open(path + 'action_prior.json', 'w', encoding='utf-8') as f:
        json.dump(action_priors, f, ensure_ascii=False,
                  indent=4, sort_keys=True)