hsaest committed on
Commit
cd6ca15
1 Parent(s): 60276d5

Update eval.py

Browse files
Files changed (1) hide show
  1. eval.py +31 -10
eval.py CHANGED
@@ -37,6 +37,16 @@ def statistics(commonsense_statistic):
37
  return result
38
 
39
 
 
 
 
 
 
 
 
 
 
 
40
  def eval_score(validation_or_test: str, file_path: str, TOKEN):
41
 
42
  if validation_or_test == 'validation':
@@ -76,10 +86,6 @@ def eval_score(validation_or_test: str, file_path: str, TOKEN):
76
  commonsenseConstraint_statistic[query_data['level']][query_data['days']].append(commonsense_info_box)
77
  hardConstraint_statistic[query_data['level']][query_data['days']].append(hard_info_box)
78
 
79
- commonsenseConstraint_statistic_processed = statistics(commonsenseConstraint_statistic)
80
- hardConstraint_statistic_processed = statistics(hardConstraint_statistic)
81
- # print(commonsenseConstraint_statistic_processed)
82
- # print(hardConstraint_statistic_processed)
83
  constraint_record = {key: {day: {'house rule':0, 'cuisine':0, 'room type':0, 'transportation':0} for day in [3,5,7]} for key in ['medium','hard']}
84
  constraint_mapping = {'house rule':'valid_room_rule','cuisine':'valid_cuisine','room type':'valid_room_type','transportation':'valid_transportation'}
85
  mapping_constraint_record = {key: {day: {'valid_room_rule':0, 'valid_cuisine':0, 'valid_room_type':0, 'valid_transportation':0} for day in [3,5,7]} for key in ['medium','hard']}
@@ -91,10 +97,15 @@ def eval_score(validation_or_test: str, file_path: str, TOKEN):
91
  if unit['local_constraint'][key] != None:
92
  constraint_record[unit['level']][unit['days']][key] += 1
93
  mapping_constraint_record[unit['level']][unit['days']][constraint_mapping[key]] += 1
 
 
 
 
94
 
95
  data_record = {key:{day:[] for day in [3,5,7]} for key in ['easy','medium','hard']}
96
 
97
  constraint_dis_record = {"commonsense":{"pass":0,"total":0},"hard":{"pass":0,"total":0}}
 
98
 
99
  for constraint in ['commonsense','hard']:
100
  if constraint == 'commonsense':
@@ -105,11 +116,7 @@ def eval_score(validation_or_test: str, file_path: str, TOKEN):
105
  key_dict = {'commonsense':['is_valid_information_in_current_city','is_valid_information_in_sandbox','is_reasonalbe_visiting_city','is_valid_restaurants','is_valid_transportation','is_valid_attractions','is_valid_accommodation','is_not_absent'],'hard':['valid_cost','valid_room_rule','valid_cuisine','valid_room_type','valid_transportation']}
106
 
107
  for key in constraint_statistic:
108
- # level
109
  for key2 in constraint_statistic[key]:
110
- # day
111
- # print(key2)
112
- # key2 = eval(key2)
113
  if key2 == -1:
114
  print(constraint_statistic[key])
115
  exit(0)
@@ -121,17 +128,29 @@ def eval_score(validation_or_test: str, file_path: str, TOKEN):
121
  if key == 'hard' and key3 in ['valid_room_rule','valid_cuisine','valid_room_type','valid_transportation']:
122
  data_record[key][key2][-1] = f"{constraint_statistic[key][key2][key3]['true']}/{mapping_constraint_record[key][key2][key3]}"
123
  constraint_dis_record[constraint]['total'] += mapping_constraint_record[key][key2][key3]
 
 
 
124
  elif key == 'medium' and key3 in ['valid_room_rule','valid_cuisine','valid_room_type']:
125
  data_record[key][key2][-1] = f"{constraint_statistic[key][key2][key3]['true']}/{mapping_constraint_record[key][key2][key3]}"
126
  constraint_dis_record[constraint]['total'] += mapping_constraint_record[key][key2][key3]
 
 
 
127
  else:
128
  data_record[key][key2][-1] = f"{constraint_statistic[key][key2][key3]['true']}/{count_record[key][key2]}"
129
  if key3 in ['valid_cost','valid_visitng_city_number','valid_days']:
130
  constraint_dis_record[constraint]['total'] += count_record[key][key2]
 
 
 
 
131
  else:
132
  data_record[key][key2][-1] = f"{constraint_statistic[key][key2][key3]['true']}/{count_record[key][key2]}"
133
  constraint_dis_record[constraint]['total'] += count_record[key][key2]
134
-
 
 
135
  final_all_cnt = 0
136
  final_commonsense_cnt = 0
137
  final_hardConstraint_cnt = 0
@@ -161,6 +180,8 @@ def eval_score(validation_or_test: str, file_path: str, TOKEN):
161
 
162
  result = {}
163
 
 
 
164
  if validation_or_test == 'validation':
165
  result['Delivery Rate'] = delivery_cnt / 180
166
  result['Commonsense Constraint Micro Pass Rate'] = constraint_dis_record['commonsense']['pass'] / 1440
@@ -177,5 +198,5 @@ def eval_score(validation_or_test: str, file_path: str, TOKEN):
177
  result['Hard Constraint Macro Pass Rate'] = final_hardConstraint_cnt / 1000
178
  result['Final Pass Rate'] = final_all_cnt / 1000
179
 
180
- return result
181
 
 
37
  return result
38
 
39
 
40
def paper_term_mapping(commonsense_constraint_record, hard_constraint_record):
    """Rename internal constraint keys to the display terms in the mapping table.

    Both arguments are nested dicts of the form
    ``{level: {day: {constraint_key: value}}}``. The values are copied through
    untouched; only the innermost keys are renamed.

    Each record is remapped independently, so the hard-constraint record does
    not need to contain the same levels/days as the commonsense record.
    Constraint keys that have no entry in the mapping table are kept under
    their original name instead of raising ``KeyError``.

    Returns:
        A tuple ``(remapped_commonsense, remapped_hard)``. Each result always
        contains the levels 'easy', 'medium', 'hard' and the days 3, 5, 7
        (empty dict when the input has no data for that slot), plus any
        additional levels/days present in the input.
    """
    mapping_dict = {
        'is_valid_information_in_current_city': 'Within Current City',
        'is_valid_information_in_sandbox': 'Within Sandbox',
        'is_reasonalbe_visiting_city': 'Reasonable City Route',
        'is_valid_restaurants': 'Diverse Restaurants',
        'is_valid_transportation': 'Non-conf. Transportation',
        'is_valid_attractions': 'Diverse Attractions',
        'is_valid_accommodation': 'Minimum Nights Stay',
        'is_not_absent': 'Complete Information',
        'valid_cost': 'Budget',
        'valid_room_rule': 'Room Rule',
        'valid_cuisine': 'Cuisine',
        'valid_room_type': 'Room Type',
        'valid_transportation': 'Transportation',
    }

    def _remap(record):
        # Pre-seed the full level/day grid so callers can index every slot
        # even when the input record has no entry for it.
        remapped = {
            level: {day: {} for day in [3, 5, 7]}
            for level in ['easy', 'medium', 'hard']
        }
        for level, per_day in record.items():
            # setdefault tolerates a level outside the pre-seeded grid.
            level_out = remapped.setdefault(level, {})
            for day, constraints in per_day.items():
                level_out[day] = {
                    # Fall back to the raw key when no display term is defined.
                    mapping_dict.get(key, key): val
                    for key, val in constraints.items()
                }
        return remapped

    return _remap(commonsense_constraint_record), _remap(hard_constraint_record)
49
+
50
  def eval_score(validation_or_test: str, file_path: str, TOKEN):
51
 
52
  if validation_or_test == 'validation':
 
86
  commonsenseConstraint_statistic[query_data['level']][query_data['days']].append(commonsense_info_box)
87
  hardConstraint_statistic[query_data['level']][query_data['days']].append(hard_info_box)
88
 
 
 
 
 
89
  constraint_record = {key: {day: {'house rule':0, 'cuisine':0, 'room type':0, 'transportation':0} for day in [3,5,7]} for key in ['medium','hard']}
90
  constraint_mapping = {'house rule':'valid_room_rule','cuisine':'valid_cuisine','room type':'valid_room_type','transportation':'valid_transportation'}
91
  mapping_constraint_record = {key: {day: {'valid_room_rule':0, 'valid_cuisine':0, 'valid_room_type':0, 'valid_transportation':0} for day in [3,5,7]} for key in ['medium','hard']}
 
97
  if unit['local_constraint'][key] != None:
98
  constraint_record[unit['level']][unit['days']][key] += 1
99
  mapping_constraint_record[unit['level']][unit['days']][constraint_mapping[key]] += 1
100
+
101
+ commonsenseConstraint_statistic_processed = statistics(commonsenseConstraint_statistic)
102
+ hardConstraint_statistic_processed = statistics(hardConstraint_statistic)
103
+
104
 
105
  data_record = {key:{day:[] for day in [3,5,7]} for key in ['easy','medium','hard']}
106
 
107
  constraint_dis_record = {"commonsense":{"pass":0,"total":0},"hard":{"pass":0,"total":0}}
108
+ constraint_count = {key:{day:{} for day in [3,5,7]} for key in ['easy','medium','hard']}
109
 
110
  for constraint in ['commonsense','hard']:
111
  if constraint == 'commonsense':
 
116
  key_dict = {'commonsense':['is_valid_information_in_current_city','is_valid_information_in_sandbox','is_reasonalbe_visiting_city','is_valid_restaurants','is_valid_transportation','is_valid_attractions','is_valid_accommodation','is_not_absent'],'hard':['valid_cost','valid_room_rule','valid_cuisine','valid_room_type','valid_transportation']}
117
 
118
  for key in constraint_statistic:
 
119
  for key2 in constraint_statistic[key]:
 
 
 
120
  if key2 == -1:
121
  print(constraint_statistic[key])
122
  exit(0)
 
128
  if key == 'hard' and key3 in ['valid_room_rule','valid_cuisine','valid_room_type','valid_transportation']:
129
  data_record[key][key2][-1] = f"{constraint_statistic[key][key2][key3]['true']}/{mapping_constraint_record[key][key2][key3]}"
130
  constraint_dis_record[constraint]['total'] += mapping_constraint_record[key][key2][key3]
131
+ # hardConstraint_statistic_processed[key][key2][key3]['failed delivery'] = commonsenseConstraint_statistic_processed[key][key2]['is_valid_information_in_current_city']['failed delivery']
132
+ # hardConstraint_statistic_processed[key][key2][key3]['illegal'] = mapping_constraint_record[key][key2][key3] - hardConstraint_statistic_processed[key][key2][key3]['failed delivery'] - constraint_statistic[key][key2][key3]['true'] - constraint_statistic[key][key2][key3]['false']
133
+ hardConstraint_statistic_processed[key][key2][key3]['total'] = mapping_constraint_record[key][key2][key3]
134
  elif key == 'medium' and key3 in ['valid_room_rule','valid_cuisine','valid_room_type']:
135
  data_record[key][key2][-1] = f"{constraint_statistic[key][key2][key3]['true']}/{mapping_constraint_record[key][key2][key3]}"
136
  constraint_dis_record[constraint]['total'] += mapping_constraint_record[key][key2][key3]
137
+ # hardConstraint_statistic_processed[key][key2][key3]['failed delivery'] = commonsenseConstraint_statistic_processed[key][key2]['is_valid_information_in_current_city']['failed delivery']
138
+ # hardConstraint_statistic_processed[key][key2][key3]['illegal'] = mapping_constraint_record[key][key2][key3] - hardConstraint_statistic_processed[key][key2][key3]['failed delivery'] - constraint_statistic[key][key2][key3]['true'] - constraint_statistic[key][key2][key3]['false']
139
+ hardConstraint_statistic_processed[key][key2][key3]['total'] = mapping_constraint_record[key][key2][key3]
140
  else:
141
  data_record[key][key2][-1] = f"{constraint_statistic[key][key2][key3]['true']}/{count_record[key][key2]}"
142
  if key3 in ['valid_cost','valid_visitng_city_number','valid_days']:
143
  constraint_dis_record[constraint]['total'] += count_record[key][key2]
144
+ constraint_count[key][key2][key3] = count_record[key][key2]
145
+ # hardConstraint_statistic_processed[key][key2][key3]['failed delivery'] = commonsenseConstraint_statistic_processed[key][key2]['is_valid_information_in_current_city']['failed delivery']
146
+ # hardConstraint_statistic_processed[key][key2][key3]['illegal'] = count_record[key][key2] - hardConstraint_statistic_processed[key][key2][key3]['failed delivery'] - constraint_statistic[key][key2][key3]['true'] - constraint_statistic[key][key2][key3]['false']
147
+ hardConstraint_statistic_processed[key][key2][key3]['total'] = count_record[key][key2]
148
  else:
149
  data_record[key][key2][-1] = f"{constraint_statistic[key][key2][key3]['true']}/{count_record[key][key2]}"
150
  constraint_dis_record[constraint]['total'] += count_record[key][key2]
151
+ constraint_count[key][key2][key3] = count_record[key][key2]
152
+ # commonsenseConstraint_statistic_processed[key][key2][key3]['failed delivery'] = count_record[key][key2] - constraint_statistic[key][key2][key3]['true'] - constraint_statistic[key][key2][key3]['false']
153
+ commonsenseConstraint_statistic_processed[key][key2][key3]['total'] = count_record[key][key2]
154
  final_all_cnt = 0
155
  final_commonsense_cnt = 0
156
  final_hardConstraint_cnt = 0
 
180
 
181
  result = {}
182
 
183
+ remap_commonsense_constraint_record, remap_hard_constraint_record = paper_term_mapping(commonsenseConstraint_statistic_processed, hardConstraint_statistic_processed)
184
+
185
  if validation_or_test == 'validation':
186
  result['Delivery Rate'] = delivery_cnt / 180
187
  result['Commonsense Constraint Micro Pass Rate'] = constraint_dis_record['commonsense']['pass'] / 1440
 
198
  result['Hard Constraint Macro Pass Rate'] = final_hardConstraint_cnt / 1000
199
  result['Final Pass Rate'] = final_all_cnt / 1000
200
 
201
+ return result, {"Commonsense Constraint":remap_commonsense_constraint_record, "Hard Constraint":remap_hard_constraint_record}
202