TastyPiano / src /cocktails /utilities /other_scrubbing_utilities.py
Cédric Colas
initial commit
e775f6d
raw
history blame
10.3 kB
import numpy as np
import pickle
from src.cocktails.utilities.cocktail_utilities import get_profile, profile_keys
from src.cocktails.utilities.ingredients_utilities import extract_ingredients, ingredient_list, ingredient_profiles
from src.cocktails.utilities.glass_and_volume_utilities import glass_volume, volume_ranges
# Volume, in millilitres, of the informal measures used in recipes.
one_dash = 1
one_splash = 6
one_tablespoon = 15
one_barspoon = 5

# Fraction of a glass's volume that a drink may occupy before its
# quantities are scaled down.
fill_rate = 0.8

# Multiplier converting one unit of each recognised measure into millilitres.
quantity_factors = {
    'ml': 1,
    'cl': 10,
    'splash': one_splash,
    'splashes': one_splash,
    'dash': one_dash,
    'dashes': one_dash,
    'spoon': one_barspoon,
    'spoons': one_barspoon,
    'tablespoon': one_tablespoon,
    'barspoons': one_barspoon,
    'barspoon': one_barspoon,
    'bar spoons': one_barspoon,
    'bar spoon': one_barspoon,
    'tablespoons': one_tablespoon,
    'teaspoon': 5,
    'teaspoons': 5,
    'drop': 0.05,
    'drops': 0.05,
}

# Unit names ordered from longest to shortest -- presumably so that a longer
# name (e.g. 'tablespoons') is tried before a shorter one it contains
# ('spoon'); confirm against the code that consumes quantity_factors_keys.
quantitiy_keys = sorted(quantity_factors)
_unit_name_lengths = [len(unit_name) for unit_name in quantitiy_keys]
indexes_keys = np.argsort(_unit_name_lengths)[::-1]
quantity_factors_keys = list(np.array(quantitiy_keys)[indexes_keys])
# Datapoint field names grouped by role. The grouping is taken from the
# variable names only -- presumably consumed by the scrubbing pipeline;
# confirm against callers.
keys_to_track = [
    'names',
    'urls',
    'glass',
    'garnish',
    'recipe',
    'how_to',
    'review',
    'taste_rep',
    'valid',
]
keys_to_add = [
    'category',
    'subcategory',
    'ingredients_str',
    'ingredients',
    'quantities',
    'to_keep',
]
keys_to_update = ['glass']

# Descriptive fields followed by every key of the cocktail profile.
keys_for_csv = [
    'names',
    'category',
    'subcategory',
    'ingredients_str',
    'urls',
    'glass',
    'garnish',
    'how_to',
    'review',
    'taste_rep',
] + profile_keys

# Substring replacement tables -- presumably applied to quantity strings
# (to_replace_q) and to ingredient names (to_replace_ing); confirm against
# callers.
to_replace_q = {' fresh': ''}
to_replace_ing = {
    'maple syrup': 'honey syrup',
    'agave syrup': 'honey syrup',
    'basil': 'mint',
}
def print_recipe(unit='mL', ingredient_str=None, ingredients=None, quantities=None, name='', cat='', to_print=True):
    """Format a recipe as human-readable text and optionally print it.

    The recipe is supplied either as `ingredient_str` (parsed with
    `extract_ingredients`) or as the parallel lists `ingredients` and
    `quantities` -- never both. The caller's lists are left untouched.

    Args:
        unit: 'oz' converts plain quantities to fluid ounces; any other value
            displays them as mL. For an 'egg' line only 'mL' and 'oz' are
            accepted.
        ingredient_str: recipe string understood by `extract_ingredients`.
        ingredients: list of ingredient names.
        quantities: list of quantities, one per ingredient.
        name: optional recipe name appended to the header ('' or None to omit).
        cat: optional category shown in parentheses ('' or None to omit).
        to_print: when True, the text is also written to stdout.

    Returns:
        The formatted recipe text.

    Raises:
        AssertionError: if the recipe is not given in exactly one of the two
            accepted forms.
        ValueError: if the recipe contains 'egg' and `unit` is neither 'mL'
            nor 'oz'.
    """
    usage = 'provide either ingredient_str, or list ingredients and quantities'
    # Raised explicitly (not via `assert`) so the validation survives
    # `python -O`.
    if ingredient_str is None:
        if ingredients is None or quantities is None or len(ingredients) != len(quantities):
            raise AssertionError(usage)
    else:
        if ingredients is not None or quantities is not None:
            raise AssertionError(usage)
        ingredients, quantities = extract_ingredients(ingredient_str)

    header = '\nRecipe:'
    if name != '' and name is not None:
        header += f' {name}'
    if cat != '' and cat is not None:
        header += f' ({cat})'
    lines = [header + '\n']

    # Display values are computed in locals: writing them back into the input
    # lists would corrupt the caller's data (e.g. a second call with
    # unit='oz' would convert the quantities twice).
    for ingredient, quantity in zip(ingredients, quantities):
        if ingredient == 'egg':
            # Eggs are always shown as a single egg white.
            quantity = 1
            ingredient = 'egg white'
            if unit == 'mL':
                quantifier = ' (30 mL)'
            elif unit == 'oz':
                quantifier = ' (1 fl oz)'
            else:
                raise ValueError(f"unknown unit {unit!r}, expected 'mL' or 'oz'")
        elif ingredient in ('angostura', 'orange bitters'):
            # Bitters are shown as a whole number of dashes, at least one.
            # NOTE(review): 0.6 per dash here differs from the module-level
            # one_dash = 1 -- confirm which value is intended.
            quantity = max(1, int(quantity / 0.6))
            quantifier = ' dashes' if quantity > 1 else ' dash'
        elif ingredient == 'mint':
            quantifier = ' leaves' if quantity > 1 else ' leaf'
        elif unit == 'oz':
            quantity = float(f'{quantity * 0.033814:.3f}')  # convert to fl oz
            quantifier = ' fl oz'
        else:
            quantifier = ' mL'
        lines.append(f' {quantity}{quantifier} - {ingredient}\n')

    str_out = ''.join(lines)
    if to_print:
        print(str_out)
    return str_out
def test_datapoint(datapoint, category, ingredients, quantities):
    """Validate a scrubbed recipe and normalise its quantities.

    The recipe profile is computed with `get_profile`; if the drink would
    overfill its glass, every quantity is scaled down in place. A series of
    sanity checks is then applied, and one duplicated ingredient from a small
    allow-list is merged into a single entry.

    Args:
        datapoint: mapping read for its 'glass', 'how_to' and 'recipe' entries
            ('recipe'[1] is iterated as ingredient strings).
        category: cocktail category forwarded to `get_profile`.
        ingredients: list of ingredient names; modified in place when a
            duplicate is merged.
        quantities: list of quantities parallel to `ingredients`; modified in
            place when rescaled or merged.

    Returns:
        The (possibly modified) `ingredients` and `quantities` lists.

    Raises:
        AssertionError: when any check fails. Raised explicitly (not via
            `assert`) so the checks still run under `python -O`.
    """
    # Looked up first so that an ingredient missing from ingredient_list
    # fails before anything is modified.
    ingredient_indexes = [ingredient_list.index(ing) for ing in ingredients]
    profile = get_profile(category, ingredients, quantities)
    volume = profile['end volume']
    alcohol = profile['end alcohol']
    acid = profile['end acid']
    sugar = profile['end sugar']

    # check volume: scale everything down if the drink overfills its glass
    glass = datapoint['glass']
    if glass is not None:
        if volume > glass_volume[glass] * fill_rate:
            ratio = fill_rate * glass_volume[glass] / volume
            for i_q in range(len(quantities)):
                quantities[i_q] = float(f'{quantities[i_q] * ratio:.2f}')

    # check alcohol: the original lower bound was written `alcohol < 5`, which
    # contradicted its own message and rejected every drink at or above 5.
    _require(alcohol < 30, 'too boozy')
    _require(alcohol > 5, 'not boozy enough')
    _require(acid < 2, 'too much acid')
    _require(sugar < 20, 'too much sugar')
    _require(len(ingredients) > 1, 'only one ingredient')

    if len(set(ingredients)) != len(ingredients):
        # Positions of every repeated occurrence (the first one is kept).
        i_doubles = []
        seen = set()
        for i, ing in enumerate(ingredients):
            if ing in seen:
                i_doubles.append(i)
            else:
                seen.add(ing)
        ingredient_double_ok = ['mint', 'cointreau', 'lemon juice', 'cuban rum', 'double syrup']
        if len(i_doubles) == 1 and ingredients[i_doubles[0]] in ingredient_double_ok:
            ing_double = ingredients[i_doubles[0]]
            double_q = _quantity_of(ingredients, quantities, [ing_double])
            ingredients.pop(i_doubles[0])
            quantities.pop(i_doubles[0])
            quantities[ingredients.index(ing_double)] = double_q
            # Refresh the indexes so the merged ingredient is not counted
            # twice by the liqueur/vermouth check below.
            ingredient_indexes = [ingredient_list.index(ing) for ing in ingredients]
        else:
            raise AssertionError(f'double ingredient, not {ingredient_double_ok}')

    lemon_lime_q = _quantity_of(ingredients, quantities, ['lime juice', 'lemon juice'])
    _require(lemon_lime_q <= 45, 'too much lemon and lime')
    salt_q = _quantity_of(ingredients, quantities, ['salt'])
    _require(salt_q <= 8, 'too much salt')
    bitter_q = _quantity_of(ingredients, quantities, ['angostura', 'orange bitters'])
    _require(bitter_q <= 5 * one_dash, 'too much bitter')

    absinthe_q = _quantity_of(ingredients, quantities, ['absinthe'])
    if absinthe_q > 4 * one_dash:
        # Beyond a few dashes, absinthe must stay under half of the mix
        # (mint is excluded from the mix volume).
        mix_volume = np.sum([q for ing, q in zip(ingredients, quantities) if ing != 'mint'])
        _require(absinthe_q < 0.5 * mix_volume, 'filter absinthe glasses')

    # Reject drinks whose instructions or raw ingredients mention heat. 'hot'
    # is also a substring of 'shot', hence the exemption for instructions
    # that mention a shot.
    how_to = datapoint['how_to']
    mentions_heat = any(
        w in how_to or any(w in ing.lower() for ing in datapoint['recipe'][1])
        for w in ['warm', 'boil', 'hot']
    )
    _require(not (mentions_heat and 'shot' not in how_to), 'hot drink')

    water_q = _quantity_of(ingredients, quantities, ['water'])
    _require(water_q < 40, 'too much water')

    # n_liqueur = np.sum([ingredient_profiles['type'][i].lower() == 'liqueur' for i in ingredient_indexes])
    # assert n_liqueur <= 2
    n_liqueur_and_vermouth = np.sum([ingredient_profiles['type'][i].lower() in ['liqueur', 'vermouth'] for i in ingredient_indexes])
    _require(n_liqueur_and_vermouth <= 3, 'too many liqueurs and vermouths')
    return ingredients, quantities


def _require(condition, message=''):
    """Raise AssertionError(message) unless `condition` is true."""
    if not condition:
        raise AssertionError(message)


def _quantity_of(ingredients, quantities, names):
    """Return the summed quantity of the ingredients whose name is in `names`."""
    return np.sum([q for ing, q in zip(ingredients, quantities) if ing in names])
def run_battery_checks_difford(datapoint, category, ingredients, quantities):
    """Run `test_datapoint` on a recipe and report whether it was rejected.

    Args:
        datapoint: mapping describing the recipe; its 'names' and 'urls'
            entries are printed when the recipe is rejected.
        category: cocktail category forwarded to `test_datapoint`.
        ingredients: list of ingredient names.
        quantities: list of quantities parallel to `ingredients`.

    Returns:
        A tuple `(flag, ingredients, quantities)`. `flag` is True when any
        check raised, in which case both lists are returned as None;
        otherwise it is False and the lists are those returned by
        `test_datapoint`.
    """
    flag = False
    try:
        ingredients, quantities = test_datapoint(datapoint, category, ingredients, quantities)
    except Exception:
        # Any failed check or lookup rejects the recipe. `Exception` (not a
        # bare `except`) lets KeyboardInterrupt and SystemExit propagate, so
        # a long scrubbing run can still be interrupted.
        flag = True
        print(datapoint["names"])
        print(datapoint["urls"])
        ingredients, quantities = None, None
    return flag, ingredients, quantities
def tambouille(q, ingredients_scrubbed, quantities_scrubbed, cat):
    """Rewrite an irregular quantity string into a '<number> <unit>' string.

    Handles a handful of special cases (sugar cubes, fruit wedges and slices,
    'top up with', pinches of salt, whole eggs, ...). A quantity that matches
    none of them is returned unchanged.

    Args:
        q: raw quantity string of the ingredient being processed.
        ingredients_scrubbed: ingredient names of the recipe; the current
            ingredient is the one at index `len(quantities_scrubbed)`.
        quantities_scrubbed: quantities already resolved for the preceding
            ingredients.
        cat: cocktail category, used to look up `volume_ranges` for
            'top up with' quantities.

    Returns:
        The rewritten quantity string.

    Raises:
        ValueError: if a wedge or slice is requested for an ingredient with
            no known whole-fruit volume, or if the leading count of `q`
            cannot be parsed as a number.
    """
    # ugly
    ing_scrubbed = ingredients_scrubbed[len(quantities_scrubbed)]

    if q == '4 cube' and ing_scrubbed == 'pineapple juice':
        return '20 ml'
    if 'top up with' in q:
        # Fill up to the middle of the category's volume range, adding at
        # least 15 ml; mint does not count towards the volume so far.
        volume_so_far = np.sum([qty for qty, ing in zip(quantities_scrubbed, ingredients_scrubbed) if ing != 'mint'])
        volume_mix = np.sum(volume_ranges[cat]) / 2
        if (volume_mix - volume_so_far) < 15:
            return '15 ml'
        return str(int(volume_mix - volume_so_far)) + ' ml'
    if q == '1 pinch' and ing_scrubbed == 'salt':
        return '2 drops'
    if 'cube' in q and ing_scrubbed == 'double syrup':
        # 2g per cube, 1.7 is ratio solid / syrup
        return f'{float(q.split(" ")[0]) * 2 * 1.7:.2f} ml'
    if 'wedge' in q:
        # consider a wedge to be 0.15*the fruit.
        factor = _leading_count(q) * 0.15
        return f'{factor * _whole_fruit_volume(ing_scrubbed):.2f} ml'
    if 'slice' in q:
        factor = _leading_count(q)
        factor *= 0.1  # consider a slice to be 0.1*the fruit.
        return f'{factor * _whole_fruit_volume(ing_scrubbed):.2f} ml'
    if q == '1 whole' and ing_scrubbed == 'luxardo maraschino':
        return '10 ml'
    if ing_scrubbed == 'egg' and 'ml' not in q:
        return f'{float(q) * 30:.2f} ml'  # 30 ml per egg
    return q


# Juice volume, in ml, attributed to one whole fruit.
_WHOLE_FRUIT_VOLUME = {
    'orange juice': 70,
    'lime juice': 30,
    'lemon juice': 45,
    'pineapple juice': 140,
}


def _whole_fruit_volume(ingredient):
    """Return the whole-fruit juice volume for `ingredient`, or raise ValueError."""
    try:
        return _WHOLE_FRUIT_VOLUME[ingredient]
    except KeyError:
        raise ValueError(f'no whole-fruit volume known for {ingredient!r}') from None


def _leading_count(q):
    """Parse the first space-separated token of `q`, e.g. '2' or the fraction '1⁄2'."""
    token = q.split(' ')[0]
    if '⁄' in token:
        parts = token.split('⁄')
        return float(parts[0]) / float(parts[1])
    return float(token)
def compute_eucl_dist(a, b):
    """Return the Euclidean distance between `a` and `b`.

    The difference `a - b` is squared element-wise, summed over every
    element, and the square root of that total is returned.
    """
    difference = a - b
    squared_total = np.sum(difference * difference)
    return np.sqrt(squared_total)
def evaluate_with_quadruplets(representations, strategy='all'):
    """Score `representations` against the pickled quadruplets.

    Each stored entry is read as `(entry_strategy, close, far, ...)`: `close`
    is a pair of indexes into `representations` and `far` is a sequence of
    such pairs. An entry scores True when the distance between the `close`
    pair is strictly smaller than the smallest distance among the `far`
    pairs. Entries whose `close` does not have exactly two elements are
    skipped.

    Args:
        representations: indexable collection of vectors accepted by
            `compute_eucl_dist`.
        strategy: 'all' to use every entry, otherwise only the entries whose
            first element equals `strategy`.

    Returns:
        The mean of the per-entry scores, or `np.nan` when no entry was
        scored (including when no entry matches `strategy`).
    """
    # NOTE(review): QUADRUPLETS_PATH is neither defined nor imported in the
    # visible part of this module -- confirm where it comes from.
    # NOTE(security): pickle.load can execute arbitrary code; only use it on
    # a trusted quadruplets file.
    with open(QUADRUPLETS_PATH, 'rb') as f:
        data = pickle.load(f)
    entries = list(data.values())

    # The original if / elif / else chain had an unreachable `raise
    # ValueError`; the two reachable cases are kept as they behaved.
    if strategy == 'all':
        quadruplets = [entry[1:] for entry in entries]
    else:
        quadruplets = [entry[1:] for entry in entries if entry[0] == strategy]

    scores = []
    for q in quadruplets:
        close = q[0]
        if len(close) != 2:
            continue
        far = q[1]
        distance_close = compute_eucl_dist(representations[close[0]], representations[close[1]])
        distances_far = [compute_eucl_dist(representations[pair[0]], representations[pair[1]]) for pair in far]
        scores.append(distance_close < np.min(distances_far))

    if len(scores) == 0:
        return np.nan
    return np.mean(scores)