File size: 9,271 Bytes
e775f6d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
import numpy as np
from src.cocktails.utilities.ingredients_utilities import ingredient2ingredient_id, ingredient_profiles, ingredients_per_type, ingredient_list, find_ingredient_from_str
from src.cocktails.utilities.cocktail_category_detection_utilities import *
import time

# Earlier version of the key list, kept for reference (it also contained 'pH' and 'strong'):
# representation_keys = ['pH', 'sour', 'sweet', 'booze', 'bitter', 'fruit', 'herb',
#                        'complex', 'spicy', 'strong', 'oaky', 'fizzy', 'colorful', 'eggy']
# Feature names of the cocktail representation; this order also defines the column order of `ing_reps`.
representation_keys = ['sour', 'sweet', 'booze', 'bitter', 'fruit', 'herb',
                       'complex', 'spicy', 'oaky', 'fizzy', 'colorful', 'eggy']
# Features obtained as a quantity-weighted average of the ingredient values ('pH' and 'complex'
# have dedicated formulas). Filtering the list keeps the order of `representation_keys`, whereas
# a set difference yields an order that depends on string hashing and can change between runs.
representation_keys_linear = [k for k in representation_keys if k not in ('pH', 'complex')]

# Ingredient feature matrix: after the transpose there is one row per entry of
# ingredient2ingredient_id.values() and one column per key of representation_keys.
ing_reps = np.array([
    [ingredient_profiles[key][ingredient_id] for ingredient_id in ingredient2ingredient_id.values()]
    for key in representation_keys
]).transpose()


def compute_cocktail_representation(profile, ingredients, quantities):
    """Compute the feature representation of a cocktail from its recipe.

    Args:
        profile: mapping providing 'mix volume' and 'end volume'; their ratio
            is used to scale the features after dilution.
        ingredients: ingredient names, each a key of `ingredient2ingredient_id`.
        quantities: one quantity per ingredient (same length as `ingredients`).

    Returns:
        dict with, for every key of `representation_keys`, the value of the mix
        and its diluted counterpart under 'end ' + key, plus 'pH' and 'end pH'.
    """
    assert len(ingredients) == len(quantities)
    quantities = np.array(quantities)
    weights = quantities / np.sum(quantities)
    ing_ids = np.array([ingredient2ingredient_id[ing] for ing in ingredients])

    def per_ingredient(key):
        # value of feature `key` for each ingredient of the recipe
        return np.array([ingredient_profiles[key][ing_id] for ing_id in ing_ids])

    # linear features: quantity-weighted average of the ingredient features
    rep = {key: np.dot(weights, per_ingredient(key)) for key in representation_keys_linear}

    # pH = -log10(x): average the concentrations x, not the pH values themselves
    mix_concentration = np.dot(weights, 10 ** (- per_ingredient('pH')))
    rep['pH'] = - np.log10(mix_concentration)

    # complexity: mean ingredient complexity plus the number of ingredients
    rep['complex'] = np.mean(per_ingredient('complex')) + len(ing_ids)

    # profile after dilution: every feature is scaled by mix volume / end volume
    volume_ratio = profile['mix volume'] / profile['end volume']
    rep.update({'end ' + key: rep[key] * volume_ratio for key in representation_keys})
    diluted_concentration = 10 ** (-rep['pH']) * volume_ratio
    rep['end pH'] = - np.log10(diluted_concentration)
    return rep

def get_alcohol_profile(ingredients, quantities):
    """Return the quantity-weighted ethanol value of a recipe and its total volume.

    The first 'mint' entry (if any) is dropped together with its quantity before
    computing anything. The inputs are copied, so the caller's lists are not
    modified.

    Args:
        ingredients: list of lower-case ingredient names found in `ingredient_list`.
        quantities: list with one quantity per ingredient.

    Returns:
        (alcohol, volume_mix): weighted average of the 'ethanol' profile and the
        sum of the remaining quantities.
    """
    ingredients = ingredients.copy()
    quantities = quantities.copy()
    assert len(ingredients) == len(quantities)
    if 'mint' in ingredients:
        mint_position = ingredients.index('mint')
        ingredients.pop(mint_position)
        quantities.pop(mint_position)

    volume_mix = np.sum(quantities)
    weights = quantities / volume_mix
    assert np.abs(np.sum(weights) - 1) < 1e-4

    # rows of the profile table are located through the lower-cased ingredient list
    lowercase_names = [name.lower() for name in ingredient_list]
    ethanol = [ingredient_profiles['ethanol'][lowercase_names.index(ing)]
               for ing, _ in zip(ingredients, quantities)]
    return np.dot(ethanol, weights), volume_mix

def get_mix_profile(ingredients, quantities):
    """Return the quantity-weighted alcohol, sugar and acid values of a recipe.

    The first 'mint' entry (if any) is dropped together with its quantity before
    computing anything. The inputs are copied, so the caller's lists are not
    modified.

    Args:
        ingredients: list of lower-case ingredient names found in `ingredient_list`.
        quantities: list with one quantity per ingredient.

    Returns:
        (alcohol, sugar, acid): weighted averages of the 'ethanol', 'sugar' and
        'acid' ingredient profiles.
    """
    ingredients = ingredients.copy()
    quantities = quantities.copy()
    assert len(ingredients) == len(quantities)
    if 'mint' in ingredients:
        mint_position = ingredients.index('mint')
        ingredients.pop(mint_position)
        quantities.pop(mint_position)

    total_volume = np.sum(quantities)
    weights = quantities / total_volume
    assert np.abs(np.sum(weights) - 1) < 1e-4

    # rows of the profile table are located through the lower-cased ingredient list
    lowercase_names = [name.lower() for name in ingredient_list]
    rows = [lowercase_names.index(ing) for ing, _ in zip(ingredients, quantities)]

    def weighted(feature):
        # weighted average of one profile column over the recipe
        return np.dot([ingredient_profiles[feature][row] for row in rows], weights)

    sugar = weighted('sugar')
    acid = weighted('acid')
    alcohol = weighted('ethanol')
    return alcohol, sugar, acid


def extract_preparation_type(instructions, recipe):
    """Infer how a cocktail is prepared from its textual instructions.

    Keywords are searched case-insensitively, in priority order:
    'shake' -> 'shaken' (or 'egg_shaken' when the recipe contains egg),
    'stir' -> 'stirred', 'blend' -> 'blended'; anything else -> 'built'.

    Args:
        instructions: free-text preparation instructions.
        recipe: indexable whose element 1 is an iterable of ingredient strings;
            each is passed to `find_ingredient_from_str` and element 1 of the
            result is compared to 'egg'.

    Returns:
        (flag, prep_type): `flag` is always False (kept so callers can keep
        unpacking two values); `prep_type` is one of 'egg_shaken', 'shaken',
        'stirred', 'blended', 'built'.
    """
    flag = False
    instructions = instructions.lower()
    # A list (not a generator) so every ingredient string is still parsed, as before.
    egg_in_recipe = any([find_ingredient_from_str(ing_str)[1] == 'egg' for ing_str in recipe[1]])
    if 'shake' in instructions:
        prep_type = 'egg_shaken' if egg_in_recipe else 'shaken'
    elif 'stir' in instructions:
        prep_type = 'stirred'
    elif 'blend' in instructions:
        prep_type = 'blended'
    else:
        # covers 'build', 'mix', 'pour', 'combine', 'place' as well as unrecognised instructions
        prep_type = 'built'
    return flag, prep_type

def get_dilution_ratio(category, alcohol):
    """Return the dilution ratio for a preparation category and alcohol value.

    The stirred and shaken formulas come from the Liquid Intelligence book;
    the formula for built drinks was invented (half of the stirred one).
    Any other category yields a ratio of 1.

    Args:
        category: preparation type ('stirred', 'shaken', 'egg_shaken', 'built', ...).
        alcohol: alcohol value of the mix before dilution.

    Returns:
        The dilution ratio (callers multiply it by the mix volume to get the added volume).
    """
    if category in ('shaken', 'egg_shaken'):
        return -1.567 * alcohol**2 + 1.742 * alcohol + 0.203
    if category in ('stirred', 'built'):
        stirred_ratio = -1.21 * alcohol**2 + 1.246 * alcohol + 0.145
        if category == 'stirred':
            return stirred_ratio
        return stirred_ratio / 2
    return 1

def get_cocktail_rep(category, ingredients, quantities, keys):
    """Compute the diluted representation vector of a cocktail.

    Args:
        category: preparation type, forwarded to `get_dilution_ratio`.
        ingredients: ingredient names, each a key of `ingredient2ingredient_id`.
        quantities: one quantity per ingredient (list or array).
        keys: kept for backward compatibility and no longer read. The position
            of the 'complex' feature is taken from `representation_keys`, which
            is what defines the column order of `ing_reps`; looking it up in a
            caller-provided list overwrote the wrong column whenever that list
            was ordered differently (e.g. when it started with 'end volume').

    Returns:
        (cocktail_rep, end_volume, end_alcohol): `cocktail_rep` is a 1-D array
        holding `end_volume` followed by the diluted features in the order of
        `representation_keys`.
    """
    assert len(ingredients) == len(quantities)
    quantities = np.asarray(quantities, dtype=float)

    # 'mint' entries are excluded from the mix volume and from the alcohol content
    not_mint = np.array([ing != 'mint' for ing in ingredients], dtype=bool)
    quantities_no_mint = quantities[not_mint]
    volume_mix = np.sum(quantities_no_mint)
    weights_no_mint = quantities_no_mint / volume_mix
    assert np.abs(np.sum(weights_no_mint) - 1) < 1e-4
    ing_ids_no_mint = np.array([ingredient2ingredient_id[ing] for ing in ingredients if ing != 'mint'])
    alcohol = np.array([ingredient_profiles['ethanol'][ing_id] for ing_id in ing_ids_no_mint])
    alcohol = np.dot(alcohol, weights_no_mint)

    # dilution adds volume_mix * dilution_ratio to the mix
    dilution_ratio = get_dilution_ratio(category, alcohol)
    end_volume = volume_mix + volume_mix * dilution_ratio
    volume_ratio = volume_mix / end_volume
    end_alcohol = alcohol * volume_ratio

    # representation of the mix: quantity-weighted average over all ingredients (mint included)
    weights = quantities / np.sum(quantities)
    assert np.abs(np.sum(weights) - 1) < 1e-4
    ing_ids = np.array([ingredient2ingredient_id[ing] for ing in ingredients])
    reps = ing_reps[ing_ids]
    cocktail_rep = np.dot(weights, reps)
    # complexity is not a weighted average: it increases with the number of ingredients
    i_complex = representation_keys.index('complex')
    cocktail_rep[i_complex] = np.mean(reps[:, i_complex]) + len(ing_ids)

    # compute profile after dilution
    cocktail_rep = cocktail_rep * volume_ratio
    cocktail_rep = np.concatenate([[end_volume], cocktail_rep])
    return cocktail_rep, end_volume, end_alcohol

def get_profile(category, ingredients, quantities):
    """Build the full profile dictionary of a cocktail.

    Combines the mix volume, the alcohol/sugar/acid values before and after
    dilution, and the entries returned by `compute_cocktail_representation`.

    Args:
        category: preparation type, forwarded to `get_dilution_ratio`.
        ingredients: list of ingredient names.
        quantities: list with one quantity per ingredient.

    Returns:
        dict containing the 'mix ...', 'dilution ratio' and 'end ...' entries
        plus every key of the cocktail representation.
    """
    # 'mint' entries do not count towards the mix volume
    volume_mix = np.sum([quantities[i] for i, ing in enumerate(ingredients) if ing != 'mint'])
    alcohol, sugar, acid = get_mix_profile(ingredients, quantities)

    dilution_ratio = get_dilution_ratio(category, alcohol)
    end_volume = volume_mix + volume_mix * dilution_ratio
    volume_ratio = volume_mix / end_volume

    profile = dict()
    profile['mix volume'] = volume_mix
    profile['mix alcohol'] = alcohol
    profile['mix sugar'] = sugar
    profile['mix acid'] = acid
    profile['dilution ratio'] = dilution_ratio
    profile['end volume'] = end_volume
    profile['end alcohol'] = alcohol * volume_ratio
    profile['end sugar'] = sugar * volume_ratio
    profile['end acid'] = acid * volume_ratio

    profile.update(compute_cocktail_representation(profile, ingredients, quantities))
    return profile

# Ordered names of the profile entries copied onto a datapoint: volumes and dilution,
# alcohol/sugar/acid before ('mix') and after ('end') dilution, then every
# representation key followed by its 'end ' counterpart.
profile_keys = (
    ['mix volume', 'end volume']
    + ['dilution ratio']
    + ['mix alcohol', 'end alcohol']
    + ['mix sugar', 'end sugar']
    + ['mix acid', 'end acid']
    + representation_keys
    + ['end ' + key for key in representation_keys]
)

def update_profile_in_datapoint(datapoint, category, ingredients, quantities):
    """Write every entry of `profile_keys` from the computed profile into `datapoint`.

    Args:
        datapoint: object supporting item assignment; modified in place.
        category: preparation type, forwarded to `get_profile`.
        ingredients: list of ingredient names.
        quantities: list with one quantity per ingredient.

    Returns:
        The same `datapoint` object, after the update.
    """
    computed = get_profile(category, ingredients, quantities)
    # item assignment (rather than .update) keeps this working for any indexable container
    for key in profile_keys:
        datapoint[key] = computed[key]
    return datapoint

# define representation keys
def get_bunch_of_rep_keys():
    """Return named subsets of `profile_keys`.

    Returns:
        dict mapping a subset name to a list of keys:
        'all' (the `profile_keys` list itself, not a copy), 'only_end',
        'except_end', 'custom', 'restricted' and 'affective'.
    """
    def end_keys_without(excluded):
        # keys containing 'end', minus the excluded ones, in profile_keys order
        return [key for key in profile_keys if 'end' in key and key not in excluded]

    custom_excluded = ['end alcohol', 'end sugar', 'end acid', 'end pH', 'end strong']
    restricted_excluded = ['end alcohol', 'end sugar', 'end acid', 'end pH', 'end strong', 'end spicy', 'end oaky']

    dict_rep_keys = dict()
    dict_rep_keys['all'] = profile_keys
    dict_rep_keys['only_end'] = end_keys_without([])
    dict_rep_keys['except_end'] = [key for key in profile_keys if 'end' not in key]
    dict_rep_keys['custom'] = end_keys_without(custom_excluded)
    dict_rep_keys['restricted'] = end_keys_without(restricted_excluded)
    dict_rep_keys['affective'] = ['end booze', 'end sweet', 'end sour', 'end fizzy', 'end complex', 'end bitter', 'end spicy', 'end colorful']
    return dict_rep_keys