File size: 14,898 Bytes
fe3bafe |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 |
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "9e7cae4a",
"metadata": {},
"outputs": [],
"source": [
"from lxml import html\n",
"import requests\n",
"import os\n",
"import pandas as pd\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c88b9ce9",
"metadata": {},
"outputs": [],
"source": [
"# API credentials come from the environment so they never live in the notebook.\n",
"client_id = os.environ['LV_CLIENT_ID']\n",
"client_secret = os.environ['LV_CLIENT_SECRET']\n",
"# Session cookies (JSESSIONID, ATG_SESSION_ID, ...) are tied to one browser\n",
"# session, so they are supplied the same way instead of being committed here.\n",
"# Set LV_COOKIE to the value of the `cookie` request header of a live session.\n",
"session_cookie = os.environ.get('LV_COOKIE', '')\n",
"\n",
"# Browser-like request headers reused by every `requests` call below.\n",
"headers = {\n",
"    'Content-Type': 'application/json',\n",
"    'authority': 'api.louisvuitton.com',\n",
"    'accept': 'application/json, text/plain, */*',\n",
"    'accept-language': 'en-US,en;q=0.6',\n",
"    'client_id': client_id,\n",
"    'client_secret': client_secret,\n",
"    'origin': 'https://us.louisvuitton.com',\n",
"    'referer': 'https://us.louisvuitton.com/',\n",
"    'sec-ch-ua': '\"Brave\";v=\"113\", \"Chromium\";v=\"113\", \"Not-A.Brand\";v=\"24\"',\n",
"    'sec-ch-ua-mobile': '?0',\n",
"    'sec-ch-ua-platform': '\"macOS\"',\n",
"    'sec-fetch-dest': 'empty',\n",
"    'sec-fetch-mode': 'cors',\n",
"    'sec-fetch-site': 'same-site',\n",
"    'sec-gpc': '1',\n",
"    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0',\n",
"}\n",
"# Only send a cookie header when one was actually provided.\n",
"if session_cookie:\n",
"    headers['cookie'] = session_cookie\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "78440fbc",
"metadata": {},
"outputs": [],
"source": [
"# Download the US homepage and parse it into an lxml tree.\n",
"url = 'https://us.louisvuitton.com/eng-us/homepage'\n",
"# A timeout keeps a stalled connection from hanging the kernel forever.\n",
"response = requests.get(url, headers=headers, timeout=30)\n",
"# Fail loudly on a blocked/errored request instead of parsing an error page.\n",
"response.raise_for_status()\n",
"print(len(response.text))\n",
"tree = html.fromstring(response.text)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f91b5ee1",
"metadata": {},
"outputs": [],
"source": [
"# Every link inside the page header; category links end in an `N-<code>` segment,\n",
"# e.g. .../all-handbags/_/N-tfr7qdp.\n",
"sitemap_paths = tree.xpath('//*[@id=\"header\"]//a/@href')\n",
"\n",
"# Maps category slug -> {'code': <code without the N- prefix>}.\n",
"categories = {}\n",
"for p in sitemap_paths:\n",
"    parts = p.split(\"/\")\n",
"    # The slug is read from parts[-3], so skip links too short to have one\n",
"    # rather than raising IndexError.\n",
"    if len(parts) >= 3 and parts[-1].startswith(\"N-\"):\n",
"        categories[parts[-3]] = {\"code\": parts[-1][2:]}\n",
"len(categories)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "429ba6c0",
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"# For every category: page through the product-listing API, then reduce each\n",
"# hit to the handful of fields used downstream.\n",
"for cat_key in categories.keys():\n",
"    print(cat_key)\n",
"    code = categories[cat_key][\"code\"]\n",
"\n",
"    url = f'https://api.louisvuitton.com/eco-eu/search-merch-eapi/v1/eng-us/plp/products/{code}-ay1ygzo25?page='\n",
"    print(url)\n",
"    # Upper bound used only until a response reports the real page count.\n",
"    nbPages = 100\n",
"    page = 0\n",
"    all_hits = []\n",
"\n",
"    while page < nbPages:\n",
"        response = requests.get(url + str(page), headers=headers, timeout=30)\n",
"        # Surface HTTP failures here instead of as a KeyError on the JSON body.\n",
"        response.raise_for_status()\n",
"        res = response.json()\n",
"        if \"nbPages\" in res:\n",
"            nbPages = res[\"nbPages\"]\n",
"        all_hits.extend(res[\"hits\"])\n",
"        print(res[\"page\"], len(all_hits))\n",
"        page = page + 1\n",
"\n",
"    recs = []\n",
"    misses = []\n",
"    for hit in all_hits:\n",
"        try:\n",
"            rec = {}\n",
"            for key in [\"productId\", \"name\", \"url\", \"disambiguatingDescription\"]:\n",
"                rec[key] = hit[key]\n",
"            rec[\"image\"] = hit[\"image\"][0][\"contentUrl\"]\n",
"        except (KeyError, IndexError, TypeError):\n",
"            # Only a missing/odd-shaped field counts as a miss; anything else\n",
"            # (e.g. KeyboardInterrupt) propagates. Keep the raw hit so the\n",
"            # failure can be inspected, not the half-built record.\n",
"            misses.append(hit)\n",
"            print(hit)\n",
"        else:\n",
"            recs.append(rec)\n",
"    categories[cat_key][\"urls\"] = recs\n",
"    categories[cat_key][\"misses\"] = misses\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1bd2372a",
"metadata": {},
"outputs": [],
"source": [
"# Flatten the per-category product lists into one list of row dicts, each\n",
"# tagged with the category slug and code it was found under.\n",
"flatten = []\n",
"for cat, info in categories.items():\n",
"    for rec in info[\"urls\"]:\n",
"        doc = {\n",
"            \"productId\": rec[\"productId\"],\n",
"            \"category\": cat,\n",
"            \"category_code\": info[\"code\"],\n",
"            \"name\": rec[\"name\"],\n",
"            \"url\": rec[\"url\"],\n",
"            \"image\": rec[\"image\"],\n",
"            \"disambiguatingDescription\": rec[\"disambiguatingDescription\"],\n",
"        }\n",
"        flatten.append(doc)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9df3b289",
"metadata": {},
"outputs": [],
"source": [
"\n",
"# One DataFrame row per flattened product record.\n",
"dat = pd.DataFrame(flatten)\n",
"# Pixel size substituted for both image-dimension placeholders.\n",
"size = str(400)\n",
"# The placeholders are literal text, not patterns. Passing regex=False makes\n",
"# that explicit and keeps the result the same across pandas versions, whose\n",
"# default for `regex` changed in pandas 2.0.\n",
"dat['image_fix'] = (\n",
"    dat['image']\n",
"    .str.replace(\"{IMG_HEIGHT}\", size, regex=False)\n",
"    .str.replace(\"{IMG_WIDTH}\", size, regex=False)\n",
"    .str.replace(\" \", \"%20\", regex=False)\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e19f1375",
"metadata": {},
"outputs": [],
"source": [
"dat"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f08df1f2",
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"import os\n",
"import subprocess\n",
"\n",
"# Download every product image to lvphotos/<productId>.png with curl.\n",
"#\n",
"# curl is invoked with an argument list (no shell), so characters in the URL or\n",
"# product id can never be interpreted as shell syntax.\n",
"# Session cookies are read from LV_COOKIE instead of being committed here; set\n",
"# it to the `cookie` request header of a live browser session if needed.\n",
"image_cookie = os.environ.get('LV_COOKIE', '')\n",
"\n",
"# curl does not create missing directories for --output.\n",
"os.makedirs('lvphotos', exist_ok=True)\n",
"\n",
"curl_headers = [\n",
"    'authority: us.louisvuitton.com',\n",
"    'accept: image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8',\n",
"    'accept-language: en-US,en;q=0.9',\n",
"    'cache-control: no-cache',\n",
"    'pragma: no-cache',\n",
"    'referer: https://us.louisvuitton.com/eng-us/products/twist-belt-chain-wallet-epi-nvprod1740047v/M68560',\n",
"    'sec-ch-ua: \"Not.A/Brand\";v=\"8\", \"Chromium\";v=\"114\", \"Brave\";v=\"114\"',\n",
"    'sec-ch-ua-mobile: ?0',\n",
"    'sec-ch-ua-platform: \"macOS\"',\n",
"    'sec-fetch-dest: image',\n",
"    'sec-fetch-mode: no-cors',\n",
"    'sec-fetch-site: same-origin',\n",
"    'sec-gpc: 1',\n",
"    'user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',\n",
"]\n",
"if image_cookie:\n",
"    curl_headers.append(f'cookie: {image_cookie}')\n",
"\n",
"# Product ids whose download exited non-zero, kept for inspection or retry.\n",
"failed_downloads = []\n",
"for rec in dat.itertuples():\n",
"    print(rec.image_fix)\n",
"    cmd = ['curl', rec.image_fix]\n",
"    for header in curl_headers:\n",
"        cmd += ['-H', header]\n",
"    # --fail: on an HTTP error, do not save the error body as if it were an image.\n",
"    cmd += ['--fail', '--compressed', '--output', f'lvphotos/{rec.productId}.png']\n",
"    result = subprocess.run(cmd)\n",
"    if result.returncode != 0:\n",
"        failed_downloads.append(rec.productId)\n",
"print(f'{len(failed_downloads)} downloads failed')\n"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "a8ab57be",
"metadata": {
"scrolled": false
},
"outputs": [
{
"ename": "UnidentifiedImageError",
"evalue": "cannot identify image file '/Users/jdonaldson/Projects/hushh/lvphotos/nvprod4280065v.png'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mUnidentifiedImageError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[16], line 10\u001b[0m\n\u001b[1;32m 8\u001b[0m encodings \u001b[38;5;241m=\u001b[39m []\n\u001b[1;32m 9\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m file \u001b[38;5;129;01min\u001b[39;00m files:\n\u001b[0;32m---> 10\u001b[0m image \u001b[38;5;241m=\u001b[39m \u001b[43mImage\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mopen\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43md\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m/\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mfile\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m/opt/homebrew/anaconda3/envs/lv/lib/python3.10/site-packages/PIL/Image.py:3283\u001b[0m, in \u001b[0;36mopen\u001b[0;34m(fp, mode, formats)\u001b[0m\n\u001b[1;32m 3281\u001b[0m warnings\u001b[38;5;241m.\u001b[39mwarn(message)\n\u001b[1;32m 3282\u001b[0m msg \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcannot identify image file \u001b[39m\u001b[38;5;132;01m%r\u001b[39;00m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m%\u001b[39m (filename \u001b[38;5;28;01mif\u001b[39;00m filename \u001b[38;5;28;01melse\u001b[39;00m fp)\n\u001b[0;32m-> 3283\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m UnidentifiedImageError(msg)\n",
"\u001b[0;31mUnidentifiedImageError\u001b[0m: cannot identify image file '/Users/jdonaldson/Projects/hushh/lvphotos/nvprod4280065v.png'"
]
}
],
"source": [
"import os\n",
"import glob\n",
"import sys\n",
"from PIL import Image, UnidentifiedImageError\n",
"# Directory holding the downloaded images. Override with LV_PHOTO_DIR rather\n",
"# than editing the machine-specific default below.\n",
"d = os.environ.get('LV_PHOTO_DIR', '/Users/jdonaldson/Projects/hushh/lvphotos')\n",
"# Later shell cells operate on the current directory, so switch into it.\n",
"os.chdir(d)\n",
"files = glob.glob('*.png')\n",
"encodings = []\n",
"# Names of files Pillow could not identify as an image.\n",
"unreadable = []\n",
"for file in files:\n",
"    try:\n",
"        # The context manager closes each file handle once it has been opened.\n",
"        with Image.open(f\"{d}/{file}\") as image:\n",
"            pass\n",
"    except UnidentifiedImageError:\n",
"        # Record the bad file and keep going instead of aborting the whole pass.\n",
"        unreadable.append(file)\n",
"print(f'{len(unreadable)} of {len(files)} files could not be identified')\n"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "1de4037f",
"metadata": {},
"outputs": [],
"source": [
"!mogrify -format jpg *.png"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "191cd02a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/Users/jdonaldson/Projects/hushh/lvphotos\r\n"
]
}
],
"source": [
"!pwd"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dac99f1c",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.11"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
|