{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "md-overview",
   "metadata": {},
   "source": [
    "# Louis Vuitton product catalogue scraper\n",
    "\n",
    "Collects the category codes linked from the site header, pages through the product-listing API for each category, flattens the hits into one DataFrame, downloads one image per product into `lvphotos/`, and checks that the downloaded files open with Pillow.\n",
    "\n",
    "**Setup**\n",
    "\n",
    "- `LV_CLIENT_ID` and `LV_CLIENT_SECRET` must be set in the environment.\n",
    "- `LV_COOKIE` is optional: a browser `cookie` header value sent with every request. Session cookies expire and identify a browser session, so they are read from the environment instead of being stored in the notebook.\n",
    "- The JPEG conversion step shells out to ImageMagick's `mogrify`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9e7cae4a",
   "metadata": {},
   "outputs": [],
   "source": [
    "import glob\n",
    "import os\n",
    "import sys\n",
    "from pathlib import Path\n",
    "\n",
    "import pandas as pd\n",
    "import requests\n",
    "from lxml import html\n",
    "from PIL import Image\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-config",
   "metadata": {},
   "source": [
    "## Configuration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c88b9ce9",
   "metadata": {},
   "outputs": [],
   "source": [
    "client_id = os.environ['LV_CLIENT_ID']\n",
    "client_secret = os.environ['LV_CLIENT_SECRET']\n",
    "# Optional browser cookie header; never paste session cookies into the notebook.\n",
    "SESSION_COOKIE = os.environ.get('LV_COOKIE', '')\n",
    "\n",
    "HOMEPAGE_URL = 'https://us.louisvuitton.com/eng-us/homepage'\n",
    "PLP_URL = 'https://api.louisvuitton.com/eco-eu/search-merch-eapi/v1/eng-us/plp/products/{code}-ay1ygzo25?page='\n",
    "MAX_PAGES = 100       # page cap used until an API response reports nbPages\n",
    "REQUEST_TIMEOUT = 30  # seconds, applied to every HTTP request\n",
    "IMAGE_SIZE = 400      # substituted for {IMG_WIDTH} and {IMG_HEIGHT} in image URLs\n",
    "PHOTO_DIR = Path('lvphotos')\n",
    "RECORD_KEYS = ['productId', 'name', 'url', 'disambiguatingDescription']\n",
    "PRODUCT_COLUMNS = [\n",
    "    'productId',\n",
    "    'category',\n",
    "    'category_code',\n",
    "    'name',\n",
    "    'url',\n",
    "    'image',\n",
    "    'disambiguatingDescription',\n",
    "]\n",
    "\n",
    "# Headers sent with the homepage request and with every listing-API request.\n",
    "headers = {\n",
    "    'Content-Type': 'application/json',\n",
    "    'authority': 'api.louisvuitton.com',\n",
    "    'accept': 'application/json, text/plain, */*',\n",
    "    'accept-language': 'en-US,en;q=0.6',\n",
    "    'client_id': client_id,\n",
    "    'client_secret': client_secret,\n",
    "    'origin': 'https://us.louisvuitton.com',\n",
    "    'referer': 'https://us.louisvuitton.com/',\n",
    "    'sec-ch-ua': '\"Brave\";v=\"113\", \"Chromium\";v=\"113\", \"Not-A.Brand\";v=\"24\"',\n",
    "    'sec-ch-ua-mobile': '?0',\n",
    "    'sec-ch-ua-platform': '\"macOS\"',\n",
    "    'sec-fetch-dest': 'empty',\n",
    "    'sec-fetch-mode': 'cors',\n",
    "    'sec-fetch-site': 'same-site',\n",
    "    'sec-gpc': '1',\n",
    "    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0',\n",
    "}\n",
    "\n",
    "# Headers sent with image downloads; the API credentials are deliberately not included.\n",
    "image_headers = {\n",
    "    'authority': 'us.louisvuitton.com',\n",
    "    'accept': 'image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8',\n",
    "    'accept-language': 'en-US,en;q=0.9',\n",
    "    'cache-control': 'no-cache',\n",
    "    'pragma': 'no-cache',\n",
    "    'referer': 'https://us.louisvuitton.com/eng-us/products/twist-belt-chain-wallet-epi-nvprod1740047v/M68560',\n",
    "    'sec-ch-ua': '\"Not.A/Brand\";v=\"8\", \"Chromium\";v=\"114\", \"Brave\";v=\"114\"',\n",
    "    'sec-ch-ua-mobile': '?0',\n",
    "    'sec-ch-ua-platform': '\"macOS\"',\n",
    "    'sec-fetch-dest': 'image',\n",
    "    'sec-fetch-mode': 'no-cors',\n",
    "    'sec-fetch-site': 'same-origin',\n",
    "    'sec-gpc': '1',\n",
    "    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',\n",
    "}\n",
    "\n",
    "if SESSION_COOKIE:\n",
    "    headers['cookie'] = SESSION_COOKIE\n",
    "    image_headers['cookie'] = SESSION_COOKIE\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-helpers",
   "metadata": {},
   "source": [
    "## Helpers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "code-helpers",
   "metadata": {},
   "outputs": [],
   "source": [
    "def parse_categories(paths):\n",
    "    \"\"\"Return {category: {'code': code}} for links whose last segment is 'N-<code>'.\n",
    "\n",
    "    The category name is the third-from-last path segment. Links with fewer\n",
    "    than three segments are skipped instead of raising IndexError.\n",
    "    \"\"\"\n",
    "    found = {}\n",
    "    for path in paths:\n",
    "        parts = path.split('/')\n",
    "        if len(parts) >= 3 and parts[-1].startswith('N-'):\n",
    "            found[parts[-3]] = {'code': parts[-1][2:]}\n",
    "    return found\n",
    "\n",
    "\n",
    "def fetch_category_hits(code):\n",
    "    \"\"\"Page through the listing API for one category code and return every hit.\n",
    "\n",
    "    Raises requests.HTTPError on a non-2xx response.\n",
    "    \"\"\"\n",
    "    base_url = PLP_URL.format(code=code)\n",
    "    hits = []\n",
    "    page_count = MAX_PAGES\n",
    "    page = 0\n",
    "    while page < page_count:\n",
    "        response = requests.get(base_url + str(page), headers=headers, timeout=REQUEST_TIMEOUT)\n",
    "        response.raise_for_status()\n",
    "        payload = response.json()\n",
    "        page_count = payload.get('nbPages', page_count)\n",
    "        page_hits = payload.get('hits', [])\n",
    "        if not page_hits:\n",
    "            break  # an empty page means there is nothing further to fetch\n",
    "        hits.extend(page_hits)\n",
    "        page += 1\n",
    "    return hits\n",
    "\n",
    "\n",
    "def hit_to_record(hit):\n",
    "    \"\"\"Pick the fields kept for each product; raises if one of them is missing.\"\"\"\n",
    "    record = {key: hit[key] for key in RECORD_KEYS}\n",
    "    record['image'] = hit['image'][0]['contentUrl']\n",
    "    return record\n",
    "\n",
    "\n",
    "def split_hits(hits):\n",
    "    \"\"\"Split hits into (records, misses); a miss is a raw hit lacking a required field.\"\"\"\n",
    "    records, misses = [], []\n",
    "    for hit in hits:\n",
    "        try:\n",
    "            records.append(hit_to_record(hit))\n",
    "        except (KeyError, IndexError, TypeError):\n",
    "            misses.append(hit)\n",
    "    return records, misses\n",
    "\n",
    "\n",
    "def download_image(url, dest):\n",
    "    \"\"\"Download url to dest; return True on success, False on an HTTP or network error.\"\"\"\n",
    "    try:\n",
    "        response = requests.get(url, headers=image_headers, timeout=REQUEST_TIMEOUT)\n",
    "    except requests.RequestException:\n",
    "        return False\n",
    "    if not response.ok:\n",
    "        return False\n",
    "    dest.write_bytes(response.content)\n",
    "    return True\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-homepage",
   "metadata": {},
   "source": [
    "## Fetch the homepage"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "78440fbc",
   "metadata": {},
   "outputs": [],
   "source": [
    "homepage = requests.get(HOMEPAGE_URL, headers=headers, timeout=REQUEST_TIMEOUT)\n",
    "homepage.raise_for_status()\n",
    "print(len(homepage.text))\n",
    "tree = html.fromstring(homepage.text)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-categories",
   "metadata": {},
   "source": [
    "## Discover categories\n",
    "\n",
    "Every link under the element with id `header` whose last path segment starts with `N-` is treated as a category link."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f91b5ee1",
   "metadata": {},
   "outputs": [],
   "source": [
    "sitemap_paths = tree.xpath('//*[@id=\"header\"]//a/@href')\n",
    "categories = parse_categories(sitemap_paths)\n",
    "len(categories)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-listings",
   "metadata": {},
   "source": [
    "## Fetch product listings per category"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "429ba6c0",
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "catalog = {}\n",
    "for cat_key, info in categories.items():\n",
    "    records, misses = split_hits(fetch_category_hits(info['code']))\n",
    "    catalog[cat_key] = {**info, 'urls': records, 'misses': misses}\n",
    "    print(f'{cat_key}: {len(records)} products, {len(misses)} misses')\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-flatten",
   "metadata": {},
   "source": [
    "## Flatten into one table"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1bd2372a",
   "metadata": {},
   "outputs": [],
   "source": [
    "flatten = [\n",
    "    {\n",
    "        'productId': rec['productId'],\n",
    "        'category': cat,\n",
    "        'category_code': info['code'],\n",
    "        'name': rec['name'],\n",
    "        'url': rec['url'],\n",
    "        'image': rec['image'],\n",
    "        'disambiguatingDescription': rec['disambiguatingDescription'],\n",
    "    }\n",
    "    for cat, info in catalog.items()\n",
    "    for rec in info['urls']\n",
    "]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9df3b289",
   "metadata": {},
   "outputs": [],
   "source": [
    "size = str(IMAGE_SIZE)\n",
    "# Explicit columns keep the frame usable even when no products were found.\n",
    "products = pd.DataFrame(flatten, columns=PRODUCT_COLUMNS)\n",
    "# regex=False: the placeholders are literal text, whatever the pandas default is.\n",
    "dat = products.assign(\n",
    "    image_fix=products['image']\n",
    "    .str.replace('{IMG_HEIGHT}', size, regex=False)\n",
    "    .str.replace('{IMG_WIDTH}', size, regex=False)\n",
    "    .str.replace(' ', '%20', regex=False)\n",
    ")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e19f1375",
   "metadata": {},
   "outputs": [],
   "source": [
    "dat.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-download",
   "metadata": {},
   "source": [
    "## Download product images\n",
    "\n",
    "Each image is saved as `<productId>.png` in `PHOTO_DIR`. A product listed under several categories is downloaded once."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f08df1f2",
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "PHOTO_DIR.mkdir(parents=True, exist_ok=True)\n",
    "failed_downloads = []\n",
    "for rec in dat.drop_duplicates('productId').itertuples():\n",
    "    # Keep only the final path component so a product id cannot escape PHOTO_DIR.\n",
    "    dest = PHOTO_DIR / f'{Path(str(rec.productId)).name}.png'\n",
    "    if not download_image(rec.image_fix, dest):\n",
    "        failed_downloads.append(rec.productId)\n",
    "print(f'{len(failed_downloads)} downloads failed')\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-validate",
   "metadata": {},
   "source": [
    "## Validate and convert the downloads\n",
    "\n",
    "Files that Pillow cannot identify are listed instead of aborting the run on the first bad file."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a8ab57be",
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "unreadable = []\n",
    "for path in sorted(PHOTO_DIR.glob('*.png')):\n",
    "    try:\n",
    "        # Image.open identifies the file lazily; the with-block closes the handle.\n",
    "        with Image.open(path):\n",
    "            pass\n",
    "    except OSError:\n",
    "        # Pillow's UnidentifiedImageError is an OSError subclass.\n",
    "        unreadable.append(path.name)\n",
    "unreadable"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1de4037f",
   "metadata": {},
   "outputs": [],
   "source": [
    "!mogrify -format jpg \"{PHOTO_DIR}\"/*.png"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "191cd02a",
   "metadata": {},
   "outputs": [],
   "source": [
    "PHOTO_DIR.resolve()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}