import base64
import os

import nltk
import openai
from langchain.chains.summarize import load_summarize_chain
from langchain.docstore.document import Document
from langchain.document_loaders import UnstructuredURLLoader
from langchain.llms.openai import OpenAI
from langchain.text_splitter import CharacterTextSplitter

# Fetched at import time, exactly as before.
# NOTE(review): presumably required by the unstructured loader's tokenizer -- confirm.
nltk.download('punkt')

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
SCRAP_API_KEY = os.getenv("SCRAP_API_KEY")

# Upper bound (seconds) on the screenshot HTTP request so a stalled
# connection cannot block the caller forever.
_SCREENSHOT_TIMEOUT_SECONDS = 180

_BYTES_PER_MB = 1024 * 1024


def _file_size_mb(path):
    """Return the size of the file at *path* in megabytes (1 MB = 1024 * 1024 bytes)."""
    return os.path.getsize(path) / _BYTES_PER_MB


def create_brand_html(brand_link):
    """Load the page at *brand_link* and split its text into Documents.

    The page is fetched with ``UnstructuredURLLoader`` and the text of the
    first loaded document is split with ``CharacterTextSplitter`` using a
    chunk size of 3000 characters and an overlap of 200.

    Args:
        brand_link: URL of the page to load.

    Returns:
        A list of ``Document`` objects, one per text chunk.

    Raises:
        IndexError: If the loader returned no documents for the URL.
    """
    loader = UnstructuredURLLoader(urls=[brand_link])
    data = loader.load()
    if not data:
        # Same exception type the bare ``data[0]`` lookup used to raise,
        # but with a message that names the URL that failed.
        raise IndexError(f"No content could be loaded from {brand_link!r}")

    text_splitter = CharacterTextSplitter(
        chunk_size=3000,
        chunk_overlap=200,
        length_function=len,
    )
    texts = text_splitter.split_text(data[0].page_content)
    return [Document(page_content=t) for t in texts]


def create_langchain_openai_query(docs):
    """Summarize *docs* with a map-reduce summarization chain.

    Sets the module-global ``openai.api_key`` from ``OPENAI_API_KEY`` as a
    side effect, then runs the chain with an ``OpenAI`` LLM at temperature 0.

    Args:
        docs: Documents to summarize (e.g. the output of ``create_brand_html``).

    Returns:
        Whatever the chain's ``run`` call returns for *docs*.
    """
    openai.api_key = OPENAI_API_KEY
    llm = OpenAI(temperature=0, openai_api_key=openai.api_key)
    map_reduce_chain = load_summarize_chain(llm, chain_type="map_reduce")
    return map_reduce_chain.run(docs)


def create_screenshot_from_scrap_fly(link_to_fetch):
    """Request a full-page screenshot of *link_to_fetch* and save it locally.

    The response body is written to ``brand_ss_<random int>.png`` in the
    current working directory.

    Args:
        link_to_fetch: URL of the page to capture.

    Returns:
        ``{"location": <path>, "success": True}`` on success, or
        ``{"success": False, "error": <exception>}`` if anything fails,
        including a non-2xx HTTP response or a timed-out request.
    """
    import random

    import requests

    try:
        # NOTE(review): requests serialises True as the string "True";
        # confirm the API accepts that spelling for auto_scroll.
        params = {
            'key': SCRAP_API_KEY,
            'url': link_to_fetch,
            'auto_scroll': True,
            'capture': 'fullpage',
            'options': 'block_banners'
        }
        response = requests.get(
            'https://api.scrapfly.io/screenshot',
            params=params,
            timeout=_SCREENSHOT_TIMEOUT_SECONDS,
        )
        # Without this check an error payload would be written to disk as a
        # ".png" and reported as a successful capture.
        response.raise_for_status()

        location = f"brand_ss_{random.randint(1, 100000000)}.png"
        with open(location, 'wb') as file:
            file.write(response.content)
        return {"location": location, "success": True}
    except Exception as e:
        return {"success": False, "error": e}


def check_and_compress_image(image_path, output_path, target_size_mb=4, max_size_mb=5):
    """Re-save an oversized image at decreasing quality until it is small enough.

    If the file at *image_path* is larger than *max_size_mb*, it is saved to
    *output_path* repeatedly, starting at quality 95 and stepping down by 5,
    until the output is no larger than *target_size_mb* or the quality
    reaches 10. If the file is already within *max_size_mb* nothing is
    written to *output_path*.

    Args:
        image_path: Path of the image to inspect.
        output_path: Path the compressed image is written to.
        target_size_mb: Size in MB at which compression stops.
        max_size_mb: Size in MB above which compression is attempted.

    Returns:
        ``{"success": True}`` if compression was attempted (the target size
        is not guaranteed to have been reached), ``{"success": False}`` if
        the image was not larger than *max_size_mb*.
    """
    from PIL import Image

    image_size = _file_size_mb(image_path)
    print(f"Original image size: {image_size:.2f} MB")

    if image_size <= max_size_mb:
        return {"success": False}

    # The context manager closes the underlying file handle once done.
    with Image.open(image_path) as img:
        quality = 95
        # NOTE(review): the `quality` option is format-dependent; if
        # output_path is a PNG it may have no effect -- confirm with callers.
        while image_size > target_size_mb and quality > 10:
            img.save(output_path, optimize=True, quality=quality)
            image_size = _file_size_mb(output_path)
            quality -= 5
    return {"success": True}