MoodCamera / summarizer.py
Anustup's picture
Update summarizer.py
5460f4d verified
import os
import base64
from langchain.docstore.document import Document
from langchain.text_splitter import CharacterTextSplitter
from langchain.llms.openai import OpenAI
from langchain.chains.summarize import load_summarize_chain
from langchain.document_loaders import UnstructuredURLLoader
import nltk
import openai
# Import-time side effect: fetch the NLTK 'punkt' resource.
# NOTE(review): presumably required by the URL loader's text partitioning — confirm.
nltk.download('punkt')
# Credentials are read from the environment once, at import time;
# os.getenv yields None for a variable that is not set.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
SCRAP_API_KEY = os.getenv("SCRAP_API_KEY")
def create_brand_html(brand_link):
    """Load a web page and split its text into LangChain documents.

    Args:
        brand_link: URL of the page to load.

    Returns:
        A list of ``Document`` objects, one per chunk produced by a
        ``CharacterTextSplitter`` (chunk_size=3000, chunk_overlap=200,
        lengths measured with ``len``). The list is empty when the loader
        yields no document for the URL.
    """
    loader = UnstructuredURLLoader(urls=[brand_link])
    data = loader.load()
    # The loader may come back empty (with its default settings a URL that
    # fails to fetch or parse is logged and skipped), so guard before
    # indexing the first document instead of raising IndexError.
    if not data:
        return []
    text_splitter = CharacterTextSplitter(
        chunk_size=3000,
        chunk_overlap=200,
        length_function=len,
    )
    texts = text_splitter.split_text(data[0].page_content)
    return [Document(page_content=t) for t in texts]
def create_langchain_openai_query(docs):
    """Summarize documents with a LangChain map-reduce chain over OpenAI.

    Args:
        docs: The documents handed to the summarize chain's ``run`` method.

    Returns:
        Whatever the chain's ``run`` call returns for ``docs``.
    """
    # Set the key on the openai module first, then pass that same value to
    # the LangChain LLM wrapper.
    openai.api_key = OPENAI_API_KEY
    summarizer = load_summarize_chain(
        OpenAI(temperature=0, openai_api_key=openai.api_key),
        chain_type="map_reduce",
    )
    return summarizer.run(docs)
def create_screenshot_from_scrap_fly(link_to_fetch, timeout=180):
    """Request a full-page screenshot from the Scrapfly API and save it locally.

    Args:
        link_to_fetch: URL of the page to capture.
        timeout: Seconds to wait for the HTTP request before giving up, so a
            stalled connection cannot block the caller forever.

    Returns:
        ``{"location": <png path>, "success": True}`` when the image was
        downloaded and written to the current working directory, otherwise
        ``{"success": False, "error": <exception>}``.
    """
    import random

    import requests

    # Broad catch is deliberate: this function reports every failure through
    # its result dict rather than raising to the caller.
    try:
        params = {
            'key': SCRAP_API_KEY,
            'url': link_to_fetch,
            'auto_scroll': True,
            'capture': 'fullpage',
            'options': 'block_banners',
        }
        response = requests.get(
            'https://api.scrapfly.io/screenshot',
            params=params,
            timeout=timeout,
        )
        # A 4xx/5xx reply carries an error payload, not an image; fail here
        # instead of writing that payload into a .png and reporting success.
        response.raise_for_status()
        location = f"brand_ss_{random.randint(1, 100000000)}.png"
        with open(location, 'wb') as file:
            file.write(response.content)
        return {"location": location, "success": True}
    except Exception as e:
        return {"success": False, "error": e}
def check_and_compress_image(image_path, output_path, target_size_mb=4, max_size_mb=5):
    """Re-save an oversized image at decreasing quality until it is small enough.

    Args:
        image_path: Path of the image to inspect.
        output_path: Path the re-encoded image is written to.
        target_size_mb: Size (in MB) at which re-saving stops.
        max_size_mb: Size (in MB) above which compression is attempted.

    Returns:
        ``{"success": False}`` when the image is not larger than
        ``max_size_mb`` (nothing is written to ``output_path``), otherwise
        ``{"success": True}`` after the compression attempts.

    Raises:
        OSError: If ``image_path`` cannot be read or the image cannot be
            written to ``output_path``.
    """
    bytes_per_mb = 1024 * 1024
    image_size = os.path.getsize(image_path) / bytes_per_mb
    print(f"Original image size: {image_size:.2f} MB")
    if image_size <= max_size_mb:
        return {"success": False}

    # Imported lazily so the size check above works without Pillow installed.
    from PIL import Image

    # Modes Pillow's JPEG writer accepts; anything else (e.g. RGBA or P from
    # a PNG screenshot) must be converted or save() raises OSError.
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")
    extension = os.path.splitext(output_path)[1].lower()
    writes_jpeg = extension in (".jpg", ".jpeg", ".jpe", ".jfif")

    # The context manager releases the source file handle when we are done.
    with Image.open(image_path) as source:
        img = source
        if writes_jpeg and source.mode not in jpeg_modes:
            img = source.convert("RGB")
        quality = 95
        # Step quality down by 5 per attempt (95, 90, ..., 15) until the
        # written file is within the target size.
        while image_size > target_size_mb and quality > 10:
            img.save(output_path, optimize=True, quality=quality)
            image_size = os.path.getsize(output_path) / bytes_per_mb
            quality -= 5
    return {"success": True}