"""Scam-SMS image pipeline.

Downloads images from a list of URLs, extracts their text with Tesseract OCR,
cleans the text up with a Groq-hosted LLM, writes the results under ``data/``
and sends a short run report by SMS through Twilio.

Required environment variables:
    TWILIO_ACCOUNT_SID, TWILIO_AUTH_TOKEN, GROQ_API_KEY

Optional environment variables:
    TWILIO_FROM_NUMBER    sender number (defaults to the previously used one)
    SMS_REPORT_RECIPIENT  report recipient (defaults to the previously used one)
"""

import json
import logging
import os
import subprocess
import sys
import time
from datetime import datetime
from io import BytesIO

import pandas as pd
import pytesseract
import requests
from groq import Groq
from PIL import Image
from selenium import webdriver
from selenium.webdriver.common.by import By
from twilio.rest import Client

LOG_DIR = 'logs'
LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'

# logging.FileHandler opens its file as soon as it is constructed, so the
# directory has to exist before basicConfig() runs (otherwise the import of
# this module fails with FileNotFoundError on a fresh checkout).
os.makedirs(LOG_DIR, exist_ok=True)

# Configure logging
logging.basicConfig(
    level=logging.DEBUG,
    format=LOG_FORMAT,
    handlers=[
        logging.FileHandler(os.path.join(LOG_DIR, 'sms_debug.log')),
        logging.StreamHandler(),
    ],
)
logger = logging.getLogger(__name__)


def setup_directories():
    """Create the log and data directories (relative to the current working directory)."""
    directories = ['logs', 'data', 'data/images', 'data/texts', 'data/reports']
    for directory in directories:
        os.makedirs(directory, exist_ok=True)
    logger.info("Directory structure created")


def _require_env(name):
    """Return the value of environment variable *name*.

    Raises:
        RuntimeError: if the variable is unset or empty.
    """
    value = os.environ.get(name)
    if not value:
        raise RuntimeError(f"Required environment variable {name} is not set")
    return value


class SMSSender:
    """Thin wrapper around the Twilio REST client for sending text messages."""

    def __init__(self):
        """Initialize the Twilio client with credentials from the environment.

        Credentials are deliberately NOT stored in source control; any secret
        that was previously committed in this file should be rotated.

        Raises:
            RuntimeError: if TWILIO_ACCOUNT_SID or TWILIO_AUTH_TOKEN is missing.
        """
        self.account_sid = _require_env("TWILIO_ACCOUNT_SID")
        self.auth_token = _require_env("TWILIO_AUTH_TOKEN")
        self.from_number = os.environ.get("TWILIO_FROM_NUMBER", "+17322534518")
        self.client = Client(self.account_sid, self.auth_token)

    def send_sms(self, to_number, message):
        """Send *message* to *to_number* using Twilio.

        Numbers without a leading ``+`` are prefixed with ``+91``.

        Returns:
            True when Twilio accepted the message, False on any failure
            (the failure is logged, never raised).
        """
        try:
            logger.info(f"Attempting to send SMS to: {to_number}")

            if not to_number.startswith('+'):
                to_number = f"+91{to_number}"

            # Use a separate name for the API response so the `message`
            # argument is not silently replaced by a different type.
            sent = self.client.messages.create(
                body=message,
                from_=self.from_number,
                to=to_number,
            )
            logger.info(f"SMS sent successfully! Message SID: {sent.sid}")
            return True
        except Exception as e:
            logger.error(f"Failed to send SMS: {str(e)}", exc_info=True)
            return False


class ScamDetector:
    """Extracts text from images, stores the results and reports by SMS."""

    def __init__(self, groq_api_key, sms_sender):
        """Create the Groq client and make sure the output directories exist.

        Args:
            groq_api_key: API key passed to the Groq client.
            sms_sender: object exposing ``send_sms(to_number, message)``.
        """
        self.groq_client = Groq(api_key=groq_api_key)
        self.sms_sender = sms_sender
        self.base_path = os.path.join(os.getcwd(), 'data')
        setup_directories()

    def process_text_with_groq(self, text):
        """Ask the LLM to tidy up OCR output.

        Returns:
            The formatted text, or the unmodified *text* if the request fails.
        """
        try:
            prompt = (
                "Format the following extracted text from an SMS image.\n"
                "Keep the original content intact but improve the formatting "
                "and remove any OCR artifacts:\n\n"
                f"{text}\n"
            )

            completion = self.groq_client.chat.completions.create(
                model="llama3-8b-8192",
                messages=[{"role": "user", "content": prompt}],
                temperature=0.3,
                max_tokens=1024,
                top_p=1,
                stream=False,
                stop=None,
            )
            return completion.choices[0].message.content.strip()
        except Exception as e:
            # Best effort: raw OCR text is still better than nothing.
            logger.error(f"Error in Groq processing: {str(e)}")
            return text

    def download_and_extract_text(self, url, save_image=True):
        """Download the image at *url*, OCR it and return the cleaned-up text.

        Args:
            url: image URL to fetch.
            save_image: when true, also store the image under ``data/images``.

        Returns:
            The processed text, or None when the download/OCR fails or the
            image contains no text.
        """
        try:
            response = requests.get(url, timeout=10)
            # Fail early on 4xx/5xx instead of handing an error page to PIL.
            response.raise_for_status()
            img = Image.open(BytesIO(response.content))

            # Save image if requested
            if save_image:
                # Microsecond resolution: several images are usually processed
                # within the same second and must not overwrite each other.
                stamp = datetime.now().strftime('%Y%m%d_%H%M%S_%f')
                img_path = os.path.join(self.base_path, 'images', f"image_{stamp}.png")
                img.save(img_path)
                logger.info(f"Image saved: {img_path}")

            text = pytesseract.image_to_string(img).strip()
            if text:
                return self.process_text_with_groq(text)
        except Exception as e:
            logger.error(f"Error processing image from {url}: {str(e)}")
        return None

    def scrape_images(self):
        """Return the list of image URLs to process.

        NOTE(review): the scraping implementation was elided from this file
        ("rest of the scraping code remains the same"), which left a ``def``
        without a body. Restore the original implementation here; until then
        the method fails loudly instead of breaking the whole module.
        """
        # ... (rest of the scraping code remains the same)
        raise NotImplementedError(
            "scrape_images: the scraping implementation is missing from this file"
        )

    def process_and_save(self, image_urls):
        """Process images and save results locally.

        Writes the URL list, the extracted texts and a CSV report under
        ``data/``, then sends an SMS summary.

        Args:
            image_urls: iterable of image URLs (iterated more than once).

        Returns:
            Tuple ``(url_path, text_path, csv_path)`` of the files written.
        """
        logger.info("Extracting text from images...")
        image_texts = []
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

        for i, url in enumerate(image_urls):
            logger.info(f"Processing image {i+1}/{len(image_urls)} from URL: {url}")
            text = self.download_and_extract_text(url)
            if text:
                image_texts.append({'url': url, 'text': text})

        # Save files in local directories
        url_path = os.path.join(self.base_path, 'texts', f'scam_urls_{timestamp}.txt')
        text_path = os.path.join(self.base_path, 'texts', f'scam_texts_{timestamp}.txt')
        csv_path = os.path.join(self.base_path, 'reports', f'scam_report_{timestamp}.csv')

        # Save URLs (explicit encoding: URLs may contain non-ASCII characters
        # and the platform default is not guaranteed to handle them)
        with open(url_path, 'w', encoding='utf-8') as f:
            f.writelines(f"{url}\n" for url in image_urls)

        # Save extracted texts
        with open(text_path, 'w', encoding='utf-8') as f:
            for item in image_texts:
                f.write(f"URL: {item['url']}\n")
                f.write(f"Text:\n{item['text']}\n")
                f.write("-" * 80 + "\n")

        # Save CSV report; naming the columns keeps the header row even when
        # no text could be extracted.
        df = pd.DataFrame(image_texts, columns=['url', 'text'])
        df.to_csv(csv_path, index=False)

        # Send SMS report
        message = (
            "\n"
            "Scam Detector Run Report\n"
            f"Time: {timestamp}\n"
            f"Total URLs found: {len(image_urls)}\n"
            f"Total texts extracted: {len(image_texts)}\n"
            "Files saved in local directories\n"
        )

        delivered = self.sms_sender.send_sms(
            to_number=os.environ.get("SMS_REPORT_RECIPIENT", "8140030507"),
            message=message,
        )
        if not delivered:
            # The files are on disk regardless; only the notification failed.
            logger.warning("Run report SMS could not be sent")

        return url_path, text_path, csv_path


def main():
    """Run one detection pass: scrape, OCR, save, notify.

    Errors are logged (with traceback) rather than propagated.
    """
    run_log_handler = None
    try:
        logger.info("Starting the scam detection process...")

        # Create timestamp for this run
        run_timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

        # Setup run-specific logging
        os.makedirs(LOG_DIR, exist_ok=True)
        log_path = os.path.join(LOG_DIR, f'scam_run_{run_timestamp}.log')
        run_log_handler = logging.FileHandler(log_path)
        run_log_handler.setFormatter(logging.Formatter(LOG_FORMAT))
        logger.addHandler(run_log_handler)

        logger.info(f"Starting new detection run at {run_timestamp}")

        # Secrets come from the environment, never from source control.
        groq_api_key = _require_env("GROQ_API_KEY")

        sms_sender = SMSSender()
        detector = ScamDetector(groq_api_key=groq_api_key, sms_sender=sms_sender)

        logger.info("Starting image scraping...")
        image_urls = detector.scrape_images()
        logger.info(f"Found {len(image_urls)} unique images")

        url_path, text_path, csv_path = detector.process_and_save(image_urls)
        logger.info(f"Results saved locally: {url_path}, {text_path}, {csv_path}")

        logger.info("Detection run completed")
        print("Detection run completed successfully.")

    except Exception as e:
        # exc_info already sends the traceback to every handler (including
        # the console), so no separate traceback.print_exc() is needed.
        logger.error(f"An error occurred: {str(e)}", exc_info=True)
    finally:
        # Detach the per-run handler so repeated main() calls do not
        # accumulate handlers or leak open file descriptors.
        if run_log_handler is not None:
            logger.removeHandler(run_log_handler)
            run_log_handler.close()


if __name__ == "__main__":
    main()