import os
import time
from flask import Flask, render_template, jsonify, request
from src.helper import download_hugging_face_embeddings
from langchain.llms import Replicate
from dotenv import load_dotenv
from PyPDF2 import PdfReader
from langchain.schema import Document
from langchain.text_splitter import CharacterTextSplitter

# Initialize Flask app
app = Flask(__name__)

# Load environment variables
load_dotenv()
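
# The .env file is assumed to provide the Replicate credentials; a minimal example
# (the token below is a placeholder, not a real secret):
#   REPLICATE_API_TOKEN=r8_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx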

# Optional PDF processing functions
# def load_pdf(file_path):
#     all_text = ""
#     with open(file_path, 'rb') as file:
#         reader = PdfReader(file)
#         for page in reader.pages:
#             all_text += page.extract_text() + "\n"
#     return all_text if all_text else None

# def text_split(text):
#     text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
#     document = Document(page_content=text)
#     return text_splitter.split_documents([document])

# Load and process data
# pdf_file_path = "data/Okelloetal.2008TourismanalysisManka.pdf"
# extracted_data = load_pdf(pdf_file_path)
# if extracted_data is None:
#     raise ValueError("The extracted data is None. Please check the load_pdf function.")
# print(f"Extracted Data: {extracted_data}")

# Split the extracted text into chunks
# text_chunks = text_split(extracted_data)
# if not text_chunks:
#     raise ValueError("The text_chunks is None or empty. Please check the text_split function.")
# print(f"Text Chunks: {text_chunks}")

embeddings = download_hugging_face_embeddings()
if embeddings is None:
    raise ValueError("The embeddings are None. Please check the download_hugging_face_embeddings function.")
print(f"Embeddings: {embeddings}")

# The Replicate API token must come from the environment (.env); never hardcode secrets
if not os.getenv("REPLICATE_API_TOKEN"):
    raise ValueError("REPLICATE_API_TOKEN is not set. Please add it to your .env file.")

# Initialize the Replicate model (LangChain's wrapper passes generation settings via model_kwargs)
llm = Replicate(
    model="a16z-infra/llama7b-v2-chat:4f0a4744c7295c024a1de15e1a63c880d3da035fa1f49bfd344fe076074c8eea",
    model_kwargs={
        'max_new_tokens': 100,  # Maximum number of tokens to generate per response
        'temperature': 0.7,     # Balance randomness and coherence
        'top_k': 50             # Sample only from the top 50 predictions
    }
)
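
# Optional startup sanity check, a minimal sketch (costs one Replicate call):
# uncomment to verify the token and model respond before serving traffic.
# smoke = llm.generate(["Say hello in one sentence."])
# print(f"Smoke test: {smoke.generations[0][0].text}")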

# Flask routes (the decorators are required to register each route;
# the /get path is a common convention and must match the URL that chat.html posts to)
@app.route("/")
def index():
    return render_template('chat.html')


@app.route("/get", methods=["GET", "POST"])
def chat():
    try:
        msg = request.form["msg"]
        input_text = msg
        print(f"Received message: {input_text}")

        # Brief artificial delay so a client-side "Thinking..." spinner can render
        time.sleep(1)

        # Retrieve the response from the model
        result = llm.generate([input_text])
print(f"LLMResult: {result}") | |
# Access the generated text from the result object | |
if result.generations and result.generations[0]: | |
generated_text = result.generations[0][0].text | |
else: | |
generated_text = "No response generated." | |
print(f"Response: {generated_text}") | |
return str(generated_text) | |
except Exception as e: | |
print(f"Error: {e}") | |
return jsonify({"error": str(e)}), 500 | |
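
# Example request against the chat endpoint (assumes the /get route above and the
# port 8080 used below; adjust if chat.html posts elsewhere):
#   curl -X POST -F "msg=Hello" http://localhost:8080/get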

if __name__ == '__main__':
    app.run(host="0.0.0.0", port=8080, debug=True)