File size: 5,628 Bytes
189cd2f
 
 
 
02223fb
 
 
189cd2f
 
02223fb
189cd2f
02223fb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
af912a1
02223fb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5ea28c8
02223fb
 
 
 
 
 
 
 
 
 
 
 
46de9df
02223fb
af912a1
02223fb
 
 
 
 
 
 
 
af912a1
02223fb
af912a1
02223fb
 
 
46de9df
af912a1
46de9df
 
 
02223fb
46de9df
02223fb
46de9df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cd2cc59
02223fb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5ea28c8
02223fb
 
 
 
 
 
 
 
 
 
 
 
24e37bf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
import eventlet
eventlet.monkey_patch()


from dotenv import load_dotenv
from flask import Flask, request, render_template
from flask_cors import CORS
from flask_socketio import SocketIO, emit

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain.retrievers.document_compressors import EmbeddingsFilter
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers import EnsembleRetriever
from langchain_community.vectorstores import FAISS
from langchain_groq import ChatGroq
from langchain import hub
import pickle
import os



# Load environment variables
# Values come from the local .env file and/or the process environment.
load_dotenv(".env")
USER_AGENT = os.getenv("USER_AGENT")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
SECRET_KEY = os.getenv("SECRET_KEY")
SESSION_ID_DEFAULT = "abc123"


# Set environment variables
# os.environ only accepts str values: assigning None (a variable that is set
# neither in .env nor in the process environment) raises an opaque TypeError
# at import time, so each value is checked before it is exported.
if USER_AGENT is not None:
    os.environ['USER_AGENT'] = USER_AGENT

# The chat model further down is constructed without an explicit API key, so a
# missing GROQ_API_KEY is reported here with an actionable message instead.
if GROQ_API_KEY is None:
    raise RuntimeError(
        "GROQ_API_KEY is not set; define it in .env or in the process environment."
    )
os.environ["GROQ_API_KEY"] = GROQ_API_KEY

# Presumably read by the Hugging Face tokenizers library — TODO confirm.
os.environ["TOKENIZERS_PARALLELISM"] = 'true'

# Initialize Flask app and SocketIO with CORS
app = Flask(__name__)
# flask_cors is applied with its default settings (no origin restriction is
# configured here).
CORS(app)
# NOTE(review): cors_allowed_origins="*" accepts Socket.IO connections from any
# origin — confirm this is intended for the deployed environment.
socketio = SocketIO(app, cors_allowed_origins="*")
app.config['SESSION_COOKIE_SECURE'] = True  # Use HTTPS
# Keep the session cookie out of reach of client-side JavaScript.
app.config['SESSION_COOKIE_HTTPONLY'] = True
# SECRET_KEY is read with os.getenv("SECRET_KEY") above, so it is None when the
# variable is not configured.
app.config['SECRET_KEY'] = SECRET_KEY


# Embedding model shared by the FAISS stores and the similarity filter below.
# NOTE(review): trust_remote_code=True allows code shipped with the model
# repository to run locally — confirm the model source is trusted.
embed_model = HuggingFaceEmbeddings(model_name="Alibaba-NLP/gte-multilingual-base", model_kwargs={"trust_remote_code":True})
# Chat model used to generate answers: temperature 0.0, at most 1024 output
# tokens, up to 2 retries. No API key is passed here; presumably it is picked
# up from the GROQ_API_KEY environment variable exported above — TODO confirm.
llm = ChatGroq(
    model="llama-3.1-8b-instant",  
    temperature=0.0,
    max_tokens=1024, 
    max_retries=2
)

# Load the two prebuilt FAISS indexes from disk and fold the Word index into
# the Excel one so a single store covers both document sets.
# NOTE(review): allow_dangerous_deserialization=True opts in to loading
# pickled data from these folders; only safe if the index files are trusted.
excel_vectorstore = FAISS.load_local(folder_path="./faiss_excel_doc_index", embeddings=embed_model, allow_dangerous_deserialization=True)
word_vectorstore = FAISS.load_local(folder_path="./faiss_recursive_split_word_doc_index", embeddings=embed_model, allow_dangerous_deserialization=True)
excel_vectorstore.merge_from(word_vectorstore)  # result is not reassigned: the merge is relied on to update excel_vectorstore itself
combined_vectorstore = excel_vectorstore  # second name for the same merged store object

# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# this artifact must come from a trusted build step.
with open('combined_recursive_keyword_retriever.pkl', 'rb') as f:
    combined_keyword_retriever = pickle.load(f)
    # combined_keyword_retriever.k = 1000

# Retriever over the merged store using MMR search with k=100.
semantic_retriever = combined_vectorstore.as_retriever(search_type="mmr", search_kwargs={"k": 100})


# initialize the ensemble retriever
# Combines the pickled keyword retriever with the semantic retriever, giving
# both the same weight (0.5 each).
# NOTE(review): ensemble_retriever is not referenced again in the visible code;
# the compression retriever below wraps semantic_retriever only. Confirm
# whether the ensemble was meant to be its base retriever.
ensemble_retriever = EnsembleRetriever(
    retrievers=[combined_keyword_retriever, semantic_retriever], weights=[0.5, 0.5]
)


# Embedding-based document filter configured with a similarity threshold of 0.4.
embeddings_filter = EmbeddingsFilter(embeddings=embed_model, similarity_threshold=0.4)
# Retrieves with semantic_retriever and applies embeddings_filter as the
# compressor on the retrieved documents.
compression_retriever = ContextualCompressionRetriever(
    base_compressor=embeddings_filter, base_retriever=semantic_retriever
)

# Prompt text for the answering model. The {context} and {question}
# placeholders are supplied by the chain defined further down. Everything
# inside the string is sent to the model, so any edit to it changes runtime
# behaviour.
template = """
You are an Arabic AI Assistant focused on providing clear, detailed responses in HTML format with appropriate direction for Arabic language (right-to-left).

    - Always answer truthfully. If the user query is irrelevant to the provided CONTEXT, respond by stating the reason.
    - For general questions like greetings, reply with formal Arabic greetings.
    - Generate responses in Arabic, and format any English words and numbers appropriately for clarity.

Response Formatting Guidelines:

    - All responses must be generated in HTML and wrapped inside a <div dir="rtl"> tag.
    - Utilize proper HTML tags for structuring the response:
        - Use <p> for paragraphs.
        - Apply <strong> for bold texts.
        - Organize content with ordered (<ol>) or unordered (<ul>) lists as needed.
        - Include line breaks (<br>) where appropriate for readability.
    - Wrap all English words/numbers or sentences in a <span dir="ltr"> tag to maintain left-to-right directionality.
    - Numbers with decimal values should be rounded off to two decimal places.
    - Follow right-to-left language rules (dir="rtl"), ensuring Arabic text is properly aligned.

Additional Instructions:

    - Provide detailed yet concise answers, covering all important aspects.
    - Ensure proper HTML formatting is applied to the entire response for clarity and structure.
    - Only return the AI-generated answer in HTML format.
    - Responding outside the provided CONTEXT may result in the termination of the interaction.

CONTEXT: {context}
Query: {question}
"""

# Chat prompt built from the template text above.
prompt = ChatPromptTemplate.from_template(template)
# Final chain stage; presumably reduces the model output to a plain string
# (going by the parser's name) — TODO confirm.
output_parser = StrOutputParser()

def format_docs(docs):
    """Join the ``page_content`` of every document into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The contents in input order, separated by a blank line; an empty
        string when ``docs`` yields nothing.
    """
    separator = "\n\n"
    contents = [document.page_content for document in docs]
    return separator.join(contents)


# Context branch: run the question through the compression retriever (traced
# under the run name "Docs") and flatten the returned documents to text.
_docs_to_context = compression_retriever.with_config(run_name="Docs") | format_docs
# Prompt inputs: the retrieved context plus the question passed through as-is.
_prompt_inputs = {"context": _docs_to_context, "question": RunnablePassthrough()}

# Full pipeline: prompt inputs -> prompt -> chat model -> output parser.
rag_chain = _prompt_inputs | prompt | llm | output_parser

# Socket.IO 'connect' event handler
@socketio.on('connect')
def handle_connect():
    """Send a ``connection_response`` acknowledgement to the connecting client."""
    payload = {'message': 'Connected successfully.'}
    # room=request.sid addresses the message to the current client's session.
    emit('connection_response', payload, room=request.sid)

@socketio.on('ping')
def handle_ping(data):
    """Answer a ``ping`` event with a ``ping_response`` health message.

    Args:
        data: Payload sent with the event; it is accepted but not inspected.
    """
    health_payload = {'message': 'Healthy Connection.'}
    emit('ping_response', health_payload, room=request.sid)

# Socket.IO 'disconnect' event handler
@socketio.on('disconnect')
def handle_disconnect():
    """Emit a ``connection_response`` farewell when a client disconnects."""
    # NOTE(review): the client has presumably already gone by the time this
    # runs, so the message may never be delivered — confirm it is still needed.
    farewell = {'message': 'Disconnected successfully.'}
    emit('connection_response', farewell)

# Function to handle WebSocket messages
@socketio.on('message')
def handle_message(data):
    """Stream the RAG chain's answer to a client question over the socket.

    Each chunk produced by ``rag_chain.stream`` is emitted as a ``response``
    event to the requesting client only. Malformed payloads, blank questions
    and failures while streaming are all reported as a ``response`` event
    carrying an ``error`` entry, so the client always gets a reply.

    Args:
        data: Event payload; expected to be a mapping with a ``question``
            string. Any other shape is rejected with an error response.
    """
    import logging  # local import keeps this handler self-contained

    logger = logging.getLogger(__name__)

    # Clients can send any JSON value. Calling .get() on a non-dict used to
    # raise outside the try block and leave the client without any reply.
    question = data.get('question') if isinstance(data, dict) else None
    if not isinstance(question, str) or not question.strip():
        emit('response', {"error": "A non-empty 'question' is required."}, room=request.sid)
        return

    try:
        for chunk in rag_chain.stream(question):
            emit('response', chunk, room=request.sid)
            # Traced through logging rather than print: a failure while
            # echoing a chunk must not abort the stream to the client.
            logger.debug("response chunk: %r", chunk)
    except Exception:
        # Top-level boundary for this event: record the traceback server-side
        # and send the client a generic message without internal details.
        logger.exception("Failed to answer question for session %s", request.sid)
        emit('response', {"error": "An error occurred while processing your request."}, room=request.sid)


# Home route
@app.route("/")
def index_view():
    """Render the ``chat.html`` template for the site root."""
    chat_page = render_template('chat.html')
    return chat_page

# Main function to run the app
if __name__ == '__main__':
    # Debug mode stays on by default, as before, but can now be switched off
    # without a code change by setting FLASK_DEBUG to 0/false/no/off.
    # NOTE(review): debug mode should not be enabled on a publicly reachable
    # deployment — confirm how this script is launched in production.
    debug_enabled = os.getenv("FLASK_DEBUG", "1").strip().lower() not in {"0", "false", "no", "off"}
    socketio.run(app, debug=debug_enabled)