File size: 3,675 Bytes
2321c66
 
 
 
 
 
 
 
 
 
 
1f9aa71
 
d484afb
2334c6b
2321c66
 
0b76712
2321c66
0b76712
2321c66
0b76712
2321c66
0b76712
 
 
 
2321c66
 
 
 
 
 
0b76712
2321c66
0b76712
2321c66
 
 
0b76712
2321c66
0b76712
2321c66
 
0b76712
2321c66
 
 
 
0b76712
 
171aee0
0b76712
 
2321c66
0b76712
2321c66
 
0b76712
 
2321c66
0b76712
 
171aee0
2321c66
0b76712
2321c66
0b76712
 
 
 
 
 
 
2321c66
 
d9bdbe2
5353c1d
c8b10d8
4fb1273
0b76712
8fb982e
 
85d00ce
1f9aa71
2334c6b
1f9aa71
2334c6b
a2dfe55
2334c6b
 
d355ea6
 
 
 
 
 
 
 
 
 
 
2334c6b
171aee0
0b76712
 
 
 
171aee0
 
 
 
85d00ce
171aee0
2321c66
 
171aee0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import streamlit as st
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import os
from langchain_google_genai import GoogleGenerativeAIEmbeddings
import google.generativeai as genai
from langchain.vectorstores import FAISS
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
from dotenv import load_dotenv
from st_audiorec import st_audiorec
import whisper
from googletrans import Translator

# Populate the process environment from a local .env file (if one exists) so
# that a GOOGLE_API_KEY defined there is visible to os.getenv below and to the
# Google clients created later. Variables already set in the environment are
# left untouched by load_dotenv's default settings.
load_dotenv()
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))


def get_pdf_text(pdf_docs):
    """Concatenate the extracted text of every page of every given PDF.

    Args:
        pdf_docs: Iterable of PDF sources accepted by ``PdfReader`` (in this
            app, the files returned by the Streamlit uploader). ``None`` or
            an empty iterable produces an empty string.

    Returns:
        One string holding the page texts in document order, then page
        order, with no separator inserted between pages.
    """
    # ``or ""`` keeps a page whose extraction yields nothing (None or "")
    # from breaking the concatenation; a single join avoids rebuilding the
    # accumulated string once per page.
    return "".join(
        page.extract_text() or ""
        for pdf in (pdf_docs or ())
        for page in PdfReader(pdf).pages
    )



def get_text_chunks(text, *, chunk_size=10000, chunk_overlap=1000):
    """Split ``text`` into overlapping chunks suitable for embedding.

    Args:
        text: The string to split.
        chunk_size: Upper bound on the size of each chunk, forwarded to
            ``RecursiveCharacterTextSplitter``. Defaults to 10000.
        chunk_overlap: Amount of overlap between consecutive chunks,
            forwarded to ``RecursiveCharacterTextSplitter``. Defaults to 1000.

    Returns:
        The chunks returned by ``RecursiveCharacterTextSplitter.split_text``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_text(text)


def get_vector_store(text_chunks):
    """Embed the chunks and persist the resulting FAISS index on disk.

    Args:
        text_chunks: The text chunks to embed with the
            ``models/embedding-001`` Google embedding model.

    Returns:
        None. The index is written to the local ``faiss_index`` path.
    """
    embedder = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    index = FAISS.from_texts(text_chunks, embedding=embedder)
    index.save_local("faiss_index")


def get_conversational_chain():
    """Build the question-answering chain used to answer from retrieved text.

    Returns:
        A "stuff" QA chain from ``load_qa_chain`` that combines the
        ``gemini-pro`` chat model (temperature 0.1) with a prompt expecting
        the ``context`` and ``question`` variables.
    """
    # The template text is sent to the model verbatim; keep it unchanged.
    qa_template = """
    Answer the question as detailed as possible from the provided context, make sure to provide all the details, if the answer is not in
    provided context just say, "answer is not available in the context", don't provide the wrong answer\n\n
    Context:\n {context}?\n
    Question: \n{question}\n
    Answer:
    """

    llm = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.1)
    qa_prompt = PromptTemplate(
        template=qa_template,
        input_variables=["context", "question"],
    )

    return load_qa_chain(llm, chain_type="stuff", prompt=qa_prompt)



def user_input(user_question):
    """Answer ``user_question`` from the saved FAISS index and render the reply.

    The question is matched against the index stored at ``faiss_index``, the
    matching documents are passed to the QA chain, and the chain's
    ``output_text`` is written to the Streamlit page. When the question is
    blank or no index has been saved yet, a warning is shown instead.

    Args:
        user_question: The question text (English) to answer.

    Returns:
        None. Output is rendered through Streamlit.
    """
    # A blank transcription cannot be searched meaningfully.
    if not user_question or not user_question.strip():
        st.warning("No question was recognised. Please record your question again.")
        return

    # The index is only written by get_vector_store after PDFs are processed;
    # without this guard load_local fails with a low-level error.
    if not os.path.exists("faiss_index"):
        st.warning(
            "No processed documents found. Upload your PDF files and click "
            "'Submit & Process' first."
        )
        return

    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

    # SECURITY: allow_dangerous_deserialization=True permits unpickling the
    # stored index data. That is acceptable only because the index is written
    # locally by get_vector_store; never load an index from an untrusted source.
    new_db = FAISS.load_local(
        "faiss_index", embeddings, allow_dangerous_deserialization=True
    )
    docs = new_db.similarity_search(user_question)

    chain = get_conversational_chain()
    response = chain(
        {"input_documents": docs, "question": user_question},
        return_only_outputs=True,
    )

    st.write("Reply: ", response["output_text"])




def _load_whisper_model(name):
    """Return the Whisper model ``name``, reusing a cached instance if possible."""
    # Streamlit re-executes the script on each interaction, so an uncached
    # load would read the model weights again for every recorded question.
    cache_resource = getattr(st, "cache_resource", None)
    if cache_resource is None:
        # Streamlit release without st.cache_resource: load directly.
        return whisper.load_model(name)
    return cache_resource(whisper.load_model)(name)


def main():
    """Render the app: spoken-question handling and the PDF upload sidebar.

    A recorded question is saved to ``query.wav``, transcribed with Whisper,
    translated to English, displayed, and answered via ``user_input``. The
    sidebar lets the user upload PDFs and build the FAISS index from them.
    """
    st.set_page_config("Chat PDF")
    st.header("QnA with Multiple PDF files💁")

    # Audio recording
    wav_audio_data = st_audiorec()

    if wav_audio_data is not None:
        # Whisper is given a file path, so persist the recording first.
        with open("query.wav", "wb") as f:
            f.write(wav_audio_data)

        model = _load_whisper_model("large")
        result = model.transcribe("query.wav")
        detected_language = result["language"]
        transcribed_text = result["text"]

        translator = Translator()
        translation = translator.translate(transcribed_text, dest="en")
        user_question = translation.text

        st.write("Detected Language:", detected_language)
        st.write("Transcribed Text:", transcribed_text)
        st.write("Translated Question:", user_question)
        user_input(user_question)

    with st.sidebar:
        st.title("Menu:")
        pdf_docs = st.file_uploader("Upload your PDF Files and Click on the Submit & Process Button", accept_multiple_files=True)
        if st.button("Submit & Process"):
            if not pdf_docs:
                # Nothing uploaded: building an index from no text would fail.
                st.warning("Please upload at least one PDF file before processing.")
            else:
                with st.spinner("Processing..."):
                    raw_text = get_pdf_text(pdf_docs)
                    text_chunks = get_text_chunks(raw_text)
                    # Only build the index when there is text to embed.
                    if text_chunks:
                        get_vector_store(text_chunks)
                if text_chunks:
                    st.success("Done")
                else:
                    st.warning("No extractable text was found in the uploaded PDF files.")


# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()