import streamlit as st
import fitz  # PyMuPDF
from transformers import AutoTokenizer, AutoModelForCausalLM


# Function to read and extract text from a PDF document
def read_pdf(file_path):
    text = ""
    with fitz.open(file_path) as doc:
        for page in doc:
            text += page.get_text()
    return text


# Load the document text
document_text = read_pdf("jeff_wo.pdf")  # Adjust the path to your PDF file

# Streamlit UI
st.title("LLaMA 2-based Q&A System")
st.write("### Enter your query below:")
query = st.text_input("Query")


# Load the tokenizer and model once and cache them, so Streamlit does not
# reload the 7B weights on every interaction/rerun
@st.cache_resource
def load_model():
    tokenizer = AutoTokenizer.from_pretrained("NousResearch/Llama-2-7b-chat-hf")
    model = AutoModelForCausalLM.from_pretrained("NousResearch/Llama-2-7b-chat-hf")
    return tokenizer, model


tokenizer, model = load_model()


# Function to get answers using the LLaMA 2 model
def get_answer(context, query):
    input_text = f"Context: {context}\nQ: {query}\nA:"
    input_ids = tokenizer.encode(input_text, return_tensors="pt")
    # Generate an answer; max_new_tokens bounds only the generated answer,
    # whereas max_length would also count the (potentially long) prompt
    output = model.generate(input_ids, max_new_tokens=256, num_return_sequences=1)
    # Decode only the newly generated tokens, skipping the echoed prompt
    answer = tokenizer.decode(output[0][input_ids.shape[1]:], skip_special_tokens=True)
    return answer


# Button to generate answers
if st.button("Get Answer"):
    with st.spinner("Finding the answer..."):
        answer = get_answer(document_text, query)
        st.write("### Answer:")
        st.write(answer)

# Replace "jeff_wo.pdf" above with the actual path to the PDF in your repo
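
# Usage note (assumptions: this file is saved as app.py and streamlit,
# pymupdf, transformers, and torch are installed):
#   streamlit run app.py
#
# LLaMA 2's context window is 4,096 tokens, so a long PDF may not fit in the
# prompt as-is. A minimal optional safeguard, sketched below, is to truncate
# the extracted text before building the prompt; the 3000-token budget is an
# illustrative choice that leaves room for the question and the answer.
def truncate_context(context, max_tokens=3000):
    # Tokenize with truncation so the context stays within the budget
    ids = tokenizer.encode(context, truncation=True, max_length=max_tokens)
    return tokenizer.decode(ids, skip_special_tokens=True)

# Example use: answer = get_answer(truncate_context(document_text), query)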