import streamlit as st
import fitz  # PyMuPDF
from transformers import AutoTokenizer, AutoModelForCausalLM

# Function to read and extract text from a PDF document
def read_pdf(file_path):
    text = ""
    with fitz.open(file_path) as doc:
        for page in doc:
            text += page.get_text()
    return text
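
# Note: page.get_text() returns the PDF's embedded text layer; a scanned PDF
# with no text layer yields an empty string and would need an OCR step first.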

# Load the document text
document_text = read_pdf("jeff_wo.pdf")  # Adjust the path to your PDF file

# Streamlit UI
st.title("LLaMA 2-based Q&A System")
st.write("### Enter your query below:")
query = st.text_input("Query")

# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained("NousResearch/Llama-2-7b-chat-hf")
model = AutoModelForCausalLM.from_pretrained("NousResearch/Llama-2-7b-chat-hf")
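
# Note: Streamlit re-runs this whole script on every interaction, so the 7B
# model above is reloaded each time. One common fix (a sketch, assuming a
# recent Streamlit plus torch and the accelerate package installed) is to
# cache the load and use half precision on GPU:
#
#   import torch
#
#   @st.cache_resource
#   def load_model():
#       tok = AutoTokenizer.from_pretrained("NousResearch/Llama-2-7b-chat-hf")
#       mdl = AutoModelForCausalLM.from_pretrained(
#           "NousResearch/Llama-2-7b-chat-hf",
#           torch_dtype=torch.float16,
#           device_map="auto",
#       )
#       return tok, mdl
#
#   tokenizer, model = load_model()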

# Function to get answers using the LLaMA 2 model
def get_answer(context, query):
    input_text = f"Context: {context}\nQ: {query}\nA:"
    # Truncate the prompt so it fits within LLaMA 2's 4096-token context
    # window, leaving room for the generated answer
    input_ids = tokenizer.encode(
        input_text, return_tensors="pt", truncation=True, max_length=4096 - 256
    )
    # Generate an answer; max_new_tokens caps the length of the answer alone,
    # independent of how long the prompt is
    output = model.generate(input_ids, max_new_tokens=256, num_return_sequences=1)
    # Decode only the newly generated tokens, skipping the echoed prompt
    answer = tokenizer.decode(output[0][input_ids.shape[1]:], skip_special_tokens=True)
    return answer
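
# Note: Llama-2-chat models were fine-tuned on an [INST] ... [/INST] chat
# template, so the plain "Context/Q/A" prompt above may work but is not the
# format the model was trained on; tokenizer.apply_chat_template (available
# in newer transformers versions) can build the expected format instead.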

# Button to generate answers
if st.button("Get Answer"):
    with st.spinner("Finding the answer..."):
        answer = get_answer(document_text, query)
    st.write("### Answer:")
    st.write(answer)

# Replace `jeff_wo.pdf` above with the actual path to the PDF in your repo
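
# To launch the app (assuming this file is saved as app.py):
#   streamlit run app.py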