|
import streamlit as st |
|
import fitz |
|
from transformers import AutoTokenizer, AutoModelForCausalLM |
|
|
|
|
|
def read_pdf(file_path):
    """Extract all text from the PDF at *file_path*.

    Parameters
    ----------
    file_path : str
        Path to a PDF file readable by PyMuPDF (``fitz``).

    Returns
    -------
    str
        The concatenated text of every page, in page order.
    """
    # Join page texts in a single pass instead of repeated `+=`,
    # which is quadratic on documents with many pages.
    with fitz.open(file_path) as doc:
        return "".join(page.get_text() for page in doc)
|
|
|
|
|
document_text = read_pdf("jeff_wo.pdf") |
|
|
|
|
|
st.title("LLaMA 2-based Q&A System") |
|
st.write("### Enter your query below:") |
|
query = st.text_input("Query") |
|
|
|
|
|
tokenizer = AutoTokenizer.from_pretrained("NousResearch/Llama-2-7b-chat-hf") |
|
model = AutoModelForCausalLM.from_pretrained("NousResearch/Llama-2-7b-chat-hf") |
|
|
|
|
|
def get_answer(context, query):
    """Generate an answer to *query* grounded in *context* with the loaded model.

    Parameters
    ----------
    context : str
        Reference text the model should answer from (e.g. the PDF contents).
    query : str
        The user's question.

    Returns
    -------
    str
        The generated answer only (the prompt is not echoed back).
    """
    input_text = f"Context: {context}\nQ: {query}\nA:"

    # LLaMA-2's context window is 4096 tokens; truncate the prompt and
    # reserve room for the answer so over-long PDFs don't crash generation.
    input_ids = tokenizer.encode(
        input_text,
        return_tensors="pt",
        truncation=True,
        max_length=4096 - 512,
    )

    # max_new_tokens bounds only the continuation. The original
    # max_length=512 counted the prompt too, so any context longer than
    # ~512 tokens left no budget for the answer at all.
    output = model.generate(input_ids, max_new_tokens=512, num_return_sequences=1)

    # Decode only the newly generated tokens; decoding output[0] in full
    # would return the whole prompt followed by the answer.
    answer = tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True)
    return answer.strip()
|
|
|
|
|
if st.button("Get Answer"): |
|
with st.spinner("Finding the answer..."): |
|
answer = get_answer(document_text, query) |
|
st.write("### Answer:") |
|
st.write(answer) |
|
|
|
|
|
|