code update

- app.py  +10 −6
- requirements.txt  +2 −1
app.py
CHANGED
@@ -1,4 +1,3 @@
-import os
 import io
 import streamlit as st
 from PyPDF2 import PdfReader
@@ -8,6 +7,7 @@ from langchain.vectorstores import FAISS
 from langchain.chains.question_answering import load_qa_chain
 from langchain.llms import HuggingFacePipeline
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+from peft import PeftModel, PeftConfig
 
 # Global variables
 knowledge_base = None
@@ -22,7 +22,10 @@ def load_pdf(pdf_file):
 # Split the text into chunks
 def split_text(text):
     text_splitter = CharacterTextSplitter(
-        separator="\n",
+        separator="\n",
+        chunk_size=1000,
+        chunk_overlap=200,
+        length_function=len
     )
     return text_splitter.split_text(text)
 
@@ -35,9 +38,11 @@ def create_knowledge_base(chunks):
 def load_model():
     model_name = "halyn/gemma2-2b-it-finetuned-paperqa"
     tokenizer = AutoTokenizer.from_pretrained(model_name)
-
-
+    config = PeftConfig.from_pretrained(model_name)
+    model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path)
+    model = PeftModel.from_pretrained(model, model_name)
 
+    return pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=150, temperature=0.1)
 # Set up the QA chain
 def setup_qa_chain():
     global qa_chain
@@ -46,7 +51,6 @@ def setup_qa_chain():
     qa_chain = load_qa_chain(llm, chain_type="stuff")
 
 
-
 # Main page UI
 def main_page():
     st.title("Welcome to GemmaPaperQA")
@@ -90,7 +94,7 @@ def main_page():
             st.session_state.page = "chat"
             setup_qa_chain()
             st.success("PDF successfully processed! You can now ask questions.")
-
+
         except Exception as e:
             st.error(f"Failed to process the PDF: {str(e)}")
 
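For orientation, the new load_model() feeds setup_qa_chain() roughly as below. This is a minimal sketch: only the load_qa_chain(llm, chain_type="stuff") line is visible in the hunks, so the call to load_model() and the HuggingFacePipeline wrapping are assumptions based on the imports at the top of app.py.

# Minimal sketch, assuming setup_qa_chain() wraps the transformers
# pipeline returned by load_model(); only the load_qa_chain(...) line
# actually appears in this diff.
def setup_qa_chain():
    global qa_chain
    pipe = load_model()                       # PEFT-backed pipeline added in this commit
    llm = HuggingFacePipeline(pipeline=pipe)  # assumed, based on the existing import
    qa_chain = load_qa_chain(llm, chain_type="stuff")
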
requirements.txt
CHANGED
@@ -9,4 +9,5 @@ torch==2.4.1
 faiss-cpu==1.8.0.post1
 requests==2.32.3
 huggingface-hub==0.25.1
-sentence-transformers==3.1.1
+sentence-transformers==3.1.1
+peft==0.2.0
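For completeness, querying the chain would typically look like the sketch below. The chat page itself is outside this diff, so answer() is a hypothetical helper; knowledge_base.similarity_search() and qa_chain.run() are the standard calls for running LangChain's "stuff" chain over a FAISS store.

# Hypothetical helper (not part of this commit): retrieve the most
# similar chunks from the FAISS index, then answer with the QA chain.
def answer(question):
    docs = knowledge_base.similarity_search(question)
    return qa_chain.run(input_documents=docs, question=question)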