Spaces:
Paused
Paused
code update
Browse files- app.py +11 -7
- requirements.txt +2 -2
app.py
CHANGED
@@ -31,21 +31,25 @@ def split_text(text):
|
|
31 |
|
32 |
# FAISS 벡터 저장소 생성
|
33 |
def create_knowledge_base(chunks):
|
34 |
-
model_name = "
|
35 |
embeddings = HuggingFaceEmbeddings(model_name=model_name)
|
36 |
return FAISS.from_texts(chunks, embeddings)
|
37 |
|
38 |
# Hugging Face 모델 로드
|
39 |
def load_model():
|
40 |
-
model_name = "halyn/gemma2-2b-it-finetuned-paperqa"
|
41 |
-
tokenizer = AutoTokenizer.from_pretrained(model_name
|
42 |
model = AutoModelForCausalLM.from_pretrained(model_name)
|
43 |
return pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=150, temperature=0.1)
|
44 |
|
45 |
# QA 체인 설정
|
46 |
def setup_qa_chain():
|
47 |
global qa_chain
|
48 |
-
|
|
|
|
|
|
|
|
|
49 |
llm = HuggingFacePipeline(pipeline=pipe)
|
50 |
qa_chain = load_qa_chain(llm, chain_type="stuff")
|
51 |
|
@@ -78,19 +82,19 @@ def main_page():
|
|
78 |
|
79 |
st.text_area("Preview of extracted text", text[:1000], height=200)
|
80 |
st.write(f"Total characters extracted: {len(text)}")
|
81 |
-
|
82 |
if st.button("Proceed with this file"):
|
83 |
chunks = split_text(text)
|
84 |
-
global knowledge_base
|
85 |
knowledge_base = create_knowledge_base(chunks)
|
86 |
|
87 |
if knowledge_base is None:
|
88 |
st.error("Failed to create knowledge base.")
|
89 |
return
|
90 |
|
|
|
|
|
91 |
st.session_state.paper_name = paper.name[:-4]
|
92 |
st.session_state.page = "chat"
|
93 |
-
setup_qa_chain()
|
94 |
st.success("PDF successfully processed! You can now ask questions.")
|
95 |
|
96 |
except Exception as e:
|
|
|
31 |
|
32 |
# FAISS 벡터 저장소 생성
|
33 |
def create_knowledge_base(chunks):
|
34 |
+
model_name = "sentence-transformers/all-mpnet-base-v2" # 임베딩 모델을 명시
|
35 |
embeddings = HuggingFaceEmbeddings(model_name=model_name)
|
36 |
return FAISS.from_texts(chunks, embeddings)
|
37 |
|
38 |
# Hugging Face 모델 로드
|
39 |
def load_model():
|
40 |
+
model_name = "halyn/gemma2-2b-it-finetuned-paperqa" # 텍스트 생성 모델 사용
|
41 |
+
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
42 |
model = AutoModelForCausalLM.from_pretrained(model_name)
|
43 |
return pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=150, temperature=0.1)
|
44 |
|
45 |
# QA 체인 설정
|
46 |
def setup_qa_chain():
|
47 |
global qa_chain
|
48 |
+
try:
|
49 |
+
pipe = load_model()
|
50 |
+
except Exception as e:
|
51 |
+
print(f"Error loading model: {e}")
|
52 |
+
return
|
53 |
llm = HuggingFacePipeline(pipeline=pipe)
|
54 |
qa_chain = load_qa_chain(llm, chain_type="stuff")
|
55 |
|
|
|
82 |
|
83 |
st.text_area("Preview of extracted text", text[:1000], height=200)
|
84 |
st.write(f"Total characters extracted: {len(text)}")
|
85 |
+
global knowledge_base
|
86 |
if st.button("Proceed with this file"):
|
87 |
chunks = split_text(text)
|
|
|
88 |
knowledge_base = create_knowledge_base(chunks)
|
89 |
|
90 |
if knowledge_base is None:
|
91 |
st.error("Failed to create knowledge base.")
|
92 |
return
|
93 |
|
94 |
+
setup_qa_chain()
|
95 |
+
|
96 |
st.session_state.paper_name = paper.name[:-4]
|
97 |
st.session_state.page = "chat"
|
|
|
98 |
st.success("PDF successfully processed! You can now ask questions.")
|
99 |
|
100 |
except Exception as e:
|
requirements.txt
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
streamlit
|
2 |
PyPDF2
|
|
|
3 |
langchain==0.3.1
|
4 |
langchain-community==0.3.1
|
5 |
langchain-core==0.3.8
|
@@ -10,5 +11,4 @@ faiss-cpu==1.8.0.post1
|
|
10 |
requests==2.32.3
|
11 |
huggingface-hub==0.25.1
|
12 |
sentence-transformers==3.1.1
|
13 |
-
peft==0.2.0
|
14 |
-
langchain-huggingface
|
|
|
1 |
streamlit
|
2 |
PyPDF2
|
3 |
+
langchain-huggingface
|
4 |
langchain==0.3.1
|
5 |
langchain-community==0.3.1
|
6 |
langchain-core==0.3.8
|
|
|
11 |
requests==2.32.3
|
12 |
huggingface-hub==0.25.1
|
13 |
sentence-transformers==3.1.1
|
14 |
+
peft==0.2.0
|
|