halyn committed on
Commit
91830bb
•
1 Parent(s): 9bd8b79

code update

Browse files
Files changed (2) hide show
  1. app.py +11 -7
  2. requirements.txt +2 -2
app.py CHANGED
@@ -31,21 +31,25 @@ def split_text(text):
31
 
32
  # FAISS 벡터 저장소 생성
33
  def create_knowledge_base(chunks):
34
- model_name = "halyn/gemma2-2b-it-finetuned-paperqa"
35
  embeddings = HuggingFaceEmbeddings(model_name=model_name)
36
  return FAISS.from_texts(chunks, embeddings)
37
 
38
  # Hugging Face 모델 로드
39
  def load_model():
40
- model_name = "halyn/gemma2-2b-it-finetuned-paperqa"
41
- tokenizer = AutoTokenizer.from_pretrained(model_name, clean_up_tokenization_spaces=False)
42
  model = AutoModelForCausalLM.from_pretrained(model_name)
43
  return pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=150, temperature=0.1)
44
 
45
  # QA 체인 설정
46
  def setup_qa_chain():
47
  global qa_chain
48
- pipe = load_model()
 
 
 
 
49
  llm = HuggingFacePipeline(pipeline=pipe)
50
  qa_chain = load_qa_chain(llm, chain_type="stuff")
51
 
@@ -78,19 +82,19 @@ def main_page():
78
 
79
  st.text_area("Preview of extracted text", text[:1000], height=200)
80
  st.write(f"Total characters extracted: {len(text)}")
81
-
82
  if st.button("Proceed with this file"):
83
  chunks = split_text(text)
84
- global knowledge_base
85
  knowledge_base = create_knowledge_base(chunks)
86
 
87
  if knowledge_base is None:
88
  st.error("Failed to create knowledge base.")
89
  return
90
 
 
 
91
  st.session_state.paper_name = paper.name[:-4]
92
  st.session_state.page = "chat"
93
- setup_qa_chain()
94
  st.success("PDF successfully processed! You can now ask questions.")
95
 
96
  except Exception as e:
 
31
 
32
  # FAISS 벡터 저장소 생성
33
  def create_knowledge_base(chunks):
34
+ model_name = "sentence-transformers/all-mpnet-base-v2" # 임베딩 모델을 명시
35
  embeddings = HuggingFaceEmbeddings(model_name=model_name)
36
  return FAISS.from_texts(chunks, embeddings)
37
 
38
  # Hugging Face 모델 로드
39
  def load_model():
40
+ model_name = "halyn/gemma2-2b-it-finetuned-paperqa" # 텍스트 생성 모델 사용
41
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
42
  model = AutoModelForCausalLM.from_pretrained(model_name)
43
  return pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=150, temperature=0.1)
44
 
45
  # QA 체인 설정
46
  def setup_qa_chain():
47
  global qa_chain
48
+ try:
49
+ pipe = load_model()
50
+ except Exception as e:
51
+ print(f"Error loading model: {e}")
52
+ return
53
  llm = HuggingFacePipeline(pipeline=pipe)
54
  qa_chain = load_qa_chain(llm, chain_type="stuff")
55
 
 
82
 
83
  st.text_area("Preview of extracted text", text[:1000], height=200)
84
  st.write(f"Total characters extracted: {len(text)}")
85
+ global knowledge_base
86
  if st.button("Proceed with this file"):
87
  chunks = split_text(text)
 
88
  knowledge_base = create_knowledge_base(chunks)
89
 
90
  if knowledge_base is None:
91
  st.error("Failed to create knowledge base.")
92
  return
93
 
94
+ setup_qa_chain()
95
+
96
  st.session_state.paper_name = paper.name[:-4]
97
  st.session_state.page = "chat"
 
98
  st.success("PDF successfully processed! You can now ask questions.")
99
 
100
  except Exception as e:
requirements.txt CHANGED
@@ -1,5 +1,6 @@
1
  streamlit
2
  PyPDF2
 
3
  langchain==0.3.1
4
  langchain-community==0.3.1
5
  langchain-core==0.3.8
@@ -10,5 +11,4 @@ faiss-cpu==1.8.0.post1
10
  requests==2.32.3
11
  huggingface-hub==0.25.1
12
  sentence-transformers==3.1.1
13
- peft==0.2.0
14
- langchain-huggingface
 
1
  streamlit
2
  PyPDF2
3
+ langchain-huggingface
4
  langchain==0.3.1
5
  langchain-community==0.3.1
6
  langchain-core==0.3.8
 
11
  requests==2.32.3
12
  huggingface-hub==0.25.1
13
  sentence-transformers==3.1.1
14
+ peft==0.2.0