Spaces:
Sleeping
Sleeping
standardteam
committed on
Commit
•
07661af
1
Parent(s):
c1dd588
Update app.py
Browse files
app.py
CHANGED
@@ -9,24 +9,24 @@ from langchain.vectorstores import Chroma
|
|
9 |
import gradio as gr
|
10 |
import tempfile
|
11 |
|
12 |
-
|
13 |
def qa(file, openaikey, query, chain_type, k):
|
14 |
os.environ["OPENAI_API_KEY"] = openaikey
|
15 |
|
16 |
-
# load document
|
17 |
loader = PyPDFLoader(file.name)
|
18 |
documents = loader.load()
|
19 |
-
# split the documents into chunks
|
20 |
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
|
21 |
texts = text_splitter.split_documents(documents)
|
22 |
-
# select which embeddings we want to use
|
23 |
embeddings = OpenAIEmbeddings()
|
24 |
-
# create the vectorestore to use as the index
|
25 |
db = Chroma.from_documents(texts, embeddings)
|
26 |
-
# expose this index in a retriever interface
|
27 |
retriever = db.as_retriever(
|
28 |
search_type="similarity", search_kwargs={"k": k})
|
29 |
-
# create a chain to answer questions
|
30 |
qa = RetrievalQA.from_chain_type(
|
31 |
llm=OpenAI(), chain_type=chain_type, retriever=retriever, return_source_documents=True)
|
32 |
result = qa({"query": query})
|
@@ -37,15 +37,16 @@ def qa(file, openaikey, query, chain_type, k):
|
|
37 |
iface = gr.Interface(
|
38 |
fn=qa,
|
39 |
inputs=[
|
40 |
-
gr.inputs.File(label="
|
41 |
gr.inputs.Textbox(label="OpenAI API Key"),
|
42 |
-
gr.inputs.Textbox(label="
|
|
|
43 |
gr.inputs.Dropdown(choices=['stuff', 'map_reduce', "refine", "map_rerank"], label="Chain type"),
|
44 |
gr.inputs.Slider(minimum=1, maximum=5, default=2, label="Number of relevant chunks"),
|
45 |
],
|
46 |
outputs="text",
|
47 |
-
title="
|
48 |
-
description="
|
49 |
)
|
50 |
|
51 |
iface.launch()
|
|
|
9 |
import gradio as gr
|
10 |
import tempfile
|
11 |
|
12 |
+
#定义查询函数qa
|
13 |
def qa(file, openaikey, query, chain_type, k):
|
14 |
os.environ["OPENAI_API_KEY"] = openaikey
|
15 |
|
16 |
+
# load document 加载PDF文件
|
17 |
loader = PyPDFLoader(file.name)
|
18 |
documents = loader.load()
|
19 |
+
# split the documents into chunks 将PDF文件分割成小块
|
20 |
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
|
21 |
texts = text_splitter.split_documents(documents)
|
22 |
+
# select which embeddings we want to use 使用 OpenAI 的embeddings模型为每个文本块创建一个向量嵌入
|
23 |
embeddings = OpenAIEmbeddings()
|
24 |
+
# create the vectorestore to use as the index 创建一个向量存储VectorStore,用于后续的搜索。
|
25 |
db = Chroma.from_documents(texts, embeddings)
|
26 |
+
# expose this index in a retriever interface 使用这个向量存储VectorStore创建一个检索器retriever
|
27 |
retriever = db.as_retriever(
|
28 |
search_type="similarity", search_kwargs={"k": k})
|
29 |
+
# create a chain to answer questions 然后使用这个检索器和 OpenAI 的模型创建一个问答链来回答问题。
|
30 |
qa = RetrievalQA.from_chain_type(
|
31 |
llm=OpenAI(), chain_type=chain_type, retriever=retriever, return_source_documents=True)
|
32 |
result = qa({"query": query})
|
|
|
37 |
iface = gr.Interface(
|
38 |
fn=qa,
|
39 |
inputs=[
|
40 |
+
gr.inputs.File(label="上传PDF"),
|
41 |
gr.inputs.Textbox(label="OpenAI API Key"),
|
42 |
+
gr.inputs.Textbox(label="你的问题"),
|
43 |
+
#longchain的文档documents分析功能的不同类型,具体见https://python.langchain.com.cn/docs/modules/chains/document/的解释
|
44 |
gr.inputs.Dropdown(choices=['stuff', 'map_reduce', "refine", "map_rerank"], label="Chain type"),
|
45 |
gr.inputs.Slider(minimum=1, maximum=5, default=2, label="Number of relevant chunks"),
|
46 |
],
|
47 |
outputs="text",
|
48 |
+
title="你可以问我关于你上传的PDF文件的任何信息!",
|
49 |
+
description="1) 上传一个PDF文件. 2)输入你的OpenAI API key.这将产生费用 3) 输入问题然后点击运行."
|
50 |
)
|
51 |
|
52 |
iface.launch()
|