JUNGU committed on
Commit
1a2426f
•
1 Parent(s): 77a4ab0

Upload 3 files

Browse files
Files changed (4) hide show
  1. .gitattributes +1 -0
  2. app.py +111 -0
  3. docs.pdf +3 -0
  4. requirements.txt +7 -0
.gitattributes CHANGED
@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+ docs.pdf filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.chat_models import ChatOpenAI
2
+ from langchain.document_loaders import PyPDFLoader
3
+ from langchain.embeddings.openai import OpenAIEmbeddings
4
+ from langchain.embeddings.cohere import CohereEmbeddings
5
+ from langchain.text_splitter import CharacterTextSplitter
6
+ from langchain.vectorstores.elastic_vector_search import ElasticVectorSearch
7
+ from langchain.vectorstores import Chroma
8
+ from PyPDF2 import PdfWriter
9
+ import gradio as gr
10
+ import os
11
+ from dotenv import load_dotenv
12
+ import openai
13
+ import streamlit as st
14
+
15
# Load variables from a local .env file (if one exists) into the environment.
load_dotenv()

# The OpenAI key is read from the `my_secret` secret instead of a committed
# .env file, because an uploaded .env file is not hidden. An already-set
# OPENAI_API_KEY is accepted as a fallback so local runs keep working.
_openai_api_key = os.environ.get("my_secret") or os.environ.get("OPENAI_API_KEY")
if not _openai_api_key:
    # Same exception type as the original os.environ['my_secret'] lookup,
    # but with a message that says what to configure.
    raise KeyError(
        "my_secret: set the 'my_secret' secret (or OPENAI_API_KEY) "
        "to an OpenAI API key"
    )
os.environ["OPENAI_API_KEY"] = _openai_api_key

# Resolve docs.pdf next to this script rather than hard-coding the
# deployment directory, so the app also starts outside /home/user/app.
_APP_DIR = os.path.dirname(os.path.abspath(__file__))
loader = PyPDFLoader(os.path.join(_APP_DIR, "docs.pdf"))
documents = loader.load()

# Split the loaded pages using a target chunk size of 1000 characters and
# no overlap between neighbouring chunks.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(documents)

# Vector embedding: embed every chunk with OpenAI and index it in Chroma.
embeddings = OpenAIEmbeddings()
vector_store = Chroma.from_documents(texts, embeddings)
# k=2: hand the two best-matching chunks to the LLM for each question.
retriever = vector_store.as_retriever(search_kwargs={"k": 2})
32
+
33
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)

# System prompt for the "stuff" chain: the retrieved chunks are substituted
# for {summaries}; the user's question is sent as the human message.
system_template = """Use the following pieces of context to answer the users question shortly.
Given the following summaries of a long document and a question, create a final answer with references ("SOURCES"), use "SOURCES" in capital letters regardless of the number of sources.
If you don't know the answer, just say that "I don't know", don't try to make up an answer.
----------------
{summaries}

You MUST answer in Korean and in Markdown format:"""

messages = [
    SystemMessagePromptTemplate.from_template(system_template),
    HumanMessagePromptTemplate.from_template("{question}"),
]

prompt = ChatPromptTemplate.from_messages(messages)

chain_type_kwargs = {"prompt": prompt}

# Change model_name here to use a different chat model.
llm = ChatOpenAI(model_name="gpt-4", temperature=0)

# Build the question-answering chain once, with the custom prompt. The
# original built a prompt-less chain first and immediately overwrote it.
chain = RetrievalQAWithSourcesChain.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    # Keep the retrieved chunks in the result so callers can cite them.
    return_source_documents=True,
    chain_type_kwargs=chain_type_kwargs,
)
79
+
80
# Startup smoke test: ask one fixed question and print which chunks were
# retrieved for it.
# NOTE(review): this sends a real LLM request every time the app starts —
# consider removing it once the deployment is verified.
query = "행복한 인생이란?"
result = chain(query)

for doc in result["source_documents"]:
    # Collapse newlines so each preview stays on a single output line.
    preview = doc.page_content[:100].replace("\n", " ")
    print(f"내용 : {preview}")
    print(f"파일 : {doc.metadata['source']}")
    print(f"페이지 : {doc.metadata['page']}")
88
+
89
+
90
def respond(message, chat_history):
    """Answer one chat message and record the exchange in the history.

    Args:
        message: Text the user submitted in the input box.
        chat_history: List of ``(user_message, bot_message)`` pairs; the new
            exchange is appended to it in place. ``None`` is treated as an
            empty history.

    Returns:
        A ``("", chat_history)`` tuple. The empty string is the new value of
        the input box and ``chat_history`` is the updated conversation.
    """
    # Tolerate a missing history (e.g. when the function is called directly).
    if chat_history is None:
        chat_history = []

    # Blank input: skip the retrieval/LLM round trip and leave the chat as is.
    if not message or not message.strip():
        return "", chat_history

    result = chain(message)

    # One "[n] source(page)" reference per retrieved chunk, numbered from 1.
    citations = " ".join(
        f"[{number}] {doc.metadata['source']}({doc.metadata['page']})"
        for number, doc in enumerate(result["source_documents"], start=1)
    )

    bot_message = result["answer"]
    if citations:
        # A blank line keeps the references from being glued onto the last
        # word of the answer and renders them as their own paragraph.
        bot_message = f"{bot_message}\n\n{citations}"

    # Add the user's message and the bot's reply to the chat history.
    chat_history.append((message, bot_message))

    return "", chat_history
102
+
103
# Build the chat interface: a heading, the conversation view, an input box
# and a reset button.
with gr.Blocks(theme='gstaff/sketch') as demo:
    gr.Markdown("# 안녕하세요. 세이노와 대화해보세요.")
    chatbot = gr.Chatbot(label="채팅창")  # conversation view
    msg = gr.Textbox(label="입력")  # user input box
    clear = gr.Button("초기화")  # reset button

    # Submitting the textbox calls respond(); its two return values become
    # the new textbox value and the new chatbot value.
    msg.submit(respond, [msg, chatbot], [msg, chatbot])
    # Clicking the reset button sets the chatbot value to None, which
    # empties the conversation.
    clear.click(lambda: None, None, chatbot, queue=False)

# Launch the interface.
demo.launch(debug=True)
docs.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dab840d01bd8582e930da5ccb74c032279e832ed02f7f938953e7f77730d1ad2
3
+ size 4232031
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ openai
2
+ langchain
3
+ pypdf
4
+ chromadb
5
+ tiktoken
6
+ PyPDF2
7
+ streamlit