from annotated_text import annotated_text
import streamlit as st
import openai
from gpt_based_function import gpt_keyw_extract_n_annotator
import concurrent.futures
import os, sys
from tqdm import tqdm
# current_path = '/Users/kintch/PycharmProjects/jungu_sgi/keyword_konan'
# os.chdir(current_path)
# sys.path.append(current_path)
# OpenAI API setup (the key is read from an environment variable)
openai.api_key = os.getenv("OPENAI_API_KEY")  # uncomment in the actual code
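# NOTE: gpt_keyw_extract_n_annotator is imported from gpt_based_function, whose source is
# not part of this file. Judging by the exec() calls further down, it is assumed to return
# a string of Python code that re-renders a passage with annotated_text(), highlighting the
# extracted keywords. The helper below is a minimal illustrative sketch under that
# assumption (hypothetical name and prompt); it is not used by the app.
def _example_keyword_annotator(text: str) -> str:
    """Hypothetical sketch: ask GPT for keywords, then emit an annotated_text(...) call."""
    messages = [
        {"role": "system", "content": "You are a helpful assistant. use only korean"},
        {"role": "user", "content": f"๋‹ค์Œ ๊ธ€์—์„œ ํ•ต์‹ฌ ํ‚ค์›Œ๋“œ๋ฅผ ์‰ผํ‘œ๋กœ ๊ตฌ๋ถ„ํ•ด ๋‚˜์—ดํ•ด ์ฃผ์„ธ์š”: {text}"},
    ]
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo-16k", messages=messages, temperature=0.1, max_tokens=500
    )
    keywords = {k.strip() for k in response['choices'][0]['message']['content'].split(",")}
    # Wrap each keyword in a (word, "KEYWORD") tuple so annotated_text() highlights it.
    parts = []
    for token in text.split(" "):
        parts.append(f'("{token} ", "KEYWORD")' if token in keywords else f'"{token} "')
    return "annotated_text(" + ", ".join(parts) + ")"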
st.set_page_config(layout="wide")
col, _ = st.columns(2)
preset_learning_text = """๋ฏผ์ฃผ์ฃผ์˜ ์‚ฌํšŒ๋Š” ๊ตญ๋ฏผ์ด ์ •์น˜์— ์ฐธ์—ฌํ•  ๊ถŒ๋ฆฌ๋ฅผ ๋ณด์žฅํ•œ๋‹ค. ๊ทธ๋Ÿฌํ•œ ๊ถŒ๋ฆฌ๋ฅผ ์ฐธ์ •๊ถŒ์ด๋ผ ํ•˜๋Š”๋ฐ, ์ด๋Š” ๊ธฐ๋ณธ์ ์œผ๋กœ โ€˜์„ ๊ฑฐโ€™๋กœ ์‹คํ˜„๋œ๋‹ค. ์„ ๊ฑฐ๋Š” ์‚ฌํšŒ ์ง‘๋‹จ์˜ ๋Œ€ํ‘œ์ž๋‚˜ ๊ณต์ง์ž๋ฅผ ์„ ์ถœํ•˜์—ฌ ๊ทธ๋“ค์—๊ฒŒ ๋Œ€ํ‘œ์„ฑ์„ ๋ถ€์—ฌํ•˜๋Š” ํ–‰์œ„์ด๋‹ค. ๊ทธ๋Ÿฌ๋ฏ€๋กœ ๋†’์€ ํˆฌํ‘œ์œจ์€ ๋ฏผ์ฃผ์ฃผ์˜์˜ ์ •๋‹น์„ฑ ํ™•๋ณด์™€ ๊นŠ์€ ๊ด€๋ จ์ด ์žˆ๋‹ค.
์„ ๊ฑฐ ํˆฌํ‘œ ์ œ๋„์—๋Š” ํˆฌํ‘œ๊ถŒ ํ–‰์‚ฌ๋ฅผ ํˆฌํ‘œ์ž์˜ ์ž์œ ์˜์‚ฌ์— ๋งก๊ธฐ๋Š” โ€˜์ž์œ  ํˆฌํ‘œ์ œโ€™์™€ ํˆฌํ‘œ๊ถŒ ํ–‰์‚ฌ๋ฅผ ๊ตญ๋ฏผ์˜ ์˜๋ฌด๋กœ ๊ฐ„์ฃผํ•˜๊ณ  ์ •๋‹นํ•œ ์‚ฌ์œ  ์—†์ด ๊ธฐ๊ถŒํ•˜๋ฉด ๋ฒ•์  ์ œ์žฌ๋ฅผ ๊ฐ€ํ•˜๋Š” โ€˜์˜๋ฌด ํˆฌํ‘œ์ œโ€™๊ฐ€ ์žˆ๋‹ค. ์šฐ๋ฆฌ๋‚˜๋ผ๋Š” ์ž์œ  ํˆฌํ‘œ์ œ๋ฅผ ์ฑ„ํƒํ•˜๊ณ  ์žˆ๋Š”๋ฐ, ์ตœ๊ทผ ์น˜๋ฅธ ์„ ๊ฑฐ์˜ ํ‰๊ท  ํˆฌํ‘œ์œจ์ด 50ํผ์„ผํŠธ๋Œ€๋กœ ๋‚˜ํƒ€๋‚ฌ๋‹ค. ๊ฒฝ์ œ ๊ฐœ๋ฐœ ํ˜‘๋ ฅ ๊ธฐ๊ตฌ(OECD) ํšŒ์›๊ตญ ํ‰๊ท ์ด 70ํผ์„ผํŠธ๋Œ€์ธ ๊ฒƒ์„ ์ƒ๊ฐํ•˜๋ฉด ๋งค์šฐ ๋‚ฎ์€ ์ˆ˜์น˜๋ผ ํ•  ์ˆ˜ ์žˆ๋‹ค. ์ด๋Ÿฌํ•œ ์ƒํ™ฉ์ด ์ง€์†๋˜์ž ์˜๋ฌด ํˆฌํ‘œ์ œ๋ฅผ ๋„์ž…ํ•ด์•ผ ํ•œ๋‹ค๋Š” ์˜๊ฒฌ์ด ์ œ์‹œ๋˜์—ˆ๊ณ , ์ž์œ  ํˆฌํ‘œ์ œ๊ฐ€ ๋ฏผ์ฃผ์ฃผ์˜์˜ ์›์น™์— ๋งž์œผ๋ฏ€๋กœ ์ด๋ฅผ ์œ ์ง€ํ•ด์•ผ ํ•œ๋‹ค๋Š” ์˜๊ฒฌ๊ณผ ๋Œ€๋ฆฝํ•˜๊ณ  ์žˆ๋‹ค.
์˜๋ฌด ํˆฌํ‘œ์ œ๋ฅผ ๋„์ž…ํ•˜์ž๋Š” ์ธก์€ ๋‚ฎ์€ ํˆฌํ‘œ์œจ๋กœ ํˆฌํ‘œ ๊ฒฐ๊ณผ์˜ ์ •๋‹น์„ฑ์„ ํ™•๋ณดํ•˜์ง€ ๋ชปํ•˜๋Š” ๋ฌธ์ œ๊ฐ€ ๋งค์šฐ ์‹ฌ๊ฐํ•˜๋‹ค๊ณ  ์ฃผ์žฅํ•œ๋‹ค. ๋˜ ์˜๋ฌด ํˆฌํ‘œ์ œ์˜ ๊ฐ•์ œ์„ฑ๊ณผ ๋ฒ•์  ์ œ์žฌ๊ฐ€ ํˆฌํ‘œ์œจ์„ ๋†’์ด๋ฏ€๋กœ ํˆฌํ‘œ์œจ์ด ๋‚ฎ์•„์„œ ๋ฐœ์ƒํ•˜๋Š” ๋ฌธ์ œ๋ฅผ ํ•ด๊ฒฐํ•  ์ˆ˜ ์žˆ๋‹ค๊ณ  ๋ณธ๋‹ค. ๊ทธ๋ฆฌ๊ณ  ๊ตญ๋ฏผ ๋Œ€๋ถ€๋ถ„์ด ํˆฌํ‘œ์— ์ฐธ์—ฌํ•˜๊ฒŒ ๋˜๋ฉด ์ •์น˜์ธ๋“ค์ด ๋ชจ๋“  ๊ณ„์ธต์˜ ์ง€์ง€๋ฅผ ๋ฐ›๊ธฐ ์œ„ํ•ด ์ •์ฑ… ๊ฒฝ์Ÿ๋ ฅ์„ ๋†’์ด๋ ค ํ•  ๊ฒƒ์ด๋ฏ€๋กœ ์ •์น˜ ์†Œ์™ธ ๊ณ„์ธต์— ๋”์šฑ ๊ด€์‹ฌ์„ ์Ÿ๋Š” ํšจ๊ณผ๊ฐ€ ์žˆ์„ ๊ฒƒ์ด๋ผ๊ณ  ์ด์•ผ๊ธฐํ•œ๋‹ค.
๋ฐ˜๋ฉด ์˜๋ฌด ํˆฌํ‘œ์ œ์— ๋ฐ˜๋Œ€ํ•˜๋Š” ์ธก์€ ํ˜„์žฌ ์šฐ๋ฆฌ๋‚˜๋ผ์˜ ํˆฌํ‘œ์œจ์ด ์ •์น˜ ์ง€๋„์ž๋“ค์˜ ๋Œ€ํ‘œ์„ฑ์„ ํ›ผ์†ํ•  ๋งŒํผ ์‹ฌ๊ฐํ•œ ์ƒํ™ฉ์€ ์•„๋‹ˆ๋ผ๊ณ  ์ฃผ์žฅํ•œ๋‹ค. ๋˜ ํˆฌํ‘œ์œจ์„ ๋†’์ด๋Š” ๊ฒƒ๋ณด๋‹ค ๊ตญ๋ฏผ์˜ ์‹ ๋ขฐ๋ฅผ ํšŒ๋ณตํ•˜๋Š” ๊ฒƒ์ด ๋” ์ค‘์š”ํ•˜๊ณ , ์‹œ๋ฏผ ๊ต์œก์ด๋‚˜ ๋ชจ์˜ ํˆฌํ‘œ ๊ต์œก ํ”„๋กœ๊ทธ๋žจ์œผ๋กœ๋„ ํˆฌํ‘œ์œจ ์ƒ์Šน์„ ๊ธฐ๋Œ€ํ•  ์ˆ˜ ์žˆ๋‹ค๋ฉฐ ์˜๋ฌด ํˆฌํ‘œ์ œ์˜ ๋„์ž…๋งŒ์ด ํˆฌํ‘œ์œจ์ด๋‚˜ ์ •์น˜์  ๊ด€์‹ฌ์„ ๋†’์ด๋Š” ํ•ด๊ฒฐ ๋ฐฉ์•ˆ์€ ์•„๋‹ˆ๋ผ๊ณ  ์ด์•ผ๊ธฐํ•œ๋‹ค. ๊ทธ๋ฆฌ๊ณ  ์˜๋ฌด ํˆฌํ‘œ์ œ๋ฅผ ๋„์ž…ํ•˜๋ฉด, ์„ ์ถœ๋œ ์ •์น˜์ธ๋“ค์ด ๋†’์€ ํˆฌํ‘œ์œจ์„ ํ•‘๊ณ„๋กœ ์•ˆํ•˜๋ฌด์ธ์˜ ํƒœ๋„๋ฅผ ๊ฐ–๋Š” ๋ถ€์ž‘์šฉ์ด ์ƒ๊ธด๋‹ค๋“ ๊ฐ€ ํ›„๋ณด์ž๋ฅผ ์ž˜ ๋ชจ๋ฅด๋Š” ์ƒํƒœ์—์„œ ํˆฌํ‘œํ•˜๋Š” ์ผ์ด ๋ฐœ์ƒํ•˜์—ฌ ๊ตญ๋ฏผ์˜ ๋œป์ด ์˜คํžˆ๋ ค ์™œ๊ณก๋  ์ˆ˜ ์žˆ๋‹ค๋ฉฐ ์šฐ๋ ค์˜ ๋ชฉ์†Œ๋ฆฌ๋ฅผ ๋‚ด๊ณ  ์žˆ๋‹ค.
"""
# Run keyword extraction up front and keep the results in a variable
# preset_keyw_func(preset_learning_texts)
preset_learning_texts = [p for p in preset_learning_text.split("\n") if p.strip()]  # split into paragraphs, dropping empty lines
highlighted_texts_list = list()
tmp = str()
tmp_list = list()
for k in range(len(preset_learning_texts)):
    print(k)
    text = preset_learning_texts[k]  # text of the k-th paragraph
    tmp += text  # cumulative text: paragraph 1, 1+2, 1+2+3, ...
    tmp_list.append(tmp)
# To be updated: on each click, show the keyword analysis result for one more paragraph
# (a session-state sketch of this idea follows the run() call below).
def run(list_sum):
    with concurrent.futures.ThreadPoolExecutor() as executor:
        results = list(tqdm(executor.map(gpt_keyw_extract_n_annotator, list_sum), total=len(list_sum)))
    return results

highlighted_texts_list = run(tmp_list)
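# Hedged sketch of the "one more paragraph per click" TODO above: keep a paragraph
# counter in st.session_state and render the matching precomputed annotation. This
# helper is illustrative only and is not wired into the UI below; the function name,
# button label, and widget key are assumptions.
def show_next_paragraph_keywords(container):
    """Reveal the keyword annotation for one additional paragraph on each click."""
    if "n_paragraphs_shown" not in st.session_state:
        st.session_state.n_paragraphs_shown = 0
    if st.button("๋‹ค์Œ ๋ฌธ๋‹จ ํ‚ค์›Œ๋“œ ๋ณด๊ธฐ", key="next_paragraph"):
        st.session_state.n_paragraphs_shown = min(
            st.session_state.n_paragraphs_shown + 1, len(highlighted_texts_list)
        )
    if st.session_state.n_paragraphs_shown > 0:
        with container:
            # Each list entry is a string of annotated_text(...) code, as built above.
            exec(highlighted_texts_list[st.session_state.n_paragraphs_shown - 1])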
def display_passage(col):
    st.header("์ง€๋ฌธ")
    global text_container
    text_container = st.container()  # added code
    # Reserve the space in advance
    global text_placeholder
    text_placeholder = text_container.empty()
    global preset_learning_text
    text_placeholder.write(preset_learning_text)
    # Return so the variables declared here can also be used in other functions
    return text_placeholder, text_container, preset_learning_text
def display_summary(col):
    st.header("์š”์•ฝ ๊ฒฐ๊ณผ")
    global user_summary
    user_summary = st.text_area("์š”์•ฝ๊ฒฐ๊ณผ๋ฅผ ์ œ์ถœํ•˜์„ธ์š”.")
    cols = st.columns(2)
    with cols[0]:
        btn_submit = st.button("์ œ์ถœ")
        if btn_submit:
            # Google Drive API connection logic to be added later (see the sketch after this function)
            st.write("์ œ์ถœ ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")
    with cols[1]:
        btn_score = st.button("์ฑ„์ ํ•˜๊ธฐ")
        if btn_score:
            # Grading against a rubric
            rubric = """์ฑ„์  ๊ธฐ์ค€ ์ƒ: ๋ฌธ์žฅ์˜ ์ฃผ์ œ๋ฅผ ํŒŒ์•…ํ•˜๊ณ , ์ฃผ์š” ๋‚ด์šฉ์„ ํŒŒ์•…ํ•  ์ˆ˜ ์žˆ๋‹ค. ์ค‘: ๋ฌธ์žฅ์˜ ์ฃผ์ œ๋ฅผ ํŒŒ์•…ํ•  ์ˆ˜ ์žˆ๋‹ค. ํ•˜: ๋ฌธ์žฅ์˜ ์ฃผ์ œ๋ฅผ ํŒŒ์•…ํ•  ์ˆ˜ ์—†๋‹ค."""
            # Prompt that grades the submitted summary against the rubric criteria
            explanation_task = f"{rubric}์„ ๊ธฐ์ค€์œผ๋กœ {user_summary}์˜ ๋‚ด์šฉ์„ ์ฑ„์ ํ•ด์ฃผ์„ธ์š”. ์ฑ„์  ๊ธฐ์ค€์€ ๊ณต๊ฐœํ•˜์ง€ ๋ง๊ณ  ์ƒ, ์ค‘, ํ•˜๋กœ ๋‚˜๋ˆ„๊ณ  ๊ฐ„๋‹จํ•œ ์ด์œ ๋ฅผ ์•Œ๋ ค์ฃผ์„ธ์š”."
            messages = [
                {"role": "system", "content": "You are a helpful assistant. use only korean"},
                {"role": "user", "content": explanation_task}
            ]
            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo-16k",
                messages=messages,
                temperature=0.1,
                max_tokens=2500
            )
            explanation = response['choices'][0]['message']['content']
            st.write(f"์ฑ„์  ๊ฒฐ๊ณผ: {explanation}")
def display_input_btns(col):
    st.header("์ธ๊ณต์ง€๋Šฅ ์‚ฌ์šฉํ•˜๊ธฐ")
    global user_input
    user_input = st.text_area("๋‚ด์šฉ์„ ๋„ฃ๊ณ  ๋ฒ„ํŠผ์„ ๋ˆŒ๋Ÿฌ์ฃผ์„ธ์š”:", "")
    st.write(user_input)
    # Button row
    cols = st.columns(4)
    with cols[0]:
        btn_keyword = st.button("ํ‚ค์›Œ๋“œ ์ฐพ๊ธฐ")
        if btn_keyword:
            # Keyword-highlighting logic
            # highlighted_text = gpt_keyw_extract_n_annotator(preset_learning_text)  # should be deleted
            global highlighted_texts_list
            highlighted_text = highlighted_texts_list[-1]
            # Clear the existing passage
            text_placeholder.empty()
            # Insert the new content (the precomputed string is annotated_text(...) code)
            with text_container:
                exec(highlighted_text)
    with cols[1]:
        global btn_explanation
        btn_explanation = st.button("์ถ”๊ฐ€ ์„ค๋ช…")
    with cols[2]:
        global btn_simple
        btn_simple = st.button("์‰ฌ์šด ํ‘œํ˜„")
    with cols[3]:
        global btn_rewrite
        btn_rewrite = st.button("๋‹ค์‹œ ์“ฐ๊ธฐ")
    return btn_keyword, btn_explanation, btn_simple, btn_rewrite
def display_output():
    with st.container():
        st.header("๊ฒฐ๊ณผ")
        if btn_explanation:
            explanation_task = f"Explain the term '{user_input}' in a simple manner, based on the context of the following passage: {preset_learning_text}"
            messages = [
                {"role": "system", "content": "You are a helpful assistant that explains complex topics in a way that an elementary school student can understand. use only korean"},
                {"role": "user", "content": explanation_task}
            ]
            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo-16k",
                messages=messages,
                temperature=0.1,
                max_tokens=200
            )
            explanation = response['choices'][0]['message']['content']
            # Additional explanation
            st.write(f"์ถ”๊ฐ€ ์„ค๋ช…: {explanation}")
        if btn_simple:
            explanation_task = f"Describe the content of '{preset_learning_text}' in a way that an elementary school student could understand."
            messages = [
                {"role": "system", "content": "You are a helpful assistant that explains complex topics in a way that an elementary school student can understand. use only korean"},
                {"role": "user", "content": explanation_task}
            ]
            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo-16k",
                messages=messages,
                temperature=0.1,
                max_tokens=2500
            )
            explanation = response['choices'][0]['message']['content']
            # Output the result in plain language
            st.write(f"์‰ฌ์šด ๊ธ€: {explanation}")
        if btn_rewrite:
            explanation_task = f"Rewrite the contents of '{user_input}' so that it will pass the writing test."
            messages = [
                {"role": "system", "content": "You are a helpful assistant. use only korean"},
                {"role": "user", "content": explanation_task}
            ]
            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo-16k",
                messages=messages,
                temperature=0.1,
                max_tokens=2500
            )
            explanation = response['choices'][0]['message']['content']
            st.write(f"๋‹ค์‹œ ์“ฐ๊ธฐ: {explanation}")
# To be added: render buttons in the results section
def main():
    st.title("ํ•œ๊ตญ์–ด ํ•™์Šต์ž๋ฅผ ์œ„ํ•œ HCI tools")
    col1, col2 = st.columns(2)
    with col1:
        display_passage(col1)
        display_summary(col1)
    with col2:
        btn_keyword, btn_explanation, btn_simple, btn_rewrite = display_input_btns(col2)
        display_output()
    # # Bottom container
    # with st.container():
    #     st.header("๊ฒฐ๊ณผ")

if __name__ == "__main__":
    main()