fukufuk committed
Commit 318adba
Parent(s): 0398d77

Update application file

- app.py +54 -22
- {util → lib}/__init__.py +3 -0
- lib/__pycache__/__init__.cpython-311.pyc +0 -0
- {util → lib}/__pycache__/fact_opinion_classifer.cpython-311.pyc +0 -0
- {util → lib}/__pycache__/fact_summarizer.cpython-311.pyc +0 -0
- lib/__pycache__/get_data_info.cpython-311.pyc +0 -0
- lib/__pycache__/get_raw_data_html.cpython-311.pyc +0 -0
- lib/__pycache__/get_raw_data_info.cpython-311.pyc +0 -0
- {util → lib}/__pycache__/opinion_reason_classifer.cpython-311.pyc +0 -0
- {util → lib}/fact_opinion_classifer.py +0 -0
- {util → lib}/fact_summarizer.py +0 -0
- lib/get_data_info.py +38 -0
- lib/get_raw_data_html.py +21 -0
- lib/get_raw_data_info.py +33 -0
- {util → lib}/opinion_reason_classifer.py +0 -0
- util/__pycache__/__init__.cpython-311.pyc +0 -0
app.py
CHANGED
@@ -6,8 +6,9 @@ import requests
 from bs4 import BeautifulSoup as bs
 from openai import OpenAI

-from …
-…
+from lib import (fact_opinion_classifer, fact_summarizer, get_data_info,
+                 get_raw_data_html, get_raw_data_info,
+                 opinion_reason_classifer)

 client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))

@@ -17,46 +18,62 @@ def main(url):
     soup = bs(response.text, 'html.parser')
     article_elems = soup.select('#uamods > .article_body > div > p')
     sentence_text = "\n".join([article_elem.text for article_elem in article_elems])
-    …
-    sentences = [line.strip() for line in re.split('。|？|\n|！', message) if line.strip() != ""]
+    sentences = [line.strip() for line in re.split('。|？|\n|！', sentence_text) if line.strip() != ""]

     fact_opinion_list = fact_opinion_classifer(client, sentences)

     if len(sentences) != len(fact_opinion_list):
         raise ValueError(f'GPTの回答の数が一致しませんでした。: {fact_opinion_list}, {sentences}')
+
+    fact_main_message = '' + sentence_text
+    opinion_main_message = '' + sentence_text

     opinions = []
     facts = []
     for sentence, fact_opinion in zip(sentences, fact_opinion_list):
         if fact_opinion == "Opinion":
-            …
+            opinion_main_message = opinion_main_message.replace(sentence, f'<b>{sentence}</b>')
+            fact_main_message = fact_main_message.replace(sentence, f'<span style="color: gray;">{sentence}</span>')
             opinions.append(sentence)
         elif fact_opinion == "Fact":
+            fact_main_message = fact_main_message.replace(sentence, f'<b>{sentence}</b>')
+            opinion_main_message = opinion_main_message.replace(sentence, f'<span style="color: gray;">{sentence}</span>')
             facts.append(sentence)
         else:
             print(f'error: not known fact_opinion option: {fact_opinion}')

-    …
+    opinion_main_message = '<p>' + opinion_main_message.replace('\n', '</p>\n<p>') + '</p>'
+    fact_main_message = '<p>' + fact_main_message.replace('\n', '</p>\n<p>') + '</p>'

-    …
+    html_format = """<div style="padding: 10px; overflow-x: scroll; border: 1px #999999 solid; height: 450px;"> {message}</div>"""
+    opinion_main_message = html_format.format(message=opinion_main_message)
+    fact_main_message = html_format.format(message=fact_main_message)

     if len(opinions) == 0:
         opinion_reason_text = "<h3>この文章には意見に関する文が見つかりませんでした。</h3>"
     else:
         opinion_reasons = opinion_reason_classifer(client, opinions)
+        all_opinion_data_html_format = "<ul>{all_data}</ul>"
+        opinion_data_html_format = """<li style="list-style: none; border-bottom:1px solid; border-top:1px solid; padding: 6px;"><div><b>{text}</b></div><div><details><summary><b>解釈</b></summary><span style="color: red;">{reason}</span></details></div></li>"""
         opinion_reason_text = ""
         for opinion, reason in zip(opinions, opinion_reasons):
-            opinion_reason_text += …
+            opinion_reason_text += opinion_data_html_format.format(text=opinion, reason=reason)
+        opinion_reason_html = all_opinion_data_html_format.format(all_data=opinion_reason_text)
+
+    fact_sentence = fact_summarizer(client, sentence_text)
+
+    data_texts = get_data_info(client, facts)
+    predicted_data = get_raw_data_info(client, data_texts)
+    data_main_message, raw_data_html_txt = get_raw_data_html(sentence_text, data_texts, predicted_data)

-    html_txt = f"""<div style="padding: 10px; overflow-x: scroll; border: 1px #999999 solid; height: 500px;"> {message}</div>"""
     opinion_reason_text = f"""<div style="padding: 10px; overflow-x: scroll; border: 1px #999999 solid; height: 450px;"> {opinion_reason_text}</div>"""
-    return …
+    return fact_main_message, opinion_main_message, opinion_reason_html, fact_sentence, raw_data_html_txt, data_main_message


 if __name__ == "__main__":
     '''main'''
     with gr.Blocks(
-        title='…
+        title='情報リテラシーサポートツール',
         theme='shivi/calm_seafoam',
         css='''
             #title_{
@@ -86,15 +103,28 @@ if __name__ == "__main__":

         btn = gr.Button("Enter")
         with gr.Row():
-            with gr.…
-            …
+            with gr.Tab('事実'):
+                with gr.Row():
+                    with gr.Column(scale=1):
+                        out_fact_0 = gr.HTML("""<div style="padding: 10px; overflow-x: scroll; border: 1px #999999 solid; height: 450px;"><span style="opacity: 0.5;">こちらに本文が表示されます。</span></div>""")
+                    with gr.Column(scale=1):
+                        gr.Markdown("## 本文から確実に言えること")
+                        out_fact_1 = gr.Markdown("""<div style="padding: 10px; overflow-x: scroll; border: 1px #999999 solid; height: 400px;"><span style="opacity: 0.5;">こちらに分析内容が表示されます。</span></div>""")
+            with gr.Tab('意見'):
+                with gr.Row():
+                    with gr.Column(scale=1):
+                        out_opinion_0 = gr.HTML("""<div style="padding: 10px; overflow-x: scroll; border: 1px #999999 solid; height: 450px;"><span style="opacity: 0.5;">こちらに本文が表示されます。</span></div>""")
+                    with gr.Column(scale=1):
+                        gr.Markdown("## 意見文分析")
+                        out_opinion_1 = gr.HTML("""<div style="padding: 10px; overflow-x: scroll; border: 1px #999999 solid; height: 400px;"><span style="opacity: 0.5;">こちらに分析内容が表示されます。</span></div>""")
+            with gr.Tab('データ'):
+                with gr.Row():
+                    with gr.Column(scale=1):
+                        out_data_0 = gr.HTML("""<div style="padding: 10px; overflow-x: scroll; border: 1px #999999 solid; height: 450px;"><span style="opacity: 0.5;">こちらに本文が表示されます。</span></div>""")
+                    with gr.Column(scale=1):
+                        gr.Markdown("## 加工データ推測")
+                        out_data_1 = gr.HTML("""<div style="padding: 10px; overflow-x: scroll; border: 1px #999999 solid; height: 400px;"><span style="opacity: 0.5;">こちらに分析内容が表示されます。</span></div>""")
+
         with gr.Row():
             with gr.Column():
                 gr.Markdown(value='''
@@ -134,6 +164,8 @@ if __name__ == "__main__":
                 </p>
                 ''')

-        inp.submit(main, [inp], […
-        …
+        inp.submit(main, [inp], [out_fact_0, out_opinion_0, out_opinion_1,
+                                 out_fact_1, out_data_1, out_data_0])
+        btn.click(main, [inp], [out_fact_0, out_opinion_0, out_opinion_1,
+                                out_fact_1, out_data_1, out_data_0])
     demo.launch(auth=(os.getenv('USER_NAME'), os.getenv('PASSWORD')))
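The new main() builds two parallel HTML views of the article: each sentence is wrapped in <b> in the view that matches its label and in a gray <span> in the other. Below is a minimal sketch of that step, not part of the commit; the sample text and labels are made-up stand-ins for what fact_opinion_classifer would return.

```python
import re

# Made-up article with one factual and one opinion sentence (illustrative only).
sample_text = "売上は前年より1割増えた。\nこれは素晴らしい成果だと思う。"
labels = ["Fact", "Opinion"]  # stand-in for fact_opinion_classifer(client, sentences)

# Same splitting rule as main(): break on 。？！ and newlines, drop empty pieces.
sentences = [line.strip() for line in re.split('。|？|\n|！', sample_text) if line.strip() != ""]

fact_view = sample_text      # facts in bold, opinions grayed out
opinion_view = sample_text   # opinions in bold, facts grayed out
for sentence, label in zip(sentences, labels):
    if label == "Opinion":
        opinion_view = opinion_view.replace(sentence, f'<b>{sentence}</b>')
        fact_view = fact_view.replace(sentence, f'<span style="color: gray;">{sentence}</span>')
    elif label == "Fact":
        fact_view = fact_view.replace(sentence, f'<b>{sentence}</b>')
        opinion_view = opinion_view.replace(sentence, f'<span style="color: gray;">{sentence}</span>')

print(fact_view)
print(opinion_view)
```

In the event wiring at the end of app.py, the six values returned by main() fill the output components positionally: out_fact_0, out_opinion_0, out_opinion_1, out_fact_1, out_data_1, out_data_0.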
{util → lib}/__init__.py
RENAMED
@@ -1,3 +1,6 @@
 from .fact_opinion_classifer import fact_opinion_classifer
 from .fact_summarizer import fact_summarizer
+from .get_data_info import get_data_info
+from .get_raw_data_html import get_raw_data_html
+from .get_raw_data_info import get_raw_data_info
 from .opinion_reason_classifer import opinion_reason_classifer
lib/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (539 Bytes)
{util → lib}/__pycache__/fact_opinion_classifer.cpython-311.pyc
RENAMED
Binary files a/util/__pycache__/fact_opinion_classifer.cpython-311.pyc and b/lib/__pycache__/fact_opinion_classifer.cpython-311.pyc differ
{util → lib}/__pycache__/fact_summarizer.cpython-311.pyc
RENAMED
File without changes
lib/__pycache__/get_data_info.cpython-311.pyc
ADDED
Binary file (3.71 kB)
lib/__pycache__/get_raw_data_html.cpython-311.pyc
ADDED
Binary file (2.09 kB)
lib/__pycache__/get_raw_data_info.cpython-311.pyc
ADDED
Binary file (2.89 kB)
{util → lib}/__pycache__/opinion_reason_classifer.cpython-311.pyc
RENAMED
Binary files a/util/__pycache__/opinion_reason_classifer.cpython-311.pyc and b/lib/__pycache__/opinion_reason_classifer.cpython-311.pyc differ
{util → lib}/fact_opinion_classifer.py
RENAMED
File without changes
{util → lib}/fact_summarizer.py
RENAMED
File without changes
lib/get_data_info.py
ADDED
@@ -0,0 +1,38 @@
+import re
+
+from openai import OpenAI
+
+REQUIREMENT = """以下の箇条書きの文章群から、数値データや表データを含む箇所をそのまま抜き出してください。数値や表が含まれていない場合、その文章については None としてください。具体的には以下の条件に従ってください。
+
+### 条件
+- 数値データや表データを含む箇所をそのまま抜き出す
+- 数値や表がない場合は None を返す
+- 結果を箇条書きで表示する
+- 複数データが含まれる場合はスラッシュ(/)区切りで一行に複数記す"""
+
+EXAMPLE_QUESTION = """1. TSRの倒産集計における業種分類は、総務省の日本産業標準分類に準拠している。
+2. 2024年1-8月の倒産は、ラーメン店が44件(前年同期比57.1%増)で、2009年からの統計では同期間で最多だった2020年の31件を大幅に上回っている。
+3. 2009年以降の倒産動向をみると、ラーメン店は2013年に29件を記録した。その後、インバウンド(訪日外国人客)の来店増などで2016年は16件まで減少した"""
+
+EXAMPLE_ANSWER = """1. None
+2. 2024年1-8月の倒産は、ラーメン店が44件(前年同期比57.1%増)/ 2009年からの統計では同期間で最多だった2020年の31件
+3. ラーメン店は2013年に29件を記録した/ 2016年は16件まで減少した"""
+
+
+def get_data_info(client: OpenAI, facts, model_name='gpt-4o-mini-2024-07-18'):
+    message = "\n".join([f'{i+1}. {q}' for i, q in enumerate(facts)])
+    response = client.chat.completions.create(
+        messages=[
+            {"role": "user", "content": REQUIREMENT},
+            {"role": "user", "content": EXAMPLE_QUESTION},
+            {"role": "assistant", "content": EXAMPLE_ANSWER},
+            {"role": "user", "content": message}
+        ],
+        model=model_name,
+        temperature=0,
+    )
+    res_text = response.choices[0].message.content
+    predicted_data = re.findall(r'\d{1,2}\. (.+)\n*', res_text)
+    predicted_data = [[d_.strip() for d_ in d.split('/')] for d in predicted_data]
+    predicted_data = '/'.join(['/'.join(d) for d in predicted_data if d != ['None']]).split('/')
+    return predicted_data
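For reference, a minimal sketch (not part of the commit) of the parsing step at the end of get_data_info, run on the file's own EXAMPLE_ANSWER instead of a live API reply: numbered lines are extracted, slash-separated items are split, the None entries are dropped, and everything is flattened into one list of data snippets.

```python
import re

# Stand-in for response.choices[0].message.content; this reuses EXAMPLE_ANSWER
# from get_data_info, so no API call is needed.
res_text = """1. None
2. 2024年1-8月の倒産は、ラーメン店が44件(前年同期比57.1%増)/ 2009年からの統計では同期間で最多だった2020年の31件
3. ラーメン店は2013年に29件を記録した/ 2016年は16件まで減少した"""

predicted_data = re.findall(r'\d{1,2}\. (.+)\n*', res_text)                      # one string per numbered line
predicted_data = [[d_.strip() for d_ in d.split('/')] for d in predicted_data]   # split slash-separated items
predicted_data = '/'.join(['/'.join(d) for d in predicted_data if d != ['None']]).split('/')  # drop None, flatten
print(predicted_data)
# ['2024年1-8月の倒産は、ラーメン店が44件(前年同期比57.1%増)', '2009年からの統計では同期間で最多だった2020年の31件',
#  'ラーメン店は2013年に29件を記録した', '2016年は16件まで減少した']
```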
lib/get_raw_data_html.py
ADDED
@@ -0,0 +1,21 @@
+def get_raw_data_html(sentence_text: str, data_texts: list, predicted_data: list):
+    data_main_message = f'{sentence_text}'
+    all_raw_data_html_format = "<h3>[生データ一覧]<h4><ul>{all_data}</ul>"
+    raw_data_html_format = """<li style="list-style: none; border-bottom:1px solid; border-top:1px solid; padding: 6px;"><b>{text}<b></li>"""
+    all_processed_data_html_format = "<h3>[加工データ一覧]<h4><ul>{all_data}</ul>"
+    processed_data_html_format = """<li style="list-style: none; border-bottom:1px solid; border-top:1px solid; padding: 6px;"><div><b>{text}</b></div><div><details><summary>加工内容</summary><span style="color: red;">{reason}</span></details></div></li>"""
+
+    all_raw_data = ""
+    all_processed_data = ""
+    for text, data in zip(data_texts, predicted_data):
+        if data[0] == '生データ':
+            all_raw_data += raw_data_html_format.format(text=text)
+            data_main_message = data_main_message.replace(text, f'<span style="background-color: #00ff00">{text}</span>')
+        elif data[0] == '加工データ':
+            all_processed_data += processed_data_html_format.format(text=text, reason=data[1])
+            data_main_message = data_main_message.replace(text, f'<span style="background-color: #ffff00">{text}</span>')
+        else:
+            pass
+    all_raw_data_html = all_raw_data_html_format.format(all_data=all_raw_data)
+    all_processed_data_html = all_processed_data_html_format.format(all_data=all_processed_data)
+    return data_main_message, all_raw_data_html + "\n<hr>\n" + all_processed_data_html
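A hedged usage sketch for get_raw_data_html. The inputs below are made-up stand-ins for what app.py passes in (data_texts from get_data_info, predicted_data from get_raw_data_info), and it assumes the script runs from the repository root so the lib package is importable.

```python
from lib import get_raw_data_html

# Made-up article text and analysis results, shaped like the real pipeline's output.
sentence_text = "2024年1-8月の倒産は、ラーメン店が44件(前年同期比57.1%増)。ラーメン店は2013年に29件を記録した。"
data_texts = [
    "2024年1-8月の倒産は、ラーメン店が44件(前年同期比57.1%増)",
    "ラーメン店は2013年に29件を記録した",
]
predicted_data = [
    ["加工データ", "前年との比較を割合で示しています。"],  # processed data with an explanation
    ["生データ"],                                          # raw data, no explanation needed
]

# Returns (1) the article body with data spans highlighted (yellow = processed, green = raw)
# and (2) an HTML listing of raw and processed data, the latter with a <details> explanation.
data_main_message, raw_data_html = get_raw_data_html(sentence_text, data_texts, predicted_data)
print(data_main_message)
print(raw_data_html)
```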
lib/get_raw_data_info.py
ADDED
@@ -0,0 +1,33 @@
+import re
+
+from openai import OpenAI
+
+REQUIREMENT_ = """以下の箇条書きのデータ群を分析し、「生データ」か「加工データ」かを推定してください。加工データである場合、スラッシュ(/)区切りで加工過程の説明も追記してください。
+生データ: 収集されたままの加工されていないデータ
+加工データ: 処理・変換され理解しやすくされたデータ"""
+
+EXAMPLE_QUESTION_ = """1. 2024年1-8月の倒産は、ラーメン店が44件(前年同期比57.1%増)
+2. ラーメン店は2013年に29件を記録した
+3. 2009年からの統計では同期間で最多だった2020年の31件"""
+
+EXAMPLE_ANSWER_ = """1. 加工データ / 前年との比較を割合で示すことで理解しやすくされています。
+2. 生データ
+3. 加工データ / 過去のデータと比べて数が最も多かったことを示すことで数値の理解を助けています。"""
+
+
+def get_raw_data_info(client: OpenAI, data_texts, model_name='gpt-4o-mini-2024-07-18'):
+    message = "\n".join([f'{i+1}. {q}' for i, q in enumerate(data_texts)])
+    response = client.chat.completions.create(
+        messages=[
+            {"role": "user", "content": REQUIREMENT_},
+            {"role": "user", "content": EXAMPLE_QUESTION_},
+            {"role": "assistant", "content": EXAMPLE_ANSWER_},
+            {"role": "user", "content": message}
+        ],
+        model=model_name,
+        temperature=0,
+    )
+    res_text = response.choices[0].message.content
+    predicted_data = re.findall(r'\d{1,2}\. (.+)\n*', res_text)
+    predicted_data = [[d_.strip() for d_ in d.split('/')] for d in predicted_data]
+    return predicted_data
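And the counterpart parsing sketch for get_raw_data_info (not part of the commit), again using the file's own EXAMPLE_ANSWER_ in place of a live reply: each numbered line becomes ['生データ'] or ['加工データ', explanation], which is exactly the data[0] / data[1] shape get_raw_data_html consumes.

```python
import re

# Stand-in for response.choices[0].message.content (EXAMPLE_ANSWER_ from this file).
res_text = """1. 加工データ / 前年との比較を割合で示すことで理解しやすくされています。
2. 生データ
3. 加工データ / 過去のデータと比べて数が最も多かったことを示すことで数値の理解を助けています。"""

predicted_data = re.findall(r'\d{1,2}\. (.+)\n*', res_text)                     # one string per numbered line
predicted_data = [[d_.strip() for d_ in d.split('/')] for d in predicted_data]  # split label / explanation
print(predicted_data)
# [['加工データ', '前年との比較を割合で示すことで理解しやすくされています。'],
#  ['生データ'],
#  ['加工データ', '過去のデータと比べて数が最も多かったことを示すことで数値の理解を助けています。']]
```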
{util → lib}/opinion_reason_classifer.py
RENAMED
File without changes
util/__pycache__/__init__.cpython-311.pyc
DELETED
Binary file (376 Bytes)