fukufuk commited on
Commit
318adba
1 Parent(s): 0398d77

Update application file

Browse files
app.py CHANGED
@@ -6,8 +6,9 @@ import requests
6
  from bs4 import BeautifulSoup as bs
7
  from openai import OpenAI
8
 
9
- from util import (fact_opinion_classifer, fact_summarizer,
10
- opinion_reason_classifer)
 
11
 
12
  client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
13
 
@@ -17,46 +18,62 @@ def main(url):
17
  soup = bs(response.text, 'html.parser')
18
  article_elems = soup.select('#uamods > .article_body > div > p')
19
  sentence_text = "\n".join([article_elem.text for article_elem in article_elems])
20
- message = '' + sentence_text
21
- sentences = [line.strip() for line in re.split('。|?|\n|!', message) if line.strip() != ""]
22
 
23
  fact_opinion_list = fact_opinion_classifer(client, sentences)
24
 
25
  if len(sentences) != len(fact_opinion_list):
26
  raise ValueError(f'GPTの回答の数が一致しませんでした。: {fact_opinion_list}, {sentences}')
 
 
 
27
 
28
  opinions = []
29
  facts = []
30
  for sentence, fact_opinion in zip(sentences, fact_opinion_list):
31
  if fact_opinion == "Opinion":
32
- message = message.replace(sentence, f'<span style="opacity:0.6;">{sentence}</span>')
 
33
  opinions.append(sentence)
34
  elif fact_opinion == "Fact":
 
 
35
  facts.append(sentence)
36
  else:
37
  print(f'error: not known fact_opinion option: {fact_opinion}')
38
 
39
- message = '<p>' + message.replace('\n', '</p>\n<p>') + '</p>'
 
40
 
41
- fact_sentence = fact_summarizer(client, sentence_text)
 
 
42
 
43
  if len(opinions) == 0:
44
  opinion_reason_text = "<h3>この文章には意見に関する文が見つかりませんでした。</h3>"
45
  else:
46
  opinion_reasons = opinion_reason_classifer(client, opinions)
 
 
47
  opinion_reason_text = ""
48
  for opinion, reason in zip(opinions, opinion_reasons):
49
- opinion_reason_text += f'<p>- {opinion}<p><p> →{reason}</p><br><hr>'
 
 
 
 
 
 
 
50
 
51
- html_txt = f"""<div style="padding: 10px; overflow-x: scroll; border: 1px #999999 solid; height: 500px;"> {message}</div>"""
52
  opinion_reason_text = f"""<div style="padding: 10px; overflow-x: scroll; border: 1px #999999 solid; height: 450px;"> {opinion_reason_text}</div>"""
53
- return html_txt, opinion_reason_text, fact_sentence, "None"
54
 
55
 
56
  if __name__ == "__main__":
57
  '''main'''
58
  with gr.Blocks(
59
- title='東京大学 | 情報リテラシーサポートツール',
60
  theme='shivi/calm_seafoam',
61
  css='''
62
  #title_{
@@ -86,15 +103,28 @@ if __name__ == "__main__":
86
 
87
  btn = gr.Button("Enter")
88
  with gr.Row():
89
- with gr.Column(scale=1):
90
- out1 = gr.HTML("""<div style="padding: 10px; overflow-x: scroll; border: 1px #999999 solid; height: 500px;"><span style="opacity: 0.5;">こちらに本文が表示されます。</span></div>""")
91
- with gr.Column(scale=1):
92
- with gr.Tab('意見についての文章'):
93
- out2 = gr.HTML()
94
- with gr.Tab('本文から確実に言えること'):
95
- out3 = gr.Markdown()
96
- with gr.Tab('使用されているデータの詳細'):
97
- out4 = gr.Markdown()
 
 
 
 
 
 
 
 
 
 
 
 
 
98
  with gr.Row():
99
  with gr.Column():
100
  gr.Markdown(value='''
@@ -134,6 +164,8 @@ if __name__ == "__main__":
134
  </p>
135
  ''')
136
 
137
- inp.submit(main, [inp], [out1, out2, out3, out4])
138
- btn.click(main, [inp], [out1, out2, out3, out4])
 
 
139
  demo.launch(auth=(os.getenv('USER_NAME'), os.getenv('PASSWORD')))
 
6
  from bs4 import BeautifulSoup as bs
7
  from openai import OpenAI
8
 
9
+ from lib import (fact_opinion_classifer, fact_summarizer, get_data_info,
10
+ get_raw_data_html, get_raw_data_info,
11
+ opinion_reason_classifer)
12
 
13
  client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
14
 
 
18
  soup = bs(response.text, 'html.parser')
19
  article_elems = soup.select('#uamods > .article_body > div > p')
20
  sentence_text = "\n".join([article_elem.text for article_elem in article_elems])
21
+ sentences = [line.strip() for line in re.split('。|?|\n|!', sentence_text) if line.strip() != ""]
 
22
 
23
  fact_opinion_list = fact_opinion_classifer(client, sentences)
24
 
25
  if len(sentences) != len(fact_opinion_list):
26
  raise ValueError(f'GPTの回答の数が一致しませんでした。: {fact_opinion_list}, {sentences}')
27
+
28
+ fact_main_message = '' + sentence_text
29
+ opinion_main_message = '' + sentence_text
30
 
31
  opinions = []
32
  facts = []
33
  for sentence, fact_opinion in zip(sentences, fact_opinion_list):
34
  if fact_opinion == "Opinion":
35
+ opinion_main_message = opinion_main_message.replace(sentence, f'<b>{sentence}</b>')
36
+ fact_main_message = fact_main_message.replace(sentence, f'<span style="color: gray;">{sentence}</span>')
37
  opinions.append(sentence)
38
  elif fact_opinion == "Fact":
39
+ fact_main_message = fact_main_message.replace(sentence, f'<b>{sentence}</b>')
40
+ opinion_main_message = opinion_main_message.replace(sentence, f'<span style="color: gray;">{sentence}</span>')
41
  facts.append(sentence)
42
  else:
43
  print(f'error: not known fact_opinion option: {fact_opinion}')
44
 
45
+ opinion_main_message = '<p>' + opinion_main_message.replace('\n', '</p>\n<p>') + '</p>'
46
+ fact_main_message = '<p>' + fact_main_message.replace('\n', '</p>\n<p>') + '</p>'
47
 
48
+ html_format = """<div style="padding: 10px; overflow-x: scroll; border: 1px #999999 solid; height: 450px;"> {message}</div>"""
49
+ opinion_main_message = html_format.format(message=opinion_main_message)
50
+ fact_main_message = html_format.format(message=fact_main_message)
51
 
52
  if len(opinions) == 0:
53
  opinion_reason_text = "<h3>この文章には意見に関する文が見つかりませんでした。</h3>"
54
  else:
55
  opinion_reasons = opinion_reason_classifer(client, opinions)
56
+ all_opinion_data_html_format = "<ul>{all_data}</ul>"
57
+ opinion_data_html_format = """<li style="list-style: none; border-bottom:1px solid; border-top:1px solid; padding: 6px;"><div><b>{text}</b></div><div><details><summary><b>解釈</b></summary><span style="color: red;">{reason}</span></details></div></li>"""
58
  opinion_reason_text = ""
59
  for opinion, reason in zip(opinions, opinion_reasons):
60
+ opinion_reason_text += opinion_data_html_format.format(text=opinion, reason=reason)
61
+ opinion_reason_html = all_opinion_data_html_format.format(all_data=opinion_reason_text)
62
+
63
+ fact_sentence = fact_summarizer(client, sentence_text)
64
+
65
+ data_texts = get_data_info(client, facts)
66
+ predicted_data = get_raw_data_info(client, data_texts)
67
+ data_main_message, raw_data_html_txt = get_raw_data_html(sentence_text, data_texts, predicted_data)
68
 
 
69
  opinion_reason_text = f"""<div style="padding: 10px; overflow-x: scroll; border: 1px #999999 solid; height: 450px;"> {opinion_reason_text}</div>"""
70
+ return fact_main_message, opinion_main_message, opinion_reason_html, fact_sentence, raw_data_html_txt, data_main_message
71
 
72
 
73
  if __name__ == "__main__":
74
  '''main'''
75
  with gr.Blocks(
76
+ title='情報リテラシーサポートツール',
77
  theme='shivi/calm_seafoam',
78
  css='''
79
  #title_{
 
103
 
104
  btn = gr.Button("Enter")
105
  with gr.Row():
106
+ with gr.Tab('事実'):
107
+ with gr.Row():
108
+ with gr.Column(scale=1):
109
+ out_fact_0 = gr.HTML("""<div style="padding: 10px; overflow-x: scroll; border: 1px #999999 solid; height: 450px;"><span style="opacity: 0.5;">こちらに本文が表示されます。</span></div>""")
110
+ with gr.Column(scale=1):
111
+ gr.Markdown("## 本文から確実に言えること")
112
+ out_fact_1 = gr.Markdown("""<div style="padding: 10px; overflow-x: scroll; border: 1px #999999 solid; height: 400px;"><span style="opacity: 0.5;">こちらに分析内容が表示されます。</span></div>""")
113
+ with gr.Tab('意見'):
114
+ with gr.Row():
115
+ with gr.Column(scale=1):
116
+ out_opinion_0 = gr.HTML("""<div style="padding: 10px; overflow-x: scroll; border: 1px #999999 solid; height: 450px;"><span style="opacity: 0.5;">こちらに本文が表示されます。</span></div>""")
117
+ with gr.Column(scale=1):
118
+ gr.Markdown("## 意見文分析")
119
+ out_opinion_1 = gr.HTML("""<div style="padding: 10px; overflow-x: scroll; border: 1px #999999 solid; height: 400px;"><span style="opacity: 0.5;">こちらに分析内容が表示されます。</span></div>""")
120
+ with gr.Tab('データ'):
121
+ with gr.Row():
122
+ with gr.Column(scale=1):
123
+ out_data_0 = gr.HTML("""<div style="padding: 10px; overflow-x: scroll; border: 1px #999999 solid; height: 450px;"><span style="opacity: 0.5;">こちらに本文が表示されます。</span></div>""")
124
+ with gr.Column(scale=1):
125
+ gr.Markdown("## 加工データ推測")
126
+ out_data_1 = gr.HTML("""<div style="padding: 10px; overflow-x: scroll; border: 1px #999999 solid; height: 400px;"><span style="opacity: 0.5;">こちらに分析内容が表示されます。</span></div>""")
127
+
128
  with gr.Row():
129
  with gr.Column():
130
  gr.Markdown(value='''
 
164
  </p>
165
  ''')
166
 
167
+ inp.submit(main, [inp], [out_fact_0, out_opinion_0, out_opinion_1,
168
+ out_fact_1, out_data_1, out_data_0])
169
+ btn.click(main, [inp], [out_fact_0, out_opinion_0, out_opinion_1,
170
+ out_fact_1, out_data_1, out_data_0])
171
  demo.launch(auth=(os.getenv('USER_NAME'), os.getenv('PASSWORD')))
{util → lib}/__init__.py RENAMED
@@ -1,3 +1,6 @@
1
  from .fact_opinion_classifer import fact_opinion_classifer
2
  from .fact_summarizer import fact_summarizer
 
 
 
3
  from .opinion_reason_classifer import opinion_reason_classifer
 
1
  from .fact_opinion_classifer import fact_opinion_classifer
2
  from .fact_summarizer import fact_summarizer
3
+ from .get_data_info import get_data_info
4
+ from .get_raw_data_html import get_raw_data_html
5
+ from .get_raw_data_info import get_raw_data_info
6
  from .opinion_reason_classifer import opinion_reason_classifer
lib/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (539 Bytes). View file
 
{util → lib}/__pycache__/fact_opinion_classifer.cpython-311.pyc RENAMED
Binary files a/util/__pycache__/fact_opinion_classifer.cpython-311.pyc and b/lib/__pycache__/fact_opinion_classifer.cpython-311.pyc differ
 
{util → lib}/__pycache__/fact_summarizer.cpython-311.pyc RENAMED
File without changes
lib/__pycache__/get_data_info.cpython-311.pyc ADDED
Binary file (3.71 kB). View file
 
lib/__pycache__/get_raw_data_html.cpython-311.pyc ADDED
Binary file (2.09 kB). View file
 
lib/__pycache__/get_raw_data_info.cpython-311.pyc ADDED
Binary file (2.89 kB). View file
 
{util → lib}/__pycache__/opinion_reason_classifer.cpython-311.pyc RENAMED
Binary files a/util/__pycache__/opinion_reason_classifer.cpython-311.pyc and b/lib/__pycache__/opinion_reason_classifer.cpython-311.pyc differ
 
{util → lib}/fact_opinion_classifer.py RENAMED
File without changes
{util → lib}/fact_summarizer.py RENAMED
File without changes
lib/get_data_info.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import re

from openai import OpenAI

# Prompt: extract numeric/tabular data verbatim from a bulleted list of
# sentences; answer "None" for sentences that contain no data, and separate
# multiple data points on one line with slashes.
REQUIREMENT = """以下の箇条書きの文章群から、数値データや表データを含む箇所をそのまま抜き出してください。数値や表が含まれていない場合、その文章については None としてください。具体的には以下の条件に従ってください。

### 条件
- 数値データや表データを含む箇所をそのまま抜き出す
- 数値や表がない場合は None を返す
- 結果を箇条書きで表示する
- 複数データが含まれる場合はスラッシュ(/)区切りで一行に複数記す"""

# One-shot example pair steering the answer format expected by the parser below.
EXAMPLE_QUESTION = """1. TSRの倒産集計における業種分類は、総務省の日本産業標準分類に準拠している。
2. 2024年1-8月の倒産は、ラーメン店が44件(前年同期比57.1%増)で、2009年からの統計では同期間で最多だった2020年の31件を大幅に上回っている。
3. 2009年以降の倒産動向をみると、ラーメン店は2013年に29件を記録した。その後、インバウンド(訪日外国人客)の来店増などで2016年は16件まで減少した"""

EXAMPLE_ANSWER = """1. None
2. 2024年1-8月の倒産は、ラーメン店が44件(前年同期比57.1%増)/ 2009年からの統計では同期間で最多だった2020年の31件
3. ラーメン店は2013年に29件を記録した/ 2016年は16件まで減少した"""


def get_data_info(client: OpenAI, facts, model_name='gpt-4o-mini-2024-07-18'):
    """Extract numeric/tabular data snippets from *facts* via the OpenAI API.

    Args:
        client: Configured ``OpenAI`` client.
        facts: Iterable of fact sentences to scan for data.
        model_name: Chat-completions model to query.

    Returns:
        Flat list of extracted data strings; sentences the model answered
        ``None`` for are dropped. Returns an empty list when no sentence
        contained data.
    """
    message = "\n".join(f'{i + 1}. {q}' for i, q in enumerate(facts))
    response = client.chat.completions.create(
        messages=[
            {"role": "user", "content": REQUIREMENT},
            {"role": "user", "content": EXAMPLE_QUESTION},
            {"role": "assistant", "content": EXAMPLE_ANSWER},
            {"role": "user", "content": message},
        ],
        model=model_name,
        temperature=0,  # deterministic extraction
    )
    res_text = response.choices[0].message.content
    # One answer line per input sentence: "<n>. <data[/data...] or None>".
    answers = re.findall(r'\d{1,2}\. (.+)', res_text)
    split_answers = [[part.strip() for part in ans.split('/')] for ans in answers]
    # Flatten while skipping "None" answers. The previous
    # '/'.join(...).split('/') round-trip returned [''] (a single empty
    # string) when every answer was None; this comprehension correctly
    # yields an empty list in that case.
    return [item for ans in split_answers if ans != ['None'] for item in ans]
lib/get_raw_data_html.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def get_raw_data_html(sentence_text: str, data_texts: list, predicted_data: list):
    """Build highlighted article HTML plus raw/processed data listings.

    Args:
        sentence_text: Full article text; matched snippets are highlighted
            in place via ``str.replace``.
        data_texts: Data snippets extracted from the article.
        predicted_data: Per-snippet classification aligned with
            ``data_texts``: ``['生データ']`` (raw) or
            ``['加工データ', <explanation>]`` (processed).

    Returns:
        Tuple of (article HTML with color highlights,
        combined raw + processed listing HTML separated by ``<hr>``).
    """
    data_main_message = f'{sentence_text}'
    # The original templates emitted invalid HTML: "<h3>…<h4>" instead of a
    # closing "</h3>", and "<b>{text}<b>" instead of "</b>". Fixed here.
    all_raw_data_html_format = "<h3>[生データ一覧]</h3><ul>{all_data}</ul>"
    raw_data_html_format = """<li style="list-style: none; border-bottom:1px solid; border-top:1px solid; padding: 6px;"><b>{text}</b></li>"""
    all_processed_data_html_format = "<h3>[加工データ一覧]</h3><ul>{all_data}</ul>"
    processed_data_html_format = """<li style="list-style: none; border-bottom:1px solid; border-top:1px solid; padding: 6px;"><div><b>{text}</b></div><div><details><summary>加工内容</summary><span style="color: red;">{reason}</span></details></div></li>"""

    all_raw_data = ""
    all_processed_data = ""
    for text, data in zip(data_texts, predicted_data):
        if data[0] == '生データ':
            all_raw_data += raw_data_html_format.format(text=text)
            data_main_message = data_main_message.replace(text, f'<span style="background-color: #00ff00">{text}</span>')
        elif data[0] == '加工データ':
            # Guard against the model omitting the "/ explanation" part,
            # which previously raised IndexError on data[1].
            reason = data[1] if len(data) > 1 else ''
            all_processed_data += processed_data_html_format.format(text=text, reason=reason)
            data_main_message = data_main_message.replace(text, f'<span style="background-color: #ffff00">{text}</span>')
        # Any other label is ignored on purpose (best-effort rendering).
    all_raw_data_html = all_raw_data_html_format.format(all_data=all_raw_data)
    all_processed_data_html = all_processed_data_html_format.format(all_data=all_processed_data)
    return data_main_message, all_raw_data_html + "\n<hr>\n" + all_processed_data_html
lib/get_raw_data_info.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import re

from openai import OpenAI

# Prompt: label each bulleted data item as raw ("生データ") or processed
# ("加工データ"); processed items also get a slash-separated explanation.
REQUIREMENT_ = """以下の箇条書きのデータ群を分析し、「生データ」か「加工データ」かを推定してください。加工データである場合、スラッシュ(/)区切りで加工過程の説明も追記してください。
生データ: 収集されたままの加工されていないデータ
加工データ: 処理・変換され理解しやすくされたデータ"""

# One-shot example pair fixing the answer format the parser relies on.
EXAMPLE_QUESTION_ = """1. 2024年1-8月の倒産は、ラーメン店が44件(前年同期比57.1%増)
2. ラーメン店は2013年に29件を記録した
3. 2009年からの統計では同期間で最多だった2020年の31件"""

EXAMPLE_ANSWER_ = """1. 加工データ / 前年との比較を割合で示すことで理解しやすくされています。
2. 生データ
3. 加工データ / 過去のデータと比べて数が最も多かったことを示すことで数値の理解を助けています。"""


def get_raw_data_info(client: OpenAI, data_texts, model_name='gpt-4o-mini-2024-07-18'):
    """Classify each data snippet as raw or processed via the OpenAI API.

    Args:
        client: Configured ``OpenAI`` client.
        data_texts: Iterable of data snippets to classify.
        model_name: Chat-completions model to query.

    Returns:
        A list parallel to ``data_texts``; each element is a list whose
        first item is the label and whose remaining items (if any) are the
        slash-separated explanation parts, all stripped of whitespace.
    """
    numbered_lines = [f'{idx}. {text}' for idx, text in enumerate(data_texts, start=1)]
    prompt = "\n".join(numbered_lines)
    chat = client.chat.completions.create(
        messages=[
            {"role": "user", "content": REQUIREMENT_},
            {"role": "user", "content": EXAMPLE_QUESTION_},
            {"role": "assistant", "content": EXAMPLE_ANSWER_},
            {"role": "user", "content": prompt},
        ],
        model=model_name,
        temperature=0,  # deterministic classification
    )
    answer_text = chat.choices[0].message.content
    # Each answer line looks like "<n>. <label>[ / explanation...]".
    parsed = []
    for line in re.findall(r'\d{1,2}\. (.+)\n*', answer_text):
        parsed.append([piece.strip() for piece in line.split('/')])
    return parsed
{util → lib}/opinion_reason_classifer.py RENAMED
File without changes
util/__pycache__/__init__.cpython-311.pyc DELETED
Binary file (376 Bytes)