fukufuk
committed on
Commit
•
0398d77
1
Parent(s):
755c307
Add application file
Browse files- .gitignore +1 -0
- app.py +139 -0
- requirements.txt +3 -0
- util/__init__.py +3 -0
- util/__pycache__/__init__.cpython-311.pyc +0 -0
- util/__pycache__/fact_opinion_classifer.cpython-311.pyc +0 -0
- util/__pycache__/fact_summarizer.cpython-311.pyc +0 -0
- util/__pycache__/opinion_reason_classifer.cpython-311.pyc +0 -0
- util/fact_opinion_classifer.py +44 -0
- util/fact_summarizer.py +24 -0
- util/opinion_reason_classifer.py +41 -0
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
venv/
|
app.py
ADDED
@@ -0,0 +1,139 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import re
|
3 |
+
|
4 |
+
import gradio as gr
|
5 |
+
import requests
|
6 |
+
from bs4 import BeautifulSoup as bs
|
7 |
+
from openai import OpenAI
|
8 |
+
|
9 |
+
from util import (fact_opinion_classifer, fact_summarizer,
|
10 |
+
opinion_reason_classifer)
|
11 |
+
|
12 |
+
# Shared OpenAI client for all helper calls; reads the key from the
# OPENAI_API_KEY environment variable (None if unset — calls will then fail).
client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
|
13 |
+
|
14 |
+
|
15 |
+
def main(url):
    """Fetch a news article, flag opinion sentences, and build the UI output.

    Args:
        url: URL of an article page.  NOTE(review): the CSS selector below is
            site-specific ('#uamods > .article_body > div > p') — confirm it
            matches the intended news site.

    Returns:
        A 4-tuple for the Gradio outputs:
        (article HTML with opinion sentences dimmed,
         opinion/reason HTML list,
         fact-only summary text,
         placeholder string "None").

    Raises:
        ValueError: when the classifier returns a different number of answers
            than there are sentences.
    """
    response = requests.get(url)
    soup = bs(response.text, 'html.parser')
    article_elems = soup.select('#uamods > .article_body > div > p')
    sentence_text = "\n".join([article_elem.text for article_elem in article_elems])
    message = sentence_text  # working copy that gets HTML markup inserted
    # Split on Japanese sentence delimiters (and newlines); drop empty pieces.
    sentences = [line.strip() for line in re.split('。|?|\n|!', message) if line.strip() != ""]

    fact_opinion_list = fact_opinion_classifer(client, sentences)

    if len(sentences) != len(fact_opinion_list):
        raise ValueError(f'GPTの回答の数が一致しませんでした。: {fact_opinion_list}, {sentences}')

    opinions = []
    facts = []
    for sentence, fact_opinion in zip(sentences, fact_opinion_list):
        if fact_opinion == "Opinion":
            # Dim opinion sentences in the displayed article body.
            message = message.replace(sentence, f'<span style="opacity:0.6;">{sentence}</span>')
            opinions.append(sentence)
        elif fact_opinion == "Fact":
            facts.append(sentence)
        else:
            print(f'error: not known fact_opinion option: {fact_opinion}')

    # Wrap each original line in a paragraph tag.
    message = '<p>' + message.replace('\n', '</p>\n<p>') + '</p>'

    fact_sentence = fact_summarizer(client, sentence_text)

    if len(opinions) == 0:
        opinion_reason_text = "<h3>この文章には意見に関する文が見つかりませんでした。</h3>"
    else:
        opinion_reasons = opinion_reason_classifer(client, opinions)
        opinion_reason_text = ""
        for opinion, reason in zip(opinions, opinion_reasons):
            # BUG FIX: the opinion paragraph was never closed ('<p>' where
            # '</p>' was intended), producing invalid nested-<p> HTML.
            opinion_reason_text += f'<p>- {opinion}</p><p> →{reason}</p><br><hr>'

    html_txt = f"""<div style="padding: 10px; overflow-x: scroll; border: 1px #999999 solid; height: 500px;"> {message}</div>"""
    opinion_reason_text = f"""<div style="padding: 10px; overflow-x: scroll; border: 1px #999999 solid; height: 450px;"> {opinion_reason_text}</div>"""
    return html_txt, opinion_reason_text, fact_sentence, "None"
|
54 |
+
|
55 |
+
|
56 |
+
if __name__ == "__main__":
    # Gradio front end: paste an article URL, get the annotated article body,
    # the opinion/reason list, and a fact-only summary.
    with gr.Blocks(
        title='東京大学 | 情報リテラシーサポートツール',
        theme='shivi/calm_seafoam',
        css='''
        #title_{
            text-align: center;
        }
        #lightblue_img{
            display: inline-block;
            width:15%;
        }
        #lightblue_img img{
            width:100%;
        }
        @media screen and (max-width:480px) {
            #lightblue_img{
                width:50%;
            }
        }
        '''
    ) as demo:
        with gr.Row():
            with gr.Column():
                gr.HTML(value='''
                <h2 id="title_">情報リテラシーサポートツール</h2>
                ''')
                gr.Markdown('### ※意見文の抽出精度は <u>93.3%</u> です。誤っている可能性には十分注意してください。')
                inp = gr.Textbox(label='', placeholder="こちらに記事のURLをコピペしてください。")

        btn = gr.Button("Enter")
        with gr.Row():
            with gr.Column(scale=1):
                # Article body; main() dims opinion sentences here.
                out1 = gr.HTML("""<div style="padding: 10px; overflow-x: scroll; border: 1px #999999 solid; height: 500px;"><span style="opacity: 0.5;">こちらに本文が表示されます。</span></div>""")
            with gr.Column(scale=1):
                with gr.Tab('意見についての文章'):
                    out2 = gr.HTML()
                with gr.Tab('本文から確実に言えること'):
                    out3 = gr.Markdown()
                with gr.Tab('使用されているデータの詳細'):
                    out4 = gr.Markdown()
        with gr.Row():
            with gr.Column():
                gr.Markdown(value='''
                ⚠︎実行には10~30秒ほどかかります。ご了承ください。
                ***
                ''')

        description_jp = gr.Markdown(value='''
        ## ニュース記事を読むにあたって総合的な情報リテラシーの不足のサポートを行うツールです

        ### ※情報リテラシーとは
        1. 加工されていない生のデータが何か分かる
        2. 事実と意見の区別がつく
        3. 文章中から確実に言えることが何か分かる

        ***
        ''')

        # BUG FIX: the second paragraph below contained mojibake
        # ("情報リ��ラシー") — restored to "情報リテラシー" to match the
        # surrounding text.
        gr.HTML(value='''
        <h1>本デモについて</h1>
        <fieldset style="border: 1px dashed #000000; padding: 10px;">
        <legend><h2>作成意図</h2></legend>
        <p>偽情報や誤情報といった情報は通常の20倍もの速度で拡散されるといいます。このとき拡散する人はどうやら情報リテラシーが低い人が多いそうです。</p>
        <p>情報リテラシーの改善には教育が急務ですがなかなか教材だけでは追いつかないことがあるかと思います。</p>
        <p>本デモがそういった皆様の情報リテラシー向上の一助となれば幸いです。</p>
        </fieldset>



        <h2>作成者</h2>
        <img src="https://tegakisozai.com/wp-content/uploads/2021/04/doubutu_penguin.png" width=150px>
        東京大学工学部システム創成学科4年
        <p>堀川祐生</p>
        <p>
        🔗
        <a href= "https://www.linkedin.com/in/祐生-堀川-a0a7a328b/" >LinkedIn</a>
        </p>
        ''')

        # Pressing Enter in the textbox and clicking the button both run main().
        inp.submit(main, [inp], [out1, out2, out3, out4])
        btn.click(main, [inp], [out1, out2, out3, out4])
    # Basic-auth credentials come from the USER_NAME / PASSWORD env vars.
    demo.launch(auth=(os.getenv('USER_NAME'), os.getenv('PASSWORD')))
|
requirements.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
openai==1.50.2
|
2 |
+
gradio==4.44.0
|
3 |
+
beautifulsoup4==4.12.3
|
util/__init__.py
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
from .fact_opinion_classifer import fact_opinion_classifer
|
2 |
+
from .fact_summarizer import fact_summarizer
|
3 |
+
from .opinion_reason_classifer import opinion_reason_classifer
|
util/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (376 Bytes). View file
|
|
util/__pycache__/fact_opinion_classifer.cpython-311.pyc
ADDED
Binary file (2.69 kB). View file
|
|
util/__pycache__/fact_summarizer.cpython-311.pyc
ADDED
Binary file (1.2 kB). View file
|
|
util/__pycache__/opinion_reason_classifer.cpython-311.pyc
ADDED
Binary file (2.7 kB). View file
|
|
util/fact_opinion_classifer.py
ADDED
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from openai import OpenAI
|
2 |
+
|
3 |
+
# Prompt for the fact/opinion judge.  The model must answer one "- Fact" /
# "- Opinion" bullet per input sentence, in the same order.
# (Typo fix: the original prompt had an unbalanced paren, 「事実(Fact))」.)
MESSAGE_FORMAT = """あなたは文章を「事実(Fact)」か「意見(Opinion)」かを判定する熟練の判定者です。以下に箇条書きで与える文章がそれぞれ事実か意見かを判定してください。なお回答は以下の例の形式で行なってください。
### 回答形式例
- Fact
- Fact
- Opinion

### 文章
- {}
"""


def fact_opinion_classifer(client: "OpenAI", questions, chunk_size=5, model_name='gpt-4o-mini-2024-07-18'):
    """Classify each sentence in *questions* as "Fact" or "Opinion".

    Sentences are sent in chunks of *chunk_size* per API call to keep prompts
    short; the per-chunk answers are concatenated in order.

    Args:
        client: OpenAI client (anything exposing ``chat.completions.create``).
        questions: list of sentence strings.
        chunk_size: number of sentences per API call.
        model_name: chat model identifier.

    Returns:
        A list of answer strings (normally "Fact" or "Opinion"), one per
        bullet line of the model's replies.  The caller is responsible for
        checking that the count matches ``len(questions)``.
    """
    # Idiomatic slicing replaces the manual chunk_num bookkeeping; produces
    # exactly the same chunks (last one may be shorter).
    chunked_questions = [questions[i:i + chunk_size]
                        for i in range(0, len(questions), chunk_size)]

    responses = []
    for chunk in chunked_questions:
        message = MESSAGE_FORMAT.format('\n- '.join(chunk))
        response = client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": message
                }
            ],
            model=model_name,
            temperature=0  # deterministic answers
        )
        responses.append(response.choices[0].message.content)

    # Each reply line looks like "- Fact"; slice off the leading "- ".
    gpt_answers = []
    for gpt_answers_text in responses:
        gpt_answers.extend([ans[2:] for ans in gpt_answers_text.split('\n')])

    return gpt_answers
|
util/fact_summarizer.py
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from openai import OpenAI
|
2 |
+
|
3 |
+
# Prompt asking the model for a five-bullet, facts-only summary.
FACT_MESSAGE_FORMAT = """あなたはニュース記事に含まれる意見に惑わされず「事実のみ」をまとめる読解サポーターです。以下の文章をもとに確実に言えることを5つ箇条書きで答えてください。

### 文章
{}

### 回答"""


def fact_summarizer(client: OpenAI, sentence: str, model_name='gpt-4o-mini-2024-07-18'):
    """Summarize *sentence* into five facts-only bullet points.

    Sends a single chat request built from FACT_MESSAGE_FORMAT and returns
    the model's raw reply text unchanged.
    """
    prompt = FACT_MESSAGE_FORMAT.format(sentence)
    chat_response = client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model=model_name,
        temperature=0,  # deterministic output
    )
    return chat_response.choices[0].message.content
|
util/opinion_reason_classifer.py
ADDED
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from openai import OpenAI
|
2 |
+
|
3 |
+
# Prompt asking the model why each bulleted sentence reads as opinion rather
# than fact; one short (≤30 chars) reason per input line, in order.
REASON_MESSAGE_FORMAT = """あなたは意見に関する文章をどの箇所から意見と読み取れるかを判定する熟練の判定者です。以下に箇条書きで与える文章が事実ではなく意見と判断できる理由をそれぞれ簡潔に30字以内で記述してください

### 文章
- {}

### 意見と読み取れる理由"""


def opinion_reason_classifer(client: "OpenAI", questions, chunk_size=5, model_name='gpt-4o-mini-2024-07-18'):
    """Explain why each sentence in *questions* reads as an opinion.

    Mirrors ``fact_opinion_classifer``: sentences are sent in chunks of
    *chunk_size* per API call and the per-chunk answers are concatenated.

    Args:
        client: OpenAI client (anything exposing ``chat.completions.create``).
        questions: list of opinion-sentence strings.
        chunk_size: number of sentences per API call.
        model_name: chat model identifier.

    Returns:
        A list of reason strings, one per bullet line of the model's replies.
        The caller pairs them with *questions* via ``zip``.
    """
    # Idiomatic slicing (consistent with fact_opinion_classifer); same chunks
    # as the original manual bookkeeping.
    chunked_questions = [questions[i:i + chunk_size]
                        for i in range(0, len(questions), chunk_size)]

    responses = []
    for chunk in chunked_questions:
        message = REASON_MESSAGE_FORMAT.format('\n- '.join(chunk))
        response = client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": message
                }
            ],
            model=model_name,
            temperature=0  # deterministic answers
        )
        responses.append(response.choices[0].message.content)

    # Each reply line looks like "- <reason>"; slice off the leading "- ".
    gpt_answers = []
    for gpt_answers_text in responses:
        gpt_answers.extend([ans[2:] for ans in gpt_answers_text.split('\n')])

    return gpt_answers
|