Spaces:
Sleeping
Sleeping
File size: 4,571 Bytes
0583214 9d0bbec 0583214 9d0bbec 0583214 9d0bbec 0583214 2ce0e0f 0583214 9d0bbec 0583214 9d0bbec 0583214 2ce0e0f 0583214 9d0bbec 0583214 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 |
from io import StringIO
import itertools
import gradio as gr
import pandas as pd
import spacy
# Load the 'en_core_web_sm' spaCy pipeline once at import time; colorize()
# reuses this shared instance for every uploaded file.
nlp = spacy.load('en_core_web_sm')
# Shared template for a highlighted token. The doubled braces survive the
# first ``.format`` call, so every HTML_* constant still exposes a ``{t}``
# placeholder that is filled with the token text later on.
_HIGHLIGHT_SPAN = '<span style="background-color: rgba({rgba})">{{t}}</span>'

# Highlight colors (see the legend in the UI for their meaning).
HTML_RED = _HIGHLIGHT_SPAN.format(rgba='255, 0, 0, 0.2')
HTML_GRN = _HIGHLIGHT_SPAN.format(rgba='0, 255, 0, 0.3')
HTML_YLW = _HIGHLIGHT_SPAN.format(rgba='255, 255, 0, 0.3')
HTML_BLU = _HIGHLIGHT_SPAN.format(rgba='0, 0, 255, 0.2')

# Tokens that are not highlighted get a plain span.
HTML_PLN = '<span>{t}</span>'

# Stylesheet passed to gr.Blocks: cell padding plus collapsed 1px borders.
TABLE_CSS = (
    '\n'
    'th, td {\n'
    'padding: 4px;\n'
    '}\n'
    'table, th, td {\n'
    'border: 1px solid black;\n'
    'border-collapse: collapse;\n'
    '}\n'
)
def _render_doc(doc, reference_lemmas, match_template, miss_template):
    """Return *doc* as an HTML string with its content words highlighted.

    Tokens tagged NOUN, PROPN or VERB are wrapped in ``match_template`` when
    their lower-cased lemma is in ``reference_lemmas`` and in
    ``miss_template`` otherwise; every other token uses ``HTML_PLN``.
    """
    import html

    pieces = []
    for token in doc:
        if token.pos_ not in {'NOUN', 'PROPN', 'VERB'}:
            template = HTML_PLN
        elif token.lemma_.lower() in reference_lemmas:
            template = match_template
        else:
            template = miss_template
        # The token text comes from the uploaded file, so escape it before
        # it is placed inside markup.
        pieces.append(
            template.format(t=html.escape(token.text)) + token.whitespace_
        )
    return ''.join(pieces)


def colorize(file_obj):
    """Render the summaries of an uploaded CSV as a color-coded HTML table.

    Everything in the file before the first occurrence of ``example_id`` is
    discarded and the remainder is parsed as CSV. The columns read are
    ``example_id``, ``target summary``, ``model summary A`` and
    ``model summary B``. Each summary is parsed with the module-level spaCy
    pipeline ``nlp`` and compared on lower-cased lemmas:

    * gold tokens are blue if the lemma occurs in either generated summary,
      yellow otherwise;
    * generated tokens are green if the lemma occurs in the gold summary,
      red otherwise.

    Args:
        file_obj: Object whose ``name`` attribute is the path of the CSV.

    Returns:
        A string containing an HTML ``<table>`` with a header row and one
        row per CSV record (id, gold, model A, model B).

    Raises:
        ValueError: If the file does not contain ``example_id`` at all.
    """
    import html

    with open(file_obj.name, 'r') as f:
        raw = f.read()

    # str.find returns -1 when the header is missing; slicing with that
    # would silently keep only the last character of the file.
    header_start = raw.find('example_id')
    if header_start == -1:
        raise ValueError(
            "uploaded file has no 'example_id' header; cannot locate the "
            "CSV table"
        )
    data = pd.read_csv(StringIO(raw[header_start:]))

    table_content = []
    for _, row in data.iterrows():
        gold, gen_a, gen_b = nlp.pipe((
            row['target summary'],
            row['model summary A'],
            row['model summary B'],
        ))
        gold_lemmas = {token.lemma_.lower() for token in gold}
        generated_lemmas = {
            token.lemma_.lower() for token in itertools.chain(gen_a, gen_b)
        }
        table_content.append([
            html.escape(str(row['example_id'])),
            _render_doc(gold, generated_lemmas, HTML_BLU, HTML_YLW),
            _render_doc(gen_a, gold_lemmas, HTML_GRN, HTML_RED),
            _render_doc(gen_b, gold_lemmas, HTML_GRN, HTML_RED),
        ])

    # Assemble the table: fixed header row, then one <tr> per record.
    lines = [
        '<table>',
        '<tr>',
        '<td><b>id</b></td>',
        '<td><b>Gold</b></td>',
        '<td><b>Model A</b></td>',
        '<td><b>Model B</b></td>',
        '</tr>',
    ]
    for cells in table_content:
        lines.append('<tr>')
        lines.extend('<td>{}</td>'.format(cell) for cell in cells)
        lines.append('</tr>')
    lines.append('</table>')
    return '\n'.join(lines)
def main():
    """Build the Gradio demo and launch it.

    The interface has two tabs. "Upload" holds the CSV file input and the
    Run button. "Visualization" shows a static legend of the highlight
    colors followed by the HTML component that receives the table returned
    by ``colorize`` when Run is clicked.
    """
    legend = ''.join((
        "<b>Explanation of colors:</b>",
        "<br><ul>",
        "<li><b>",
        HTML_RED.format(t='Red'),
        "</b>: word is in generated, but not in gold.</li>",
        "<li><b>",
        HTML_GRN.format(t='Green'),
        "</b>: word is in generated summary and gold.</li>",
        "<li><b>",
        HTML_YLW.format(t='Yellow'),
        "</b>: word is in gold, but not in generated.</li>",
        "<li><b>",
        HTML_BLU.format(t='Blue'),
        "</b>: word is in gold and in generated.</li>",
        "</ul>",
        "<br>",
        "<b>Important</b>: Only nouns, verbs and proper ",
        # No closing tag here: the only <b> in this sentence is already
        # closed right after "Important".
        "nouns are colored.",
    ))

    with gr.Blocks(css=TABLE_CSS) as demo:
        gr.Markdown(
            "After uploading, click Run and switch to the Visualization tab."
        )
        with gr.Tabs():
            with gr.TabItem("Upload"):
                data = gr.File(
                    label='upload csv with Annotations', type='file'
                )
                # The button caption is Button's first argument (``value``),
                # not ``label``.
                run = gr.Button('Run')
            with gr.TabItem("Visualization"):
                gr.HTML(legend)
                viz = gr.HTML(label='Upload a csv file to start.')
        # Clicking Run passes the uploaded file to colorize and shows the
        # returned HTML in the Visualization tab.
        run.click(colorize, data, viz)
    demo.launch()
# Build and launch the demo only when this file is executed as a script.
if __name__ == '__main__':
    main()
|