File size: 2,718 Bytes
0583214
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2ce0e0f
0583214
 
 
 
 
 
 
2ce0e0f
0583214
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2ce0e0f
0583214
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import html
from io import StringIO

import gradio as gr
import pandas as pd
import spacy


# spaCy pipeline used by colorize() for tokenization, POS tags and lemmas.
# Loaded once at import time so every request reuses the same pipeline.
nlp = spacy.load('en_core_web_sm')

# HTML span templates; `{t}` is filled with a token's text via str.format.
# Red: NOUN/PROPN/VERB token whose lowercased lemma is absent from the gold
# summary.
HTML_RED = '<span style="background-color: rgba(255, 0, 0, 0.2)">{t}</span>'
# Green: NOUN/PROPN/VERB token whose lowercased lemma occurs in the gold
# summary.
HTML_GRN = '<span style="background-color: rgba(0, 255, 0, 0.3)">{t}</span>'
# Blue: not referenced by any code visible in this file.
HTML_BLU = '<span style="background-color: rgba(0, 0, 255, 0.2)">{t}</span>'
# Plain: every token that is not a NOUN, PROPN or VERB.
HTML_PLN = '<span>{t}</span>'
# Stylesheet handed to gr.Blocks(css=...) in main(); gives the generated
# table padded cells and collapsed 1px borders.
TABLE_CSS = '''
th, td {
    padding: 4px;
}
table, th, td {
  border: 1px solid black;
  border-collapse: collapse;

}
'''

# Coarse part-of-speech tags treated as content words worth highlighting.
_CONTENT_POS = frozenset({'NOUN', 'PROPN', 'VERB'})


def _cell_text(value):
    """Return a CSV cell as text, mapping missing values (NaN/None) to ''."""
    return '' if pd.isna(value) else str(value)


def _highlight(doc, gold_lemmas):
    """Render a parsed summary as HTML with content words color-coded.

    Args:
        doc: Iterable of spaCy tokens (a parsed model summary).
        gold_lemmas: Set of lowercased lemmas found in the gold summary.

    Returns:
        A string of ``<span>`` elements, one per token, with the original
        inter-token whitespace preserved.
    """
    pieces = []
    for token in doc:
        if token.pos_ not in _CONTENT_POS:
            template = HTML_PLN
        elif token.lemma_.lower() in gold_lemmas:
            template = HTML_GRN
        else:
            template = HTML_RED
        # Escape the text: it comes from an uploaded file and must not be
        # interpreted as markup.
        pieces.append(
            template.format(t=html.escape(token.text)) + token.whitespace_
        )
    return ''.join(pieces)


def colorize(file_obj):
    """Build an HTML table comparing two model summaries against the gold.

    The uploaded file is read as UTF-8 text. Everything before the first
    occurrence of ``example_id`` is discarded, and the remainder is parsed as
    CSV with the columns ``example_id``, ``target summary``,
    ``model summary A`` and ``model summary B``. In each model summary,
    nouns, proper nouns and verbs are highlighted green when their lemma
    also occurs in the gold summary and red otherwise; all other tokens are
    left plain.

    Args:
        file_obj: Object exposing the uploaded file's path as ``.name``
            (what ``gr.File(type='file')`` supplies in this app).

    Returns:
        An HTML ``<table>`` string with one row per CSV record.

    Raises:
        ValueError: If the file contains no ``example_id`` header.
        KeyError: If one of the expected columns is missing.
    """
    with open(file_obj.name, 'r', encoding='utf-8') as f:
        raw = f.read()

    # Skip any preamble before the CSV header. str.find returns -1 when the
    # header is absent, which would otherwise silently slice to the last
    # character and fail obscurely inside pandas.
    header_start = raw.find('example_id')
    if header_start == -1:
        raise ValueError("uploaded file has no 'example_id' column header")
    data = pd.read_csv(StringIO(raw[header_start:]))

    body_rows = []
    for _, record in data.iterrows():
        # Empty CSV cells are parsed as NaN (a float), which spaCy rejects;
        # normalize every summary to a string first.
        gold, gen_a, gen_b = nlp.pipe([
            _cell_text(record['target summary']),
            _cell_text(record['model summary A']),
            _cell_text(record['model summary B']),
        ])
        gold_lemmas = {token.lemma_.lower() for token in gold}
        cells = [
            html.escape(_cell_text(record['example_id'])),
            html.escape(gold.text),
            _highlight(gen_a, gold_lemmas),
            _highlight(gen_b, gold_lemmas),
        ]
        body_rows.append(
            '<tr>\n'
            + '\n'.join('<td>{}</td>'.format(cell) for cell in cells)
            + '\n</tr>'
        )

    return '\n'.join((
        '<table>',
        '<tr>',
        '<td><b>id</b></td>',
        '<td><b>Gold</b></td>',
        '<td><b>Model A</b></td>',
        '<td><b>Model B</b></td>',
        '</tr>',
        '\n'.join(body_rows),
        '</table>',
    ))


def main():
    """Assemble the two-tab Gradio interface and launch it.

    The "Upload" tab holds the file input and the Run button; the
    "Visualization" tab holds the HTML output that colorize() fills in.
    """
    intro = "After uploading, click Run and switch to the Visualization tab."

    with gr.Blocks(css=TABLE_CSS) as app:
        gr.Markdown(intro)
        with gr.Tabs():
            with gr.TabItem("Upload"):
                upload = gr.File(
                    type='file',
                    label='upload csv with Annotations',
                )
                run_button = gr.Button(label='Run')
            with gr.TabItem("Visualization"):
                output_html = gr.HTML(label='Upload a csv file to start.')
        # Clicking Run passes the uploaded file to colorize and shows the
        # returned HTML table in the Visualization tab.
        run_button.click(fn=colorize, inputs=upload, outputs=output_html)

    app.launch()


# Start the Gradio app only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()