# Spaces: soldni / viz_summaries (Sleeping) - File size: 4,571 Bytes

import html
import itertools
from io import StringIO

import gradio as gr
import pandas as pd
import spacy


# Load spaCy's `en_core_web_sm` pipeline once at import time; colorize() uses
# it to tokenize the summaries and read each token's lemma and POS tag.
nlp = spacy.load('en_core_web_sm')

# Shared markup for a highlighted token. The doubled braces keep ``{t}`` in
# the result, so every constant below is itself a template that is filled
# in later with ``.format(t=...)``.
_SPAN_TEMPLATE = '<span style="background-color: rgba({rgba})">{{t}}</span>'

HTML_RED = _SPAN_TEMPLATE.format(rgba='255, 0, 0, 0.2')
HTML_GRN = _SPAN_TEMPLATE.format(rgba='0, 255, 0, 0.3')
HTML_YLW = _SPAN_TEMPLATE.format(rgba='255, 255, 0, 0.3')
HTML_BLU = _SPAN_TEMPLATE.format(rgba='0, 0, 255, 0.2')
# Unhighlighted token: a bare span with no background colour.
HTML_PLN = '<span>{t}</span>'

# Stylesheet for the result table: padded cells and collapsed 1px borders.
TABLE_CSS = '\n'.join((
    '',
    'th, td {',
    '    padding: 4px;',
    '}',
    'table, th, td {',
    '  border: 1px solid black;',
    '  border-collapse: collapse;',
    '',
    '}',
    '',
))


# Coarse part-of-speech tags whose tokens get a coloured background.
_HIGHLIGHTED_POS = frozenset({'NOUN', 'PROPN', 'VERB'})


def _cell_text(value):
    """Return a CSV cell as text, mapping a missing (NaN/None) cell to ''."""
    return '' if pd.isna(value) else str(value)


def _render_doc(doc, reference_lemmas, hit_template, miss_template):
    """Render a spaCy doc as HTML with nouns, proper nouns and verbs coloured.

    A highlighted token uses ``hit_template`` when its lowercased lemma is in
    ``reference_lemmas`` and ``miss_template`` otherwise; every other token
    uses ``HTML_PLN``. Token text is HTML-escaped before insertion.
    """
    pieces = []
    for token in doc:
        if token.pos_ not in _HIGHLIGHTED_POS:
            template = HTML_PLN
        elif token.lemma_.lower() in reference_lemmas:
            template = hit_template
        else:
            template = miss_template
        # Escape the text: it comes from an uploaded file and would
        # otherwise be interpreted as markup inside the table.
        pieces.append(
            template.format(t=html.escape(token.text)) + token.whitespace_
        )
    return ''.join(pieces)


def _table_row(cells, cell_template):
    """Wrap already-rendered cells in a ``<tr>``, one cell per line."""
    return (
        '<tr>\n' +
        '\n'.join(cell_template.format(cell) for cell in cells) +
        '\n</tr>'
    )


def colorize(file_obj):
    """Render an uploaded annotation CSV as a colour-coded HTML table.

    Everything in the file before the first occurrence of ``example_id`` is
    discarded and the remainder is parsed as CSV. Each row must provide the
    columns ``example_id``, ``target summary``, ``model summary A`` and
    ``model summary B``. The three summaries are run through ``nlp`` and
    compared by lowercased lemma:

    * gold tokens are blue if the lemma occurs in either model summary,
      yellow otherwise;
    * model tokens are green if the lemma occurs in the gold summary,
      red otherwise.

    Only tokens tagged NOUN, PROPN or VERB are coloured.

    Args:
        file_obj: Either a path string, or an object whose ``name``
            attribute is the path of the uploaded file.

    Returns:
        An HTML ``<table>`` string with a header row followed by one row
        per CSV row (id, gold, model A, model B).

    Raises:
        ValueError: If ``file_obj`` is ``None`` or the file contains no
            ``example_id`` header.
    """
    if file_obj is None:
        raise ValueError('no file was uploaded')
    path = file_obj if isinstance(file_obj, str) else file_obj.name

    with open(path, 'r', encoding='utf-8') as f:
        raw = f.read()

    # str.find returns -1 when the header is absent; slicing with that would
    # silently keep only the last character, so fail loudly instead.
    header_start = raw.find('example_id')
    if header_start < 0:
        raise ValueError("uploaded file has no 'example_id' header")
    data = pd.read_csv(StringIO(raw[header_start:]))

    table_content = []
    for _, row in data.iterrows():
        # Empty cells are read as NaN (a float), which nlp.pipe cannot
        # process; treat them as empty text.
        gold, gen_a, gen_b = nlp.pipe([
            _cell_text(row['target summary']),
            _cell_text(row['model summary A']),
            _cell_text(row['model summary B']),
        ])
        gold_lemmas = {token.lemma_.lower() for token in gold}
        generated_lemmas = {
            token.lemma_.lower() for token in itertools.chain(gen_a, gen_b)
        }

        table_content.append([
            html.escape(str(row['example_id'])),
            _render_doc(gold, generated_lemmas, HTML_BLU, HTML_YLW),
            _render_doc(gen_a, gold_lemmas, HTML_GRN, HTML_RED),
            _render_doc(gen_b, gold_lemmas, HTML_GRN, HTML_RED),
        ])

    lines = ['<table>']
    lines.append(
        _table_row(('id', 'Gold', 'Model A', 'Model B'), '<td><b>{}</b></td>')
    )
    lines.extend(_table_row(cells, '<td>{}</td>') for cells in table_content)
    lines.append('</table>')
    return '\n'.join(lines)


def main():
    """Build the Gradio interface and launch it.

    The UI has an "Upload" tab (file input and Run button) and a
    "Visualization" tab (a static colour legend plus the HTML output of
    ``colorize``). Clicking Run passes the uploaded file to ``colorize`` and
    shows the returned table.
    """
    # (span template, colour name, meaning) for each legend entry.
    legend_entries = (
        (HTML_RED, 'Red', 'word is in generated, but not in gold.'),
        (HTML_GRN, 'Green', 'word is in generated summary and gold.'),
        (HTML_YLW, 'Yellow', 'word is in gold, but not in generated.'),
        (HTML_BLU, 'Blue', 'word is in gold and in generated.'),
    )
    legend_parts = ["<b>Explanation of colors:</b>", "<br><ul>"]
    for template, color_name, meaning in legend_entries:
        legend_parts.append(
            '<li><b>{}</b>: {}</li>'.format(
                template.format(t=color_name), meaning
            )
        )
    legend_parts.extend((
        "</ul>",
        "<br>",
        # The closing sentence previously ended with a stray "</b>" that had
        # no matching opening tag; it is dropped here.
        "<b>Important</b>: Only nouns, verbs and proper nouns are colored.",
    ))

    # NOTE(review): `type='file'` and `Button(label=...)` are kept as written;
    # whether they are accepted depends on the installed Gradio version -
    # confirm against the pinned release.
    with gr.Blocks(css=TABLE_CSS) as demo:
        gr.Markdown(
            "After uploading, click Run and switch to the Visualization tab."
        )
        with gr.Tabs():
            with gr.TabItem("Upload"):
                data = gr.File(
                    label='upload csv with Annotations', type='file'
                )
                run = gr.Button(label='Run')
            with gr.TabItem("Visualization"):
                gr.HTML(''.join(legend_parts))
                viz = gr.HTML(label='Upload a csv file to start.')
        run.click(colorize, data, viz)

    demo.launch()


if __name__ == '__main__':
    main()