import gradio as gr
import evaluate

suite = evaluate.EvaluationSuite.load("Vipitis/ShaderEval")  # downloads the suite script from the Hub

# TODO: can you import it locally instead?
# from ShaderEval import Suite
# suite = Suite("Vipitis/ShaderEval")
# save results to a file?
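# One possible sketch for the "save results to a file" TODO (untested; assumes `results`
# is the list of result dicts returned by suite.run, and "results.json" is a placeholder name):
# import json
# with open("results.json", "w") as f:
#     json.dump(results, f, indent=2)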

text = """# Welcome to the ShaderEval Suite.
            
            This space hosts the ShaderEval Suite. More to follow soon.
            
            # Task 1: Return Completion
            ## Explanation
            Modelled after the [CodeXGLUE code_completion_line](https://huggingface.co/datasets/code_x_glue_cc_code_completion_line) task,
            using the "return_completion" subset of the [Shadertoys-fine dataset](https://huggingface.co/datasets/Vipitis/Shadertoys-fine).
            All preprocessing and post-processing is handled by the custom evaluator for this suite; it should be as easy as giving it a model checkpoint that can do the "text-generation" task.
            Evaluation currently uses just [exact_match](https://huggingface.co/metrics/exact_match).
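
            For illustration, exact_match simply checks whether a generated completion is identical to the reference string; a minimal sketch (the shader snippet below is made up):
            ```python
            import evaluate
            exact_match = evaluate.load("exact_match")
            exact_match.compute(predictions=["return vec3(1.0);"], references=["return vec3(1.0);"])
            # -> {'exact_match': 1.0}
            ```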

            ## Notice
            Should you find a model that throws an error, please let me know in the issues tab. Several parts of this suite are still missing.

            ## Instructions
            ### Run the code yourself:
            ```python
            import evaluate
            suite = evaluate.EvaluationSuite.load("Vipitis/ShaderEval")
            model_cp = "gpt2"
            suite.run(model_cp, snippet=300)
            ```
            
            ### Try the demo below
            - Enter a **model checkpoint** in the textbox
            - Select how many **samples** to run (there are up to 300 from the test set)
            - Click **Run** to run the suite
            - The results will be displayed in the **Output** box
            
            ## Results 
            ![](file/bar.png)

            ## Todo (feel free to contribute in a Pull Request)
            - [~] leaderboard 
            - [?] supporting batches to speed up inference 
            - [ ] CER metric (via a custom metric perhaps?)
            - [x] removing the pad_token warning
            - [ ] adding OpenVINO pipelines for inference, pending on OpenVINO release
            """


def run_suite(model_cp, snippet):
    # run the evaluation suite on the given checkpoint for the first `snippet` samples of the test set
    results = suite.run(model_cp, snippet)
    print(results)  # so they show up in the logs for me.
    return results[0]  # the suite currently has a single task, so return its result dict

with gr.Blocks() as site:
    text_md = gr.Markdown(text)
    model_cp = gr.Textbox(value="gpt2", label="Model Checkpoint", interactive=True)
    first_n = gr.Slider(minimum=1, maximum=300, value=5, step=1, label="num_samples")
    output = gr.Textbox(label="Output")
    run_button = gr.Button("Run")
    run_button.click(fn=run_suite, inputs=[model_cp, first_n], outputs=output)
site.launch()