Spaces:
Running
Running
Jason Zheng
committed on
Commit
•
310a5d6
1
Parent(s):
77e8689
add latest news
Browse files- app.py +2 -18
- text_content.py +10 -0
app.py
CHANGED
@@ -4,7 +4,7 @@ import gradio as gr
|
|
4 |
import pandas as pd
|
5 |
|
6 |
from css_html import custom_css
|
7 |
-
from text_content import ABOUT_TEXT, CITATION_BUTTON_TEXT, CITATION_BUTTON_LABEL, ACKNOWLEDGEMENT_TEXT, NOTES_TEXT
|
8 |
from utils import (
|
9 |
AutoEvalColumn,
|
10 |
fields,
|
@@ -66,23 +66,7 @@ with demo:
|
|
66 |
elem_classes="markdown-text",
|
67 |
)
|
68 |
|
69 |
-
gr.Markdown(
|
70 |
-
"""
|
71 |
-
Based on the 🏎️RACE benchmark, we demonstrated the ability of different LLMs to generate code that is **_correct_** and **_meets the requirements of real-world development scenarios_**.
|
72 |
-
|
73 |
-
More details about how to evalute the LLM are available in the [🏎️RACE GitHub repository](https://github.com/jszheng21/RACE). For a complete description of RACE benchmark and related experimental analysis, please refer to the paper: [**Beyond Correctness: Benchmarking Multi-dimensional Code Generation for Large Language Models**](https://arxiv.org/abs/2407.11470). [![](https://img.shields.io/badge/arXiv-2407.11470-b31b1b.svg)](https://arxiv.org/abs/2407.11470)
|
74 |
-
""",
|
75 |
-
elem_classes="markdown-text",
|
76 |
-
)
|
77 |
-
|
78 |
-
# gr.Markdown(
|
79 |
-
# """<div style="text-align: center;"><h1> 🏎️RACE Leaderboard</h1></div>\
|
80 |
-
# <br>\
|
81 |
-
# <p>Based on the 🏎️RACE benchmark, we demonstrated the ability of different LLMs to generate code that is <b><i>correct</i></b> and <b><i>meets the requirements of real-world development scenarios</i></b>.</p>
|
82 |
-
# <p>More details about how to evalute the LLM are available in the <a href="https://github.com/jszheng21/RACE">🏎️RACE GitHub repository</a>. For a complete description of RACE benchmark and related experimental analysis, please refer to the paper: Beyond Correctness: Benchmarking Multi-dimensional Code Generation for Large Language Models</p>
|
83 |
-
# """,
|
84 |
-
# elem_classes="markdown-text",
|
85 |
-
# )
|
86 |
|
87 |
with gr.Tabs(elem_classes="tab-buttons") as tabs:
|
88 |
with gr.Column():
|
|
|
4 |
import pandas as pd
|
5 |
|
6 |
from css_html import custom_css
|
7 |
+
from text_content import ABOUT_TEXT, CITATION_BUTTON_TEXT, CITATION_BUTTON_LABEL, ACKNOWLEDGEMENT_TEXT, NOTES_TEXT, HEAD_TEXT
|
8 |
from utils import (
|
9 |
AutoEvalColumn,
|
10 |
fields,
|
|
|
66 |
elem_classes="markdown-text",
|
67 |
)
|
68 |
|
69 |
+
gr.Markdown(HEAD_TEXT, elem_classes="markdown-text")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
70 |
|
71 |
with gr.Tabs(elem_classes="tab-buttons") as tabs:
|
72 |
with gr.Column():
|
text_content.py
CHANGED
@@ -1,3 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
ABOUT_TEXT = """# What is RACE benchmark?
|
2 |
RACE is a multi-dimensional benchmark for code generation that focuses on **R**eadability, m**A**intainability, **C**orrectness, and **E**fficiency.
|
3 |
Its goal is to evaluate LLM's ability to generate code that is correct and meets the requirements of real-world development scenarios.
|
|
|
1 |
+
HEAD_TEXT = """
|
2 |
+
Based on the 🏎️RACE benchmark, we demonstrated the ability of different LLMs to generate code that is **_correct_** and **_meets the requirements of real-world development scenarios_**.
|
3 |
+
|
4 |
+
More details about how to evaluate the LLM are available in the [🏎️RACE GitHub repository](https://github.com/jszheng21/RACE). For a complete description of the RACE benchmark and related experimental analysis, please refer to the paper: [Beyond Correctness: Benchmarking Multi-dimensional Code Generation for Large Language Models](https://arxiv.org/abs/2407.11470). [![](https://img.shields.io/badge/arXiv-2407.11470-b31b1b.svg)](https://arxiv.org/abs/2407.11470)
|
5 |
+
|
6 |
+
**_Latest News_** 🔥
|
7 |
+
- [24/07/24] We added the evaluation results of `claude-3.5-sonnet` and `Qwen2-72B-Instruct` to the [RACE leaderboard](https://huggingface.co/spaces/jszheng/RACE_leaderboard).
|
8 |
+
- [24/07/16] We released our RACE benchmark, leaderboard and paper.
|
9 |
+
"""
|
10 |
+
|
11 |
ABOUT_TEXT = """# What is RACE benchmark?
|
12 |
RACE is a multi-dimensional benchmark for code generation that focuses on **R**eadability, m**A**intainability, **C**orrectness, and **E**fficiency.
|
13 |
Its goal is to evaluate LLM's ability to generate code that is correct and meets the requirements of real-world development scenarios.
|