amphora committed on
Commit
8e298e3
•
1 Parent(s): f229c82

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +134 -60
app.py CHANGED
@@ -2,31 +2,126 @@ import streamlit as st
2
 
3
  st.set_page_config(page_title="HAERAE Open Research Questions", layout="wide")
4
 
5
- st.title("HAERAE Open Research Questions")
6
-
7
- st.write("""
8
- HAERAE is a non-profit research lab focused on the interpretability and evaluation of Korean language models.
9
- Our mission is to advance the field with insightful benchmarks and tools. Below is an overview of our projects.
10
-
11
- We've been doing most of our projects internally, but for those that have been unsolvable,
12
- we are planning to open them to get help from the open-source community.
13
- """)
14
-
15
- st.header("HAERAE-Math Challenge")
16
-
17
- st.write("""
18
- Today we are introducing our first challenge: HAERAE-Math. We've created high-quality instructions on math
19
- but don't have an idea on how to generate high-quality answers for them. We are looking for solutions that
20
- use open-source models with openly available licenses.
21
-
22
- We have created a total of 20,000 instructions already and are generating more. We've opened up a preview
23
- of 50 of them in this link: [HAERAE-Math Samples](https://huggingface.co/datasets/HAERAE-HUB/HAERAE-Math-samples)
24
-
25
- For those who generate answers for the 50 and share the methodology/results with us, we'll share the
26
- remaining instructions and credit for the resulting dataset.
27
- """)
28
-
29
- st.subheader("Example Question")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
  example_question = """
32
  ํ•œ๊ตญ์˜ ๋ณด์•ˆ ์ „๋ฌธ๊ฐ€๊ฐ€ ๊ณ ๋„ํ™”๋œ ๋ฐ์ดํ„ฐ ๋ณดํ˜ธ ์‹œ์Šคํ…œ์„ ๊ฐœ๋ฐœํ•˜๊ณ  ์žˆ์Šต๋‹ˆ๋‹ค. ์ด ์‹œ์Šคํ…œ์€ 3์ฐจ์› ๊ธฐํ•˜ํ•™์  ์ž ๊ธˆ ๋ฉ”์ปค๋‹ˆ์ฆ˜์„ ์‚ฌ์šฉํ•˜๋Š”๋ฐ, ์ž ๊ธˆ ์žฅ์น˜๋Š” ์›๋ฟ” ๋ชจ์–‘์œผ๋กœ ๋˜์–ด ์žˆ๊ณ , ๋ฐ‘๋ฉด์˜ ๋ฐ˜์ง€๋ฆ„์€ 6cm, ๋†’์ด๋Š” 8cm์ž…๋‹ˆ๋‹ค. ์ด ์›๋ฟ” ๋ชจ์–‘์˜ ์ž ๊ธˆ ์žฅ์น˜์—๋Š” ์›ํ†ต ๋ชจ์–‘์˜ ์—ด์‡ ๊ฐ€ ๋”ฑ ๋งž๊ฒŒ ๋“ค์–ด๊ฐ€๊ฒŒ ์„ค๊ณ„๋˜์–ด ์žˆ์Šต๋‹ˆ๋‹ค.
@@ -50,38 +145,17 @@ example_question = """
50
 
51
  st.code(example_question, language="markdown")
52
 
53
- st.header("How to Participate")
54
-
55
- st.write("""
56
- 1. Access the 50 sample questions from the provided Hugging Face dataset link.
57
- 2. Generate high-quality answers for these questions using open-source models.
58
- 3. Document your methodology and results.
59
- 4. Share your findings with us through [contact information or submission form].
60
- 5. If your approach is promising, we'll provide access to the full dataset of 20,000 instructions.
61
- 6. Collaborate with us to refine and improve the answer generation process.
62
- 7. Receive credit as a contributor to the final HAERAE-Math dataset.
63
- """)
64
-
65
- st.header("Why Participate?")
66
-
67
- st.write("""
68
- - Contribute to advancing Korean language model research
69
- - Gain access to a large, high-quality dataset of math instructions
70
- - Collaborate with HAERAE researchers
71
- - Receive recognition in the field of NLP and math education
72
- - Potential for co-authorship on related publications
73
- """)
74
-
75
- st.header("Contact Us")
76
-
77
- st.write("""
78
- For more information or to submit your results, please contact us at:
79
- [Your contact information or a link to a submission form]
80
- """)
81
-
82
- st.sidebar.title("About HAERAE")
83
- st.sidebar.info("""
84
- HAERAE is a non-profit research lab dedicated to advancing the field of
85
- Korean language model interpretability and evaluation. Our work focuses on
86
- creating insightful benchmarks and tools to push the boundaries of NLP research.
87
- """)
 
2
 
3
  st.set_page_config(page_title="HAERAE Open Research Questions", layout="wide")
4
 
5
+ # Language selection
6
+ lang = st.radio("Language / 언어", ["English", "한국어"])
7
+
8
+ # Content in both languages
9
+ content = {
10
+ "English": {
11
+ "title": "HAERAE Open Research Questions",
12
+ "intro": """
13
+ HAERAE is a non-profit research lab focused on the interpretability and evaluation of Korean language models.
14
+ Our mission is to advance the field with insightful benchmarks and tools. Below is an overview of our projects.
15
+
16
+ We've been doing most of our projects internally, but for those that have been unsolvable,
17
+ we are planning to open them to get help from the open-source community.
18
+ """,
19
+ "challenge_title": "HAERAE-Math Challenge",
20
+ "challenge_desc": """
21
+ Today we are introducing our first challenge: HAERAE-Math. We've created high-quality instructions on math
22
+ but don't have an idea on how to generate high-quality answers for them. We are looking for solutions that
23
+ use open-source models with openly available licenses.
24
+
25
+ We have created a total of 20,000 instructions already and are generating more. We've opened up a preview
26
+ of 50 of them in this link: [HAERAE-Math Samples](https://huggingface.co/datasets/HAERAE-HUB/HAERAE-Math-samples)
27
+
28
+ For those who generate answers for the 50 and share the methodology/results with us, we'll share the
29
+ remaining instructions and credit for the resulting dataset.
30
+ """,
31
+ "example_title": "Example Question",
32
+ "how_to_title": "How to Participate",
33
+ "how_to": """
34
+ 1. Access the 50 sample questions from the provided Hugging Face dataset link.
35
+ 2. Generate high-quality answers for these questions using open-source models.
36
+ 3. Document your methodology and results.
37
+ 4. Share your findings with us through [contact information or submission form].
38
+ 5. If your approach is promising, we'll provide access to the full dataset of 20,000 instructions.
39
+ 6. Collaborate with us to refine and improve the answer generation process.
40
+ 7. Receive credit as a contributor to the final HAERAE-Math dataset.
41
+ """,
42
+ "why_title": "Why Participate?",
43
+ "why": """
44
+ - Contribute to advancing Korean language model research
45
+ - Gain access to a large, high-quality dataset of math instructions
46
+ - Collaborate with HAERAE researchers
47
+ - Receive recognition in the field of NLP and math education
48
+ - Potential for co-authorship on related publications
49
+ """,
50
+ "contact_title": "Contact Us",
51
+ "contact": """
52
+ For more information or to submit your results, please contact us at:
53
54
+ """,
55
+ "sidebar_title": "About HAERAE",
56
+ "sidebar_content": """
57
+ HAERAE is a non-profit research lab dedicated to advancing the field of
58
+ Korean language model interpretability and evaluation. Our work focuses on
59
+ creating insightful benchmarks and tools to push the boundaries of NLP research.
60
+ """
61
+ },
62
+ "한국어": {
63
+ "title": "HAERAE 공개 연구 질문",
64
+ "intro": """
65
+ HAERAE๋Š” ํ•œ๊ตญ์–ด ์–ธ์–ด ๋ชจ๋ธ์˜ ํ•ด์„ ๊ฐ€๋Šฅ์„ฑ๊ณผ ํ‰๊ฐ€์— ์ค‘์ ์„ ๋‘” ๋น„์˜๋ฆฌ ์—ฐ๊ตฌ์†Œ์ž…๋‹ˆ๋‹ค.
66
+ 우리의 미션은 통찰력 있는 벤치마크와 도구를 통해 이 분야를 발전시키는 것입니다. 다음은 우리 프로젝트의 개요입니다.
67
+
68
+ 대부분의 프로젝트를 내부적으로 수행해 왔지만, 해결하기 어려운 문제들에 대해서는
69
+ 오픈 소스 커뮤니티의 도움을 받고자 공개할 계획입니다.
70
+ """,
71
+ "challenge_title": "HAERAE-Math 챌린지",
72
+ "challenge_desc": """
73
+ ์˜ค๋Š˜ ์šฐ๋ฆฌ๋Š” ์ฒซ ๋ฒˆ์งธ ์ฑŒ๋ฆฐ์ง€์ธ HAERAE-Math๋ฅผ ์†Œ๊ฐœํ•ฉ๋‹ˆ๋‹ค. ์šฐ๋ฆฌ๋Š” ์ˆ˜ํ•™์— ๊ด€ํ•œ ๊ณ ํ’ˆ์งˆ ์ง€์‹œ๋ฌธ์„ ๋งŒ๋“ค์—ˆ์ง€๋งŒ
74
+ 이에 대한 고품질 답변을 생성하는 방법에 대한 아이디어가 없습니다. 우리는 공개적으로 사용 가능한 라이선스를 가진
75
+ 오픈 소스 모델을 사용하는 솔루션을 찾고 있습니다.
76
+
77
+ 우리는 이미 총 20,000개의 지시문을 만들었고 더 많이 생성하고 있습니다. 우리는 이 중 50개의 미리보기를
78
+ 다음 링크에서 공개했습니다: [HAERAE-Math 샘플](https://huggingface.co/datasets/HAERAE-HUB/HAERAE-Math-samples)
79
+
80
+ 50개에 대한 답변을 생성하고 방법론/결과를 우리와 공유하는 분들에게는 나머지 지시문을 공유하고
81
+ 최종 데이터셋에 대한 크레딧을 공유할 것입니다.
82
+ """,
83
+ "example_title": "예시 질문",
84
+ "how_to_title": "참여 방법",
85
+ "how_to": """
86
+ 1. 제공된 Hugging Face 데이터셋 링크에서 50개의 샘플 질문에 접근합니다.
87
+ 2. 오픈 소스 모델을 사용하여 이 질문들에 대한 고품질 답변을 생성합니다.
88
+ 3. 방법론과 결과를 문서화합니다.
89
+ 4. [연락처 정보 또는 제출 양식]을 통해 귀하의 결과를 우리와 공유합니다.
90
+ 5. 귀하의 접근 방식이 유망하다면, 20,000개의 전체 지시문 데이터셋에 대한 접근 권한을 제공할 것입니다.
91
+ 6. 답변 생성 과정을 개선하고 발전시키기 위해 우리와 협력합니다.
92
+ 7. 최종 HAERAE-Math 데이터셋의 기여자로 인정받습니다.
93
+ """,
94
+ "why_title": "왜 참여해야 하나요?",
95
+ "why": """
96
+ - 한국어 언어 모델 연구 발전에 기여
97
+ - 대규모의 고품질 수학 지시문 데이터셋에 접근
98
+ - HAERAE 연구원들과 협력
99
+ - NLP 및 수학 교육 분야에서 인정받을 기회
100
+ - 관련 출판물의 공동 저자가 될 가능성
101
+ """,
102
+ "contact_title": "연락처",
103
+ "contact": """
104
+ 더 많은 정보를 원하시거나 결과를 제출하려면 다음 연락처로 문의해 주세요:
105
106
+ """,
107
+ "sidebar_title": "HAERAE 소개",
108
+ "sidebar_content": """
109
+ HAERAE๋Š” ํ•œ๊ตญ์–ด ์–ธ์–ด ๋ชจ๋ธ์˜ ํ•ด์„ ๊ฐ€๋Šฅ์„ฑ๊ณผ ํ‰๊ฐ€ ๋ถ„์•ผ๋ฅผ ๋ฐœ์ „์‹œํ‚ค๋Š” ๋ฐ ์ „๋…ํ•˜๋Š” ๋น„์˜๋ฆฌ ์—ฐ๊ตฌ์†Œ์ž…๋‹ˆ๋‹ค.
110
+ 우리의 연구는 NLP 연구의 경계를 넓히기 위한 통찰력 있는 벤치마크와 도구를 만드는 데 중점을 둡니다.
111
+ """
112
+ }
113
+ }
114
+
115
+ # Main content
116
+ st.title(content[lang]["title"])
117
+
118
+ st.write(content[lang]["intro"])
119
+
120
+ st.header(content[lang]["challenge_title"])
121
+
122
+ st.write(content[lang]["challenge_desc"])
123
+
124
+ st.subheader(content[lang]["example_title"])
125
 
126
  example_question = """
127
  ํ•œ๊ตญ์˜ ๋ณด์•ˆ ์ „๋ฌธ๊ฐ€๊ฐ€ ๊ณ ๋„ํ™”๋œ ๋ฐ์ดํ„ฐ ๋ณดํ˜ธ ์‹œ์Šคํ…œ์„ ๊ฐœ๋ฐœํ•˜๊ณ  ์žˆ์Šต๋‹ˆ๋‹ค. ์ด ์‹œ์Šคํ…œ์€ 3์ฐจ์› ๊ธฐํ•˜ํ•™์  ์ž ๊ธˆ ๋ฉ”์ปค๋‹ˆ์ฆ˜์„ ์‚ฌ์šฉํ•˜๋Š”๋ฐ, ์ž ๊ธˆ ์žฅ์น˜๋Š” ์›๋ฟ” ๋ชจ์–‘์œผ๋กœ ๋˜์–ด ์žˆ๊ณ , ๋ฐ‘๋ฉด์˜ ๋ฐ˜์ง€๋ฆ„์€ 6cm, ๋†’์ด๋Š” 8cm์ž…๋‹ˆ๋‹ค. ์ด ์›๋ฟ” ๋ชจ์–‘์˜ ์ž ๊ธˆ ์žฅ์น˜์—๋Š” ์›ํ†ต ๋ชจ์–‘์˜ ์—ด์‡ ๊ฐ€ ๋”ฑ ๋งž๊ฒŒ ๋“ค์–ด๊ฐ€๊ฒŒ ์„ค๊ณ„๋˜์–ด ์žˆ์Šต๋‹ˆ๋‹ค.
 
145
 
146
  st.code(example_question, language="markdown")
147
 
148
+ st.header(content[lang]["how_to_title"])
149
+
150
+ st.write(content[lang]["how_to"])
151
+
152
+ st.header(content[lang]["why_title"])
153
+
154
+ st.write(content[lang]["why"])
155
+
156
+ st.header(content[lang]["contact_title"])
157
+
158
+ st.write(content[lang]["contact"])
159
+
160
+ st.sidebar.title(content[lang]["sidebar_title"])
161
+ st.sidebar.info(content[lang]["sidebar_content"])