RicardoDominguez committed
Commit 3ae2781 • 1 Parent(s): 45c6daa
sc and songer

src/about.py CHANGED (+8 -8)
```diff
@@ -13,8 +13,8 @@ class Task:
 class Tasks(Enum):
     # task_key in the json file, metric_key in the json file, name to display in the leaderboard
     task0 = Task("caselawqa", "exact_match,default", "CaselawQA")
-    task1 = Task("
-    task2 = Task("
+    task1 = Task("caselawqa_sc", "exact_match,default", "Supreme Court")
+    task2 = Task("caselawqa_songer", "exact_match,default", "Courts of Appeals")
 
 NUM_FEWSHOT = 0 # Change with your few shot
 # ---------------------------------------------------
```
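The `Task` container these entries construct is defined earlier in `src/about.py` and is not part of this hunk. For context, a minimal sketch of its assumed shape, following the stock Hugging Face leaderboard template (the field names come from that template, not from this diff):

```python
from dataclasses import dataclass

@dataclass
class Task:
    # Assumed layout from the standard leaderboard template; the positional
    # arguments in the Tasks enum above map onto these three fields.
    benchmark: str  # task key in the results json, e.g. "caselawqa_sc"
    metric: str     # metric key in the results json, e.g. "exact_match,default"
    col_name: str   # column name shown on the leaderboard, e.g. "Supreme Court"
```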
```diff
@@ -34,14 +34,14 @@ From a substantive legal perspective, efficient solutions to such classification
 LLM_BENCHMARKS_TEXT = f"""
 ## Introduction
 
-CaselawQA is a benchmark comprising legal classification tasks
+CaselawQA is a benchmark comprising legal classification tasks derived from the Supreme Court and Songer Court of Appeals legal databases.
 The majority of its 10,000 questions are multiple-choice, with 5,000 sourced from each database.
-The questions are randomly selected from the test sets of the [Lawma tasks](https://huggingface.co/datasets/ricdomolm/lawma-tasks)
+The questions are randomly selected from the test sets of the [Lawma tasks](https://huggingface.co/datasets/ricdomolm/lawma-tasks).
+
+
 From a technical machine learning perspective, these tasks provide highly non-trivial classification problems where even the best models leave much room for improvement.
 From a substantive legal perspective, efficient solutions to such classification problems have rich and important applications in legal research.
-
-CaselawQA Tiny consists of 49 Lawma tasks with fewer than 150 training examples.
-CaselawQA Hard comprises tasks where [Lawma 70B](https://huggingface.co/ricdomolm/lawma-70b) achieves less than 70% accuracy.
+
 
 You can find more information in the [Lawma arXiv preprint](https://arxiv.org/abs/2407.16615) and [GitHub repository](https://github.com/socialfoundations/lawma).
 
```
````diff
@@ -50,7 +50,7 @@ You can find more information in the [Lawma arXiv preprint](https://arxiv.org/ab
 With evaluate CaselawQA using [this](https://github.com/socialfoundations/lm-evaluation-harness/tree/caselawqa) LM Eval Harness implementation:
 
 ```bash
-lm_eval --model hf --model_args "pretrained=<your_model>,dtype=bfloat16" --tasks caselawqa
+lm_eval --model hf --model_args "pretrained=<your_model>,dtype=bfloat16" --tasks caselawqa --output_path=<output_path>
 """
 
 EVALUATION_QUEUE_TEXT = """
````
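The added `--output_path` matters because the leaderboard reads the harness's results file: each `Tasks` entry pairs a task key and a metric key that are looked up in that json. A minimal sketch of that lookup, assuming the usual lm-evaluation-harness output layout (a top-level `results` dict keyed by task name) and a hypothetical output file path:

```python
import json

# Hypothetical path: whatever <output_path> the lm_eval run above wrote to.
with open("results/my_model/results.json") as f:
    results = json.load(f)["results"]

# Look up each (task key, metric key) pair declared in the Tasks enum, e.g.
# results["caselawqa_sc"]["exact_match,default"] feeds the "Supreme Court" column.
for task_key, col_name in [
    ("caselawqa", "CaselawQA"),
    ("caselawqa_sc", "Supreme Court"),
    ("caselawqa_songer", "Courts of Appeals"),
]:
    print(f"{col_name}: {results[task_key]['exact_match,default']:.3f}")
```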