File size: 2,518 Bytes
a3337f3
 
37d493c
bba6ca7
37d493c
bba6ca7
 
 
 
 
 
 
22f3279
 
a3337f3
22f3279
 
a3337f3
22f3279
a3337f3
22f3279
a3337f3
22f3279
ceb2b55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# Markdown/HTML "about" text: project summary, supported datasets, contact
# details and a BibTeX citation entry.
# The value contains no newline characters: the adjacent literals below are
# concatenated into one line and the only breaks are the explicit <br> tags.
# The BibTeX `howpublished` field uses the `\url{...}` command; the backslash
# is written as `\\` because a bare `\u` starts a unicode escape in Python.
# NOTE(review): the contact address reads "[email protected]", which looks
# like a redacted placeholder - confirm the real address.
ABOUT_INFO = (
    "Polish ASR leaderboard by [AMU-CAI team](https://huggingface.co/amu-cai) aims to provide comprehensive overview of performance of ASR/STT systems for Polish. <br>"
    "The leaderboard currently supports [BIGOS V2](https://huggingface.co/datasets/amu-cai/pl-asr-bigos-v2) and [PELCRA for BIGOS](https://huggingface.co/datasets/pelcra/pl-asr-pelcra-for-bigos) datasets.<br>"
    "If you want to add your system or dataset to the leaderboard, please contact Michał Junczyk ([email protected]) or open a pull request on [GitHub](https://github.com/goodmike31/pl-asr-bigos-tools) <br>"
    "To learn more please read blog post [here](https://huggingface.co/blog/michaljunczyk/introducing-polish-asr-leaderboard).<br> "
    "If you use this work, please cite it as follows: <br> "
    "```@misc{amu_cai_pl_asr_leaderboard, "
    "  author       = {Michał Junczyk}, "
    "  title        = {{AMU Polish ASR Leaderboard}}, "
    "  year         = {2024}, "
    "  howpublished = {\\url{https://huggingface.co/spaces/amu-cai/pl-asr-leaderboard}}, "
    "  publisher    = {Hugging Face} "
    "}```"
)
 
# One-line Markdown blurb introducing the BIGOS dataset collection, followed
# by a "learn more" link to its dataset page.
BIGOS_INFO = (
    "BIGOS (Benchmark Intended Grouping of Open Speech) is the collection of "
    "freely available speech datasets curated by the "
    "[AMU-CAI team](https://huggingface.co/amu-cai). "
    "Learn more [here](https://huggingface.co/datasets/amu-cai/pl-asr-bigos-v2)"
)

# One-line Markdown blurb introducing the PELCRA for BIGOS corpora subset,
# followed by a "learn more" link to its dataset page.
PELCRA_INFO = (
    "PELCRA for BIGOS is the subset of speech corpora created by the "
    "[PELCRA group](http://pelcra.pl/new/), "
    "curated for the BIGOS benchmark by the "
    "[AMU-CAI team](https://huggingface.co/amu-cai). "
    "Learn more [here](https://huggingface.co/datasets/pelcra/pl-asr-pelcra-for-bigos)"
)

# Intro sentence listing the factors by which ASR accuracy is examined:
# system type, model size, audio duration, speaking rate, speaker age/gender.
ANALYSIS_INFO = "Here we examine ASR accuracy depending on the system type, model size, audio duration, speaking rate and speaker characteristics (age and gender)"

# Intro sentence for inspecting individual systems on individual audio samples.
INSPECTION_INFO = (
    "Here you can inspect the performance of specific ASR systems "
    "on the specific audio samples"
)

# Intro sentence for comparing several systems on chosen datasets.
COMPARISON_INFO = (
    "Here you can compare the performance of different ASR systems "
    "on the specific datasets using metrics and visualizations of your choice."
)


# Maps an ASR system identifier to the hex colour used for that system.
# Insertion order is kept as declared; append new systems (or change a
# colour) directly in this literal.
asr_systems_colors_mapping = {
    "azure": "#1f77b4",          # blue
    "google": "#2ca02c",         # green
    "wav2vec2": "#d62728",       # red
    "nemo": "#9467bd",           # purple
    "assemblyai": "#8c564b",     # brown
    "mms": "#e377c2",            # pink
    "google_v2": "#7f7f7f",      # gray
    "whisper_cloud": "#bcbd22",  # olive
    "whisper_local": "#ff7f0e",  # orange
}