Spaces:
Running
Running
datasets: | |
- namespace: lilac | |
name: Capybara | |
source: | |
dataset_name: LDJnr/Capybara | |
source_name: huggingface | |
embeddings: | |
- path: | |
- conversation | |
- '*' | |
- input | |
embedding: gte-small | |
- path: | |
- conversation | |
- '*' | |
- output | |
embedding: gte-small | |
settings: | |
ui: | |
media_paths: | |
- - conversation | |
- '*' | |
- input | |
- - conversation | |
- '*' | |
- output | |
tags: | |
- datasets | |
- namespace: lilac | |
name: glaive-code-assistant | |
source: | |
dataset_name: glaiveai/glaive-code-assistant | |
source_name: huggingface | |
embeddings: | |
- path: question | |
embedding: gte-small | |
- path: answer | |
embedding: gte-small | |
settings: | |
ui: | |
media_paths: | |
- question | |
- answer | |
tags: | |
- datasets | |
- namespace: lilac | |
name: glaive-function-calling-v2 | |
source: | |
dataset_name: lilacai/glaive-function-calling-v2-sharegpt | |
source_name: huggingface | |
embeddings: | |
- path: | |
- conversations | |
- '*' | |
- value | |
embedding: gte-small | |
settings: | |
ui: | |
media_paths: | |
- - conversations | |
- '*' | |
- value | |
tags: | |
- datasets | |
- namespace: lilac | |
name: open-assistant-conversations-2 | |
source: | |
dataset_name: OpenAssistant/oasst2 | |
source_name: huggingface | |
embeddings: | |
- path: text | |
embedding: gte-small | |
settings: | |
ui: | |
media_paths: | |
- text | |
tags: | |
- datasets | |
- namespace: lilac | |
name: lmsys-chat-1m | |
source: | |
dataset_name: lmsys/lmsys-chat-1m | |
source_name: huggingface | |
embeddings: | |
- path: | |
- conversation | |
- '*' | |
- content | |
embedding: gte-small | |
settings: | |
ui: | |
media_paths: | |
- - conversation | |
- '*' | |
- content | |
tags: | |
- logs | |
- namespace: lilac | |
name: OpenOrca | |
source: | |
dataset_name: Open-Orca/OpenOrca | |
source_name: huggingface | |
embeddings: | |
- path: question | |
embedding: gte-small | |
settings: | |
ui: | |
media_paths: | |
- question | |
- response | |
tags: | |
- datasets | |
- namespace: lilac | |
name: SlimOrca | |
source: | |
dataset_name: Open-Orca/SlimOrca | |
source_name: huggingface | |
embeddings: | |
- path: | |
- conversations | |
- '*' | |
- value | |
embedding: gte-small | |
settings: | |
ui: | |
media_paths: | |
- - conversations | |
- '*' | |
- value | |
tags: | |
- datasets | |
- namespace: lilac | |
name: UltraChat-200k | |
source: | |
dataset_name: HuggingFaceH4/ultrachat_200k | |
source_name: huggingface | |
settings: | |
ui: | |
media_paths: | |
- - messages | |
- '*' | |
- content | |
tags: | |
- datasets | |
- namespace: lilac | |
name: roblox_luau_corpus | |
source: | |
dataset_name: Roblox/luau_corpus | |
source_name: huggingface | |
embeddings: | |
- path: prompt | |
embedding: gte-small | |
- path: completion | |
embedding: gte-small | |
settings: | |
ui: | |
media_paths: | |
- prompt | |
- completion | |
tags: | |
- datasets | |
- namespace: lilac | |
name: hncomments-1m | |
source: | |
filepaths: | |
- /Users/brian/dev/lilac/data/datasets/local/hncomments-duckprogress/data-00000-of-00001.parquet | |
sample_size: 1000000 | |
source_name: parquet | |
embeddings: | |
- path: text | |
embedding: gte-small | |
settings: | |
ui: | |
media_paths: | |
- text | |
tags: | |
- datasets | |
- namespace: lilac | |
name: MMLU | |
source: | |
dataset_name: cais/mmlu | |
config_name: all | |
source_name: huggingface | |
settings: | |
ui: | |
media_paths: | |
- question | |
- - choices | |
- '*' | |
- answer | |
tags: | |
- eval | |
- namespace: lilac | |
name: ARC-Easy | |
source: | |
dataset_name: allenai/ai2_arc | |
config_name: ARC-Easy | |
source_name: huggingface | |
settings: | |
ui: | |
media_paths: | |
- question | |
- - choices | |
- text | |
- '*' | |
- answerKey | |
tags: | |
- eval | |
- namespace: lilac | |
name: ARC-Challenge | |
source: | |
dataset_name: allenai/ai2_arc | |
config_name: ARC-Challenge | |
source_name: huggingface | |
settings: | |
ui: | |
media_paths: | |
- question | |
- - choices | |
- text | |
- '*' | |
- answerKey | |
tags: | |
- eval | |
- namespace: lilac | |
name: HellaSwag | |
source: | |
dataset_name: Rowan/hellaswag | |
source_name: huggingface | |
settings: | |
ui: | |
media_paths: | |
- ctx | |
- ctx_a | |
- ctx_b | |
- - endings | |
- '*' | |
tags: | |
- eval | |
- namespace: lilac | |
name: HumanEval | |
source: | |
dataset_name: openai_humaneval | |
source_name: huggingface | |
settings: | |
ui: | |
media_paths: | |
- prompt | |
- canonical_solution | |
- test | |
tags: | |
- eval | |
- namespace: lilac | |
name: mbpp | |
source: | |
dataset_name: mbpp | |
source_name: huggingface | |
settings: | |
ui: | |
media_paths: | |
- code | |
- text | |
tags: | |
- eval | |
- namespace: lilac | |
name: TruthfulQA-MultipleChoice | |
source: | |
dataset_name: truthful_qa | |
config_name: multiple_choice | |
source_name: huggingface | |
settings: | |
ui: | |
media_paths: | |
- question | |
- - mc1_targets | |
- choices | |
- '*' | |
- - mc2_targets | |
- choices | |
- '*' | |
tags: | |
- eval | |
- namespace: lilac | |
name: TruthfulQA-Generation | |
source: | |
dataset_name: truthful_qa | |
config_name: generation | |
source_name: huggingface | |
settings: | |
ui: | |
media_paths: | |
- question | |
- - correct_answers | |
- '*' | |
- - incorrect_answers | |
- '*' | |
- source | |
tags: | |
- eval | |
- namespace: lilac | |
name: GSM8K-main | |
source: | |
dataset_name: gsm8k | |
config_name: main | |
source_name: huggingface | |
settings: | |
ui: | |
media_paths: | |
- question | |
- answer | |
tags: | |
- eval | |
- namespace: lilac | |
name: GSM8K-socratic | |
source: | |
dataset_name: gsm8k | |
config_name: socratic | |
source_name: huggingface | |
settings: | |
ui: | |
media_paths: | |
- question | |
- answer | |
tags: | |
- eval | |
- namespace: lilac | |
name: WinoGrande | |
source: | |
dataset_name: winogrande | |
config_name: winogrande_xl | |
source_name: huggingface | |
settings: | |
ui: | |
media_paths: | |
- sentence | |
- option1 | |
- option2 | |
- answer | |
tags: | |
- eval | |
- namespace: lilac | |
name: databricks-dolly-15k-curated-en | |
source: | |
dataset_name: argilla/databricks-dolly-15k-curated-en | |
source_name: huggingface | |
embeddings: | |
- path: original-instruction | |
embedding: gte-small | |
- path: original-context | |
embedding: gte-small | |
- path: original-response | |
embedding: gte-small | |
settings: | |
ui: | |
media_paths: | |
- original-instruction | |
- original-context | |
- original-response | |
- - new-instruction | |
- value | |
- '*' | |
- - new-context | |
- value | |
- '*' | |
- - new-response | |
- value | |
- '*' | |
tags: | |
- datasets | |
- namespace: lilac | |
name: mosaic-instruct-v3 | |
source: | |
dataset_name: mosaicml/instruct-v3 | |
source_name: huggingface | |
embeddings: | |
- path: prompt | |
embedding: gte-small | |
settings: | |
ui: | |
media_paths: | |
- prompt | |
- response | |
tags: | |
- datasets | |
- namespace: lilac | |
name: dolphin | |
source: | |
dataset_name: cognitivecomputations/dolphin | |
config_name: flan1m-alpaca-uncensored | |
source_name: huggingface | |
embeddings: | |
- path: instruction | |
embedding: gte-small | |
settings: | |
ui: | |
media_paths: | |
- instruction | |
- input | |
- output | |
tags: | |
- datasets | |
use_garden: true | |
signals: | |
- signal_name: text_statistics | |
- signal_name: lang_detection | |
concept_model_cache_embeddings: | |
- gte-small | |
- gte-base | |
- sbert | |
- openai | |
- cohere | |
clusters: | |
- dataset_namespace: lilac | |
dataset_name: Capybara | |
input_path: !!python/tuple | |
- conversation | |
- '*' | |
- input | |
- dataset_namespace: lilac | |
dataset_name: glaive-code-assistant | |
input_path: !!python/tuple | |
- question | |
- dataset_namespace: lilac | |
dataset_name: glaive-function-calling-v2 | |
input_selector: | |
format: sharegpt | |
selector: human | |
output_path: !!python/tuple | |
- conversation_clusters | |
- dataset_namespace: lilac | |
dataset_name: open-assistant-conversations-2 | |
input_path: !!python/tuple | |
- text | |
- dataset_namespace: lilac | |
dataset_name: lmsys-chat-1m | |
input_selector: | |
format: openai_conversation_json | |
selector: user | |
output_path: !!python/tuple | |
- conversation__clusters | |
- dataset_namespace: lilac | |
dataset_name: OpenOrca | |
input_path: !!python/tuple | |
- question | |
- dataset_namespace: lilac | |
dataset_name: SlimOrca | |
input_selector: | |
format: sharegpt | |
selector: human | |
output_path: !!python/tuple | |
- conversation__clusters | |
- dataset_namespace: lilac | |
dataset_name: databricks-dolly-15k-curated-en | |
input_path: !!python/tuple | |
- original-instruction | |
- dataset_namespace: lilac | |
dataset_name: mosaic-instruct-v3 | |
input_path: !!python/tuple | |
- prompt | |
- dataset_namespace: lilac | |
dataset_name: dolphin | |
input_path: !!python/tuple | |
- input | |
- dataset_namespace: lilac | |
dataset_name: UltraChat-200k | |
input_selector: | |
format: openai_json | |
selector: user | |
output_path: !!python/tuple | |
- messages__clusters | |
- dataset_namespace: lilac | |
dataset_name: roblox_luau_corpus | |
input_path: !!python/tuple | |
- prompt | |
- dataset_namespace: lilac | |
dataset_name: roblox_luau_corpus | |
input_path: !!python/tuple | |
- completion | |
- dataset_namespace: lilac | |
dataset_name: MMLU | |
input_path: !!python/tuple | |
- question | |
- dataset_namespace: lilac | |
dataset_name: ARC-Easy | |
input_path: !!python/tuple | |
- question | |
- dataset_namespace: lilac | |
dataset_name: ARC-Challenge | |
input_path: !!python/tuple | |
- question | |
- dataset_namespace: lilac | |
dataset_name: HellaSwag | |
input_path: !!python/tuple | |
- ctx | |
- dataset_namespace: lilac | |
dataset_name: HumanEval | |
input_path: !!python/tuple | |
- prompt | |
- dataset_namespace: lilac | |
dataset_name: mbpp | |
input_path: !!python/tuple | |
- text | |
- dataset_namespace: lilac | |
dataset_name: TruthfulQA-Generation | |
input_path: !!python/tuple | |
- question | |
- dataset_namespace: lilac | |
dataset_name: TruthfulQA-MultipleChoice | |
input_path: !!python/tuple | |
- question | |
- dataset_namespace: lilac | |
dataset_name: GSM8K-main | |
input_path: !!python/tuple | |
- question | |
- dataset_namespace: lilac | |
dataset_name: GSM8K-socratic | |
input_path: !!python/tuple | |
- question | |
- dataset_namespace: lilac | |
dataset_name: WinoGrande | |
input_path: !!python/tuple | |
- sentence | |