nsthorat-lilac committed on
Commit
96e9569
1 Parent(s): 31fbf3d

Push to HF space

Browse files
README.md CHANGED
@@ -9,6 +9,7 @@ datasets:
9
  - lilacai/lilac-Capybara
10
  - lilacai/lilac-glaive-code-assistant
11
  - lilacai/lilac-open-assistant-conversations-2
 
12
  - lilacai/lilac-databricks-dolly-15k-curated-en
13
  - lilacai/lilac-OpenOrca-100k
14
  - lilacai/lilac-dolphin
 
9
  - lilacai/lilac-Capybara
10
  - lilacai/lilac-glaive-code-assistant
11
  - lilacai/lilac-open-assistant-conversations-2
12
+ - lilacai/lilac-lmsys-chat-1m
13
  - lilacai/lilac-databricks-dolly-15k-curated-en
14
  - lilacai/lilac-OpenOrca-100k
15
  - lilacai/lilac-dolphin
data/lilac.yml CHANGED
@@ -59,6 +59,25 @@ datasets:
59
  - text
60
  tags:
61
  - datasets
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  - namespace: lilac
63
  name: databricks-dolly-15k-curated-en
64
  source:
@@ -127,10 +146,6 @@ use_garden: true
127
  signals:
128
  - signal_name: text_statistics
129
  - signal_name: lang_detection
130
- - embedding: gte-small
131
- namespace: lilac
132
- concept_name: profanity
133
- signal_name: concept_score
134
  concept_model_cache_embeddings:
135
  - gte-small
136
  - gte-base
@@ -152,6 +167,13 @@ clusters:
152
  dataset_name: open-assistant-conversations-2
153
  input_path: !!python/tuple
154
  - text
 
 
 
 
 
 
 
155
  - dataset_namespace: lilac
156
  dataset_name: databricks-dolly-15k-curated-en
157
  input_path: !!python/tuple
 
59
  - text
60
  tags:
61
  - datasets
62
+ - namespace: lilac
63
+ name: lmsys-chat-1m
64
+ source:
65
+ dataset_name: OpenAssistant/oasst2
66
+ source_name: huggingface
67
+ embeddings:
68
+ - path:
69
+ - conversation
70
+ - '*'
71
+ - content
72
+ embedding: gte-small
73
+ settings:
74
+ ui:
75
+ media_paths:
76
+ - - conversation
77
+ - '*'
78
+ - content
79
+ tags:
80
+ - logs
81
  - namespace: lilac
82
  name: databricks-dolly-15k-curated-en
83
  source:
 
146
  signals:
147
  - signal_name: text_statistics
148
  - signal_name: lang_detection
 
 
 
 
149
  concept_model_cache_embeddings:
150
  - gte-small
151
  - gte-base
 
167
  dataset_name: open-assistant-conversations-2
168
  input_path: !!python/tuple
169
  - text
170
+ - dataset_namespace: lilac
171
+ dataset_name: lmsys-chat-1m
172
+ input_selector:
173
+ format: openai_json
174
+ selector: user
175
+ output_path: !!python/tuple
176
+ - conversation__clusters
177
  - dataset_namespace: lilac
178
  dataset_name: databricks-dolly-15k-curated-en
179
  input_path: !!python/tuple
dist/lilac-1337.0.0-py3-none-any.whl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:74732a23985fa54c07ace5aafd0311c998c13d7a17e8cb25ae511054638b56ad
3
- size 2478879
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e51b13532baa6dc500a59207750d21bc56d99f566590ec85c6c07b91bce7ab25
3
+ size 2480452