nsthorat committed on
Commit
44fb940
1 Parent(s): cd01a89
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +0 -106
  2. Dockerfile +3 -0
  3. README.md +8 -3
  4. data/.cache/lilac/concept/lilac/negative-sentiment/gte-small.pkl +0 -0
  5. data/.cache/lilac/concept/lilac/positive-sentiment/gte-small.pkl +0 -0
  6. data/.cache/lilac/concept/lilac/profanity/gte-small.pkl +0 -3
  7. data/.cache/lilac/concept/lilac/question/gte-small.pkl +0 -0
  8. data/.cache/lilac/concept/lilac/source-code/gte-small.pkl +0 -0
  9. data/.cache/lilac/concept/lilac/toxicity/gte-small.pkl +0 -3
  10. data/.cache/lilac/concept/local/question/cohere.pkl +0 -0
  11. data/.cache/lilac/concept/local/question/gte-base.pkl +0 -0
  12. data/.cache/lilac/concept/local/question/gte-small.pkl +0 -0
  13. data/.cache/lilac/concept/local/question/openai.pkl +0 -0
  14. data/.cache/lilac/concept/local/question/palm.pkl +0 -0
  15. data/.cache/lilac/concept/local/question/sbert.pkl +0 -0
  16. data/datasets/lilac/OpenOrca-100k/.DS_Store +0 -0
  17. data/datasets/lilac/OpenOrca-100k/config.yml +0 -28
  18. data/datasets/lilac/OpenOrca-100k/data-00000-of-00001.parquet +0 -3
  19. data/datasets/lilac/OpenOrca-100k/manifest.json +0 -24
  20. data/datasets/lilac/OpenOrca-100k/question/gte-small/hnsw.hnswlib.bin +0 -3
  21. data/datasets/lilac/OpenOrca-100k/question/gte-small/hnsw.lookup.pkl +0 -3
  22. data/datasets/lilac/OpenOrca-100k/question/gte-small/signal_manifest.json +0 -32
  23. data/datasets/lilac/OpenOrca-100k/question/gte-small/spans.pkl +0 -3
  24. data/datasets/lilac/OpenOrca-100k/question/lang_detection/data-00000-of-00001.parquet +0 -3
  25. data/datasets/lilac/OpenOrca-100k/question/lang_detection/signal_manifest.json +0 -28
  26. data/datasets/lilac/OpenOrca-100k/question/near_dup/data-00000-of-00001.parquet +0 -3
  27. data/datasets/lilac/OpenOrca-100k/question/near_dup/signal_manifest.json +0 -33
  28. data/datasets/lilac/OpenOrca-100k/question/pii/data-00000-of-00001.parquet +0 -3
  29. data/datasets/lilac/OpenOrca-100k/question/pii/signal_manifest.json +0 -42
  30. data/datasets/lilac/OpenOrca-100k/question/text_statistics/data-00000-of-00001.parquet +0 -3
  31. data/datasets/lilac/OpenOrca-100k/question/text_statistics/signal_manifest.json +0 -56
  32. data/datasets/lilac/OpenOrca-100k/response/gte-small/hnsw.hnswlib.bin +0 -3
  33. data/datasets/lilac/OpenOrca-100k/response/gte-small/hnsw.lookup.pkl +0 -3
  34. data/datasets/lilac/OpenOrca-100k/response/gte-small/signal_manifest.json +0 -32
  35. data/datasets/lilac/OpenOrca-100k/response/gte-small/spans.pkl +0 -3
  36. data/datasets/lilac/OpenOrca-100k/response/lang_detection/data-00000-of-00001.parquet +0 -3
  37. data/datasets/lilac/OpenOrca-100k/response/lang_detection/signal_manifest.json +0 -28
  38. data/datasets/lilac/OpenOrca-100k/response/near_dup/data-00000-of-00001.parquet +0 -3
  39. data/datasets/lilac/OpenOrca-100k/response/near_dup/signal_manifest.json +0 -33
  40. data/datasets/lilac/OpenOrca-100k/response/pii/data-00000-of-00001.parquet +0 -3
  41. data/datasets/lilac/OpenOrca-100k/response/pii/signal_manifest.json +0 -42
  42. data/datasets/lilac/OpenOrca-100k/response/text_statistics/data-00000-of-00001.parquet +0 -3
  43. data/datasets/lilac/OpenOrca-100k/response/text_statistics/signal_manifest.json +0 -56
  44. data/datasets/lilac/databricks-dolly-15k-curated-en/config.yml +0 -67
  45. data/datasets/lilac/databricks-dolly-15k-curated-en/data-00000-of-00001.parquet +0 -3
  46. data/datasets/lilac/databricks-dolly-15k-curated-en/manifest.json +0 -87
  47. data/datasets/lilac/databricks-dolly-15k-curated-en/new-context/value/gte-small/hnsw.hnswlib.bin +0 -3
  48. data/datasets/lilac/databricks-dolly-15k-curated-en/new-context/value/gte-small/hnsw.lookup.pkl +0 -0
  49. data/datasets/lilac/databricks-dolly-15k-curated-en/new-context/value/gte-small/signal_manifest.json +0 -40
  50. data/datasets/lilac/databricks-dolly-15k-curated-en/new-context/value/gte-small/spans.pkl +0 -0
.gitattributes DELETED
@@ -1,106 +0,0 @@
1
- data/.cache/lilac/concept/lilac/profanity/gte-small.pkl filter=lfs diff=lfs merge=lfs -text
2
- data/.cache/lilac/concept/lilac/toxicity/gte-small.pkl filter=lfs diff=lfs merge=lfs -text
3
- data/datasets/lilac/piqa/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
4
- data/datasets/lilac/piqa/goal/gte-small/hnsw.hnswlib.bin filter=lfs diff=lfs merge=lfs -text
5
- data/datasets/lilac/piqa/goal/gte-small/hnsw.lookup.pkl filter=lfs diff=lfs merge=lfs -text
6
- data/datasets/lilac/piqa/goal/gte-small/spans.pkl filter=lfs diff=lfs merge=lfs -text
7
- data/datasets/lilac/piqa/sol1/gte-small/hnsw.hnswlib.bin filter=lfs diff=lfs merge=lfs -text
8
- data/datasets/lilac/piqa/sol1/gte-small/hnsw.lookup.pkl filter=lfs diff=lfs merge=lfs -text
9
- data/datasets/lilac/piqa/sol1/gte-small/spans.pkl filter=lfs diff=lfs merge=lfs -text
10
- data/datasets/lilac/piqa/sol2/gte-small/hnsw.hnswlib.bin filter=lfs diff=lfs merge=lfs -text
11
- data/datasets/lilac/piqa/sol2/gte-small/hnsw.lookup.pkl filter=lfs diff=lfs merge=lfs -text
12
- data/datasets/lilac/piqa/sol2/gte-small/spans.pkl filter=lfs diff=lfs merge=lfs -text
13
- data/datasets/lilac/science-qa-derek-thomas/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
14
- data/datasets/lilac/science-qa-derek-thomas/lecture/gte-small/hnsw.hnswlib.bin filter=lfs diff=lfs merge=lfs -text
15
- data/datasets/lilac/science-qa-derek-thomas/lecture/gte-small/hnsw.lookup.pkl filter=lfs diff=lfs merge=lfs -text
16
- data/datasets/lilac/science-qa-derek-thomas/lecture/gte-small/spans.pkl filter=lfs diff=lfs merge=lfs -text
17
- data/datasets/lilac/enron-emails/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
18
- data/datasets/lilac/enron-emails/text/gte-small/hnsw.hnswlib.bin filter=lfs diff=lfs merge=lfs -text
19
- data/datasets/lilac/enron-emails/text/gte-small/hnsw.lookup.pkl filter=lfs diff=lfs merge=lfs -text
20
- data/datasets/lilac/enron-emails/text/gte-small/spans.pkl filter=lfs diff=lfs merge=lfs -text
21
- data/datasets/lilac/enron-emails/text/lang_detection/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
22
- data/datasets/lilac/enron-emails/text/near_dup/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
23
- data/datasets/lilac/enron-emails/text/pii/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
24
- data/datasets/lilac/mmlu_professional_law/choices/gte-small/hnsw.hnswlib.bin filter=lfs diff=lfs merge=lfs -text
25
- data/datasets/lilac/mmlu_professional_law/choices/gte-small/hnsw.lookup.pkl filter=lfs diff=lfs merge=lfs -text
26
- data/datasets/lilac/mmlu_professional_law/choices/gte-small/spans.pkl filter=lfs diff=lfs merge=lfs -text
27
- data/datasets/lilac/mmlu_professional_law/choices/lang_detection/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
28
- data/datasets/lilac/mmlu_professional_law/choices/near_dup/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
29
- data/datasets/lilac/mmlu_professional_law/choices/pii/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
30
- data/datasets/lilac/mmlu_professional_law/choices/text_statistics/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
31
- data/datasets/lilac/mmlu_professional_law/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
32
- data/datasets/lilac/mmlu_professional_law/question/gte-small/hnsw.hnswlib.bin filter=lfs diff=lfs merge=lfs -text
33
- data/datasets/lilac/mmlu_professional_law/question/gte-small/hnsw.lookup.pkl filter=lfs diff=lfs merge=lfs -text
34
- data/datasets/lilac/mmlu_professional_law/question/gte-small/spans.pkl filter=lfs diff=lfs merge=lfs -text
35
- data/datasets/lilac/mmlu_professional_law/question/lang_detection/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
36
- data/datasets/lilac/mmlu_professional_law/question/near_dup/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
37
- data/datasets/lilac/mmlu_professional_law/question/pii/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
38
- data/datasets/lilac/mmlu_professional_law/question/text_statistics/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
39
- data/datasets/lilac/pile-of-law-r-legaladvice/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
40
- data/datasets/lilac/pile-of-law-r-legaladvice/text/gte-small/hnsw.hnswlib.bin filter=lfs diff=lfs merge=lfs -text
41
- data/datasets/lilac/pile-of-law-r-legaladvice/text/gte-small/hnsw.lookup.pkl filter=lfs diff=lfs merge=lfs -text
42
- data/datasets/lilac/pile-of-law-r-legaladvice/text/gte-small/spans.pkl filter=lfs diff=lfs merge=lfs -text
43
- data/datasets/lilac/pile-of-law-r-legaladvice/text/lang_detection/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
44
- data/datasets/lilac/pile-of-law-r-legaladvice/text/near_dup/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
45
- data/datasets/lilac/pile-of-law-r-legaladvice/text/pii/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
46
- data/datasets/lilac/pile-of-law-r-legaladvice/text/text_statistics/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
47
- data/datasets/lilac/open-asssistant-conversations/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
48
- data/datasets/lilac/open-asssistant-conversations/text/gte-small/hnsw.hnswlib.bin filter=lfs diff=lfs merge=lfs -text
49
- data/datasets/lilac/open-asssistant-conversations/text/gte-small/hnsw.lookup.pkl filter=lfs diff=lfs merge=lfs -text
50
- data/datasets/lilac/open-asssistant-conversations/text/gte-small/spans.pkl filter=lfs diff=lfs merge=lfs -text
51
- data/datasets/lilac/open-asssistant-conversations/text/lang_detection/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
52
- data/datasets/lilac/open-asssistant-conversations/text/near_dup/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
53
- data/datasets/lilac/open-asssistant-conversations/text/pii/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
54
- data/datasets/lilac/open-asssistant-conversations/text/text_statistics/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
55
- data/datasets/lilac/squad_v2/answers/text/lang_detection/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
56
- data/datasets/lilac/squad_v2/answers/text/near_dup/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
57
- data/datasets/lilac/squad_v2/answers/text/pii/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
58
- data/datasets/lilac/squad_v2/answers/text/text_statistics/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
59
- data/datasets/lilac/squad_v2/context/gte-small/hnsw.hnswlib.bin filter=lfs diff=lfs merge=lfs -text
60
- data/datasets/lilac/squad_v2/context/gte-small/hnsw.lookup.pkl filter=lfs diff=lfs merge=lfs -text
61
- data/datasets/lilac/squad_v2/context/gte-small/spans.pkl filter=lfs diff=lfs merge=lfs -text
62
- data/datasets/lilac/squad_v2/context/lang_detection/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
63
- data/datasets/lilac/squad_v2/context/near_dup/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
64
- data/datasets/lilac/squad_v2/context/pii/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
65
- data/datasets/lilac/squad_v2/context/text_statistics/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
66
- data/datasets/lilac/squad_v2/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
67
- data/datasets/lilac/squad_v2/question/lang_detection/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
68
- data/datasets/lilac/squad_v2/question/near_dup/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
69
- data/datasets/lilac/squad_v2/question/pii/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
70
- data/datasets/lilac/squad_v2/question/text_statistics/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
71
- data/datasets/lilac/imdb/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
72
- data/datasets/lilac/imdb/text/gte-small/hnsw.hnswlib.bin filter=lfs diff=lfs merge=lfs -text
73
- data/datasets/lilac/imdb/text/gte-small/hnsw.lookup.pkl filter=lfs diff=lfs merge=lfs -text
74
- data/datasets/lilac/imdb/text/gte-small/spans.pkl filter=lfs diff=lfs merge=lfs -text
75
- data/datasets/lilac/imdb/text/lang_detection/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
76
- data/datasets/lilac/imdb/text/near_dup/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
77
- data/datasets/lilac/imdb/text/pii/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
78
- data/datasets/lilac/imdb/text/text_statistics/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
79
- data/datasets/lilac/databricks-dolly-15k-curated-en/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
80
- data/datasets/lilac/databricks-dolly-15k-curated-en/new-context/value/gte-small/hnsw.hnswlib.bin filter=lfs diff=lfs merge=lfs -text
81
- data/datasets/lilac/databricks-dolly-15k-curated-en/original-context/gte-small/hnsw.hnswlib.bin filter=lfs diff=lfs merge=lfs -text
82
- data/datasets/lilac/OpenOrca-100k/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
83
- data/datasets/lilac/OpenOrca-100k/question/gte-small/hnsw.hnswlib.bin filter=lfs diff=lfs merge=lfs -text
84
- data/datasets/lilac/OpenOrca-100k/question/gte-small/hnsw.lookup.pkl filter=lfs diff=lfs merge=lfs -text
85
- data/datasets/lilac/OpenOrca-100k/question/gte-small/spans.pkl filter=lfs diff=lfs merge=lfs -text
86
- data/datasets/lilac/OpenOrca-100k/question/lang_detection/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
87
- data/datasets/lilac/OpenOrca-100k/question/near_dup/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
88
- data/datasets/lilac/OpenOrca-100k/question/pii/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
89
- data/datasets/lilac/OpenOrca-100k/question/text_statistics/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
90
- data/datasets/lilac/OpenOrca-100k/response/gte-small/hnsw.hnswlib.bin filter=lfs diff=lfs merge=lfs -text
91
- data/datasets/lilac/OpenOrca-100k/response/gte-small/hnsw.lookup.pkl filter=lfs diff=lfs merge=lfs -text
92
- data/datasets/lilac/OpenOrca-100k/response/gte-small/spans.pkl filter=lfs diff=lfs merge=lfs -text
93
- data/datasets/lilac/OpenOrca-100k/response/lang_detection/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
94
- data/datasets/lilac/OpenOrca-100k/response/near_dup/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
95
- data/datasets/lilac/OpenOrca-100k/response/pii/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
96
- data/datasets/lilac/OpenOrca-100k/response/text_statistics/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
97
- data/datasets/lilac/wikitext-2-raw-v1/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
98
- data/datasets/lilac/wikitext-2-raw-v1/text/gte-small/hnsw.hnswlib.bin filter=lfs diff=lfs merge=lfs -text
99
- data/datasets/lilac/wikitext-2-raw-v1/text/gte-small/hnsw.lookup.pkl filter=lfs diff=lfs merge=lfs -text
100
- data/datasets/lilac/wikitext-2-raw-v1/text/gte-small/spans.pkl filter=lfs diff=lfs merge=lfs -text
101
- data/datasets/lilac/wikitext-2-raw-v1/text/lang_detection/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
102
- data/datasets/lilac/wikitext-2-raw-v1/text/near_dup/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
103
- data/datasets/lilac/wikitext-2-raw-v1/text/pii/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
104
- data/datasets/lilac/wikitext-2-raw-v1/text/text_statistics/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
105
- data/datasets/lilac/opus100-en-us-validation/translation/en/gte-small/hnsw.hnswlib.bin filter=lfs diff=lfs merge=lfs -text
106
- data/datasets/lilac/opus100-en-us-validation/translation/es/gte-small/hnsw.hnswlib.bin filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Dockerfile CHANGED
@@ -14,6 +14,9 @@ RUN pip install --no-cache-dir -r requirements.txt
14
 
15
  COPY .env .
16
  COPY .env.demo .
 
 
 
17
  COPY LICENSE .
18
 
19
  # Copy python files.
 
14
 
15
  COPY .env .
16
  COPY .env.demo .
17
+ COPY demo_config.yml .
18
+ # Copy the README so we can read the datasets from the HuggingFace config.
19
+ COPY README.md .
20
  COPY LICENSE .
21
 
22
  # Copy python files.
README.md CHANGED
@@ -1,8 +1,13 @@
1
  ---
2
- title: Lilac
3
- emoji: 🌷
4
  colorFrom: purple
5
  colorTo: purple
 
 
 
 
 
6
  sdk: docker
7
- app_port: 5432
 
8
  ---
 
1
  ---
2
+ app_port: 5432
 
3
  colorFrom: purple
4
  colorTo: purple
5
+ datasets: [lilacai/lilac-piqa, lilacai/lilac-science-qa-derek-thomas, lilacai/lilac-enron-emails,
6
+ lilacai/lilac-mmlu_professional_law, lilacai/lilac-pile-of-law-r-legaladvice, lilacai/lilac-open-asssistant-conversations,
7
+ lilacai/lilac-squad_v2, lilacai/lilac-imdb, lilacai/lilac-databricks-dolly-15k-curated-en,
8
+ lilacai/lilac-OpenOrca-100k, lilacai/lilac-wikitext-2-raw-v1, lilacai/lilac-opus100-en-us-validation]
9
+ emoji: "\U0001F337"
10
  sdk: docker
11
+ title: Lilac
12
+
13
  ---
data/.cache/lilac/concept/lilac/negative-sentiment/gte-small.pkl DELETED
Binary file (202 kB)
 
data/.cache/lilac/concept/lilac/positive-sentiment/gte-small.pkl DELETED
Binary file (180 kB)
 
data/.cache/lilac/concept/lilac/profanity/gte-small.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:7cf7bf81552f4965f217d537747e806715f508250c2095793743ef276ecddb18
3
- size 1672960
 
 
 
 
data/.cache/lilac/concept/lilac/question/gte-small.pkl DELETED
Binary file (611 kB)
 
data/.cache/lilac/concept/lilac/source-code/gte-small.pkl DELETED
Binary file (126 kB)
 
data/.cache/lilac/concept/lilac/toxicity/gte-small.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:097d149cbb575e054ea00eac3bbae56498dcd4b0e9ef7b9d32231efc538acb89
3
- size 1886446
 
 
 
 
data/.cache/lilac/concept/local/question/cohere.pkl DELETED
Binary file (956 kB)
 
data/.cache/lilac/concept/local/question/gte-base.pkl DELETED
Binary file (184 kB)
 
data/.cache/lilac/concept/local/question/gte-small.pkl DELETED
Binary file (95.9 kB)
 
data/.cache/lilac/concept/local/question/openai.pkl DELETED
Binary file (362 kB)
 
data/.cache/lilac/concept/local/question/palm.pkl DELETED
Binary file (181 kB)
 
data/.cache/lilac/concept/local/question/sbert.pkl DELETED
Binary file (94.7 kB)
 
data/datasets/lilac/OpenOrca-100k/.DS_Store DELETED
Binary file (6.15 kB)
 
data/datasets/lilac/OpenOrca-100k/config.yml DELETED
@@ -1,28 +0,0 @@
1
- embeddings:
2
- - {embedding: gte-small, path: response}
3
- - {embedding: gte-small, path: question}
4
- name: OpenOrca-100k
5
- namespace: local
6
- settings:
7
- preferred_embedding: gte-small
8
- ui:
9
- media_paths: [question, response]
10
- signals:
11
- - path: question
12
- signal: {signal_name: near_dup}
13
- - path: question
14
- signal: {signal_name: text_statistics}
15
- - path: question
16
- signal: {signal_name: pii}
17
- - path: question
18
- signal: {signal_name: lang_detection}
19
- - path: response
20
- signal: {signal_name: near_dup}
21
- - path: response
22
- signal: {signal_name: text_statistics}
23
- - path: response
24
- signal: {signal_name: pii}
25
- - path: response
26
- signal: {signal_name: lang_detection}
27
- source: {dataset_name: Open-Orca/OpenOrca, sample_size: 100000, source_name: huggingface}
28
- tags: [machine-learning]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
data/datasets/lilac/OpenOrca-100k/data-00000-of-00001.parquet DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f95588367446af55ccc2f089092779670c57308ee1f72a849e41f22e126d5052
3
- size 105147761
 
 
 
 
data/datasets/lilac/OpenOrca-100k/manifest.json DELETED
@@ -1,24 +0,0 @@
1
- {
2
- "files": [
3
- "data-00000-of-00001.parquet"
4
- ],
5
- "data_schema": {
6
- "fields": {
7
- "id": {
8
- "dtype": "string"
9
- },
10
- "system_prompt": {
11
- "dtype": "string"
12
- },
13
- "question": {
14
- "dtype": "string"
15
- },
16
- "response": {
17
- "dtype": "string"
18
- },
19
- "__hfsplit__": {
20
- "dtype": "string"
21
- }
22
- }
23
- }
24
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
data/datasets/lilac/OpenOrca-100k/question/gte-small/hnsw.hnswlib.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:8f154c2dc5a0d69538c39df10508fe05cc36fb5489b61c303c9869320ef04581
3
- size 596704812
 
 
 
 
data/datasets/lilac/OpenOrca-100k/question/gte-small/hnsw.lookup.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c0bac1790aa5247eb288c2a828a92eb313090b36a015665f6aae42e5a4dcb18
3
- size 9378299
 
 
 
 
data/datasets/lilac/OpenOrca-100k/question/gte-small/signal_manifest.json DELETED
@@ -1,32 +0,0 @@
1
- {
2
- "files": [],
3
- "parquet_id": "question.gte-small",
4
- "data_schema": {
5
- "fields": {
6
- "question": {
7
- "fields": {
8
- "gte-small": {
9
- "repeated_field": {
10
- "fields": {
11
- "embedding": {
12
- "dtype": "embedding"
13
- }
14
- },
15
- "dtype": "string_span"
16
- },
17
- "signal": {
18
- "signal_name": "gte-small"
19
- }
20
- }
21
- }
22
- }
23
- }
24
- },
25
- "signal": {
26
- "signal_name": "gte-small"
27
- },
28
- "enriched_path": [
29
- "question"
30
- ],
31
- "vector_store": "hnsw"
32
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
data/datasets/lilac/OpenOrca-100k/question/gte-small/spans.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ef19c506d4af4eab34aec3b280663687002db0792108b84d313f8ab6f532aa6c
3
- size 6922769
 
 
 
 
data/datasets/lilac/OpenOrca-100k/question/lang_detection/data-00000-of-00001.parquet DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:b1a09e7085a4019205c62c28e6dcb46254fea37243e8087346d9c7298e05f9e1
3
- size 3327888
 
 
 
 
data/datasets/lilac/OpenOrca-100k/question/lang_detection/signal_manifest.json DELETED
@@ -1,28 +0,0 @@
1
- {
2
- "files": [
3
- "data-00000-of-00001.parquet"
4
- ],
5
- "parquet_id": "question.lang_detection",
6
- "data_schema": {
7
- "fields": {
8
- "question": {
9
- "fields": {
10
- "lang_detection": {
11
- "dtype": "string",
12
- "signal": {
13
- "split_by_paragraph": false,
14
- "signal_name": "lang_detection"
15
- }
16
- }
17
- }
18
- }
19
- }
20
- },
21
- "signal": {
22
- "split_by_paragraph": false,
23
- "signal_name": "lang_detection"
24
- },
25
- "enriched_path": [
26
- "question"
27
- ]
28
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
data/datasets/lilac/OpenOrca-100k/question/near_dup/data-00000-of-00001.parquet DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:e1ff51f57fb136ab846d0c34a248aca4ef86d09fa0945737cd2c276d2f5dcb7d
3
- size 3884385
 
 
 
 
data/datasets/lilac/OpenOrca-100k/question/near_dup/signal_manifest.json DELETED
@@ -1,33 +0,0 @@
1
- {
2
- "files": [
3
- "data-00000-of-00001.parquet"
4
- ],
5
- "parquet_id": "question.near_dup",
6
- "data_schema": {
7
- "fields": {
8
- "question": {
9
- "fields": {
10
- "near_dup": {
11
- "fields": {
12
- "cluster_id": {
13
- "dtype": "uint32",
14
- "categorical": true
15
- }
16
- },
17
- "signal": {
18
- "threshold": 0.85,
19
- "signal_name": "near_dup"
20
- }
21
- }
22
- }
23
- }
24
- }
25
- },
26
- "signal": {
27
- "threshold": 0.85,
28
- "signal_name": "near_dup"
29
- },
30
- "enriched_path": [
31
- "question"
32
- ]
33
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
data/datasets/lilac/OpenOrca-100k/question/pii/data-00000-of-00001.parquet DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:acc4cd2ae7c51b4450d159c63fee3e9739b3c1d5a36cfbf3bf45fe29e2ac15b5
3
- size 3317869
 
 
 
 
data/datasets/lilac/OpenOrca-100k/question/pii/signal_manifest.json DELETED
@@ -1,42 +0,0 @@
1
- {
2
- "files": [
3
- "data-00000-of-00001.parquet"
4
- ],
5
- "parquet_id": "question.pii",
6
- "data_schema": {
7
- "fields": {
8
- "question": {
9
- "fields": {
10
- "pii": {
11
- "fields": {
12
- "emails": {
13
- "repeated_field": {
14
- "dtype": "string_span"
15
- }
16
- },
17
- "ip_addresses": {
18
- "repeated_field": {
19
- "dtype": "string_span"
20
- }
21
- },
22
- "secrets": {
23
- "repeated_field": {
24
- "dtype": "string_span"
25
- }
26
- }
27
- },
28
- "signal": {
29
- "signal_name": "pii"
30
- }
31
- }
32
- }
33
- }
34
- }
35
- },
36
- "signal": {
37
- "signal_name": "pii"
38
- },
39
- "enriched_path": [
40
- "question"
41
- ]
42
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
data/datasets/lilac/OpenOrca-100k/question/text_statistics/data-00000-of-00001.parquet DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:b6703f93314760ee15d64532812a601c85d2f411254c1d809c6b3f558cc1c7c7
3
- size 4321496
 
 
 
 
data/datasets/lilac/OpenOrca-100k/question/text_statistics/signal_manifest.json DELETED
@@ -1,56 +0,0 @@
1
- {
2
- "files": [
3
- "data-00000-of-00001.parquet"
4
- ],
5
- "parquet_id": "question.text_statistics",
6
- "data_schema": {
7
- "fields": {
8
- "question": {
9
- "fields": {
10
- "text_statistics": {
11
- "fields": {
12
- "num_characters": {
13
- "dtype": "int32"
14
- },
15
- "readability": {
16
- "dtype": "float32"
17
- },
18
- "log(type_token_ratio)": {
19
- "dtype": "float32"
20
- },
21
- "frac_non_ascii": {
22
- "dtype": "float32",
23
- "bins": [
24
- [
25
- "Low",
26
- null,
27
- 0.15
28
- ],
29
- [
30
- "Medium",
31
- 0.15,
32
- 0.3
33
- ],
34
- [
35
- "High",
36
- 0.3,
37
- null
38
- ]
39
- ]
40
- }
41
- },
42
- "signal": {
43
- "signal_name": "text_statistics"
44
- }
45
- }
46
- }
47
- }
48
- }
49
- },
50
- "signal": {
51
- "signal_name": "text_statistics"
52
- },
53
- "enriched_path": [
54
- "question"
55
- ]
56
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
data/datasets/lilac/OpenOrca-100k/response/gte-small/hnsw.hnswlib.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:c2374770842450f7d1712e2d56bc2e50bb1579af4cda061df2baf4631965dbcd
3
- size 482647596
 
 
 
 
data/datasets/lilac/OpenOrca-100k/response/gte-small/hnsw.lookup.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:afd6636965df6ed8f6aadd52a9638edf201c36dd470b816e6488e5417dcfe3c4
3
- size 8159214
 
 
 
 
data/datasets/lilac/OpenOrca-100k/response/gte-small/signal_manifest.json DELETED
@@ -1,32 +0,0 @@
1
- {
2
- "files": [],
3
- "parquet_id": "response.gte-small",
4
- "data_schema": {
5
- "fields": {
6
- "response": {
7
- "fields": {
8
- "gte-small": {
9
- "repeated_field": {
10
- "fields": {
11
- "embedding": {
12
- "dtype": "embedding"
13
- }
14
- },
15
- "dtype": "string_span"
16
- },
17
- "signal": {
18
- "signal_name": "gte-small"
19
- }
20
- }
21
- }
22
- }
23
- }
24
- },
25
- "signal": {
26
- "signal_name": "gte-small"
27
- },
28
- "enriched_path": [
29
- "response"
30
- ],
31
- "vector_store": "hnsw"
32
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
data/datasets/lilac/OpenOrca-100k/response/gte-small/spans.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:cf61917d291a1e3157ca017b4eacdf8983bf8094b3b22d710031381927f19b16
3
- size 6373377
 
 
 
 
data/datasets/lilac/OpenOrca-100k/response/lang_detection/data-00000-of-00001.parquet DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:62af2b56e9bf3cbcddbceab6f858fc35fef50953b73b06a7da3bc1d2e62d3a53
3
- size 3339983
 
 
 
 
data/datasets/lilac/OpenOrca-100k/response/lang_detection/signal_manifest.json DELETED
@@ -1,28 +0,0 @@
1
- {
2
- "files": [
3
- "data-00000-of-00001.parquet"
4
- ],
5
- "parquet_id": "response.lang_detection",
6
- "data_schema": {
7
- "fields": {
8
- "response": {
9
- "fields": {
10
- "lang_detection": {
11
- "dtype": "string",
12
- "signal": {
13
- "split_by_paragraph": false,
14
- "signal_name": "lang_detection"
15
- }
16
- }
17
- }
18
- }
19
- }
20
- },
21
- "signal": {
22
- "split_by_paragraph": false,
23
- "signal_name": "lang_detection"
24
- },
25
- "enriched_path": [
26
- "response"
27
- ]
28
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
data/datasets/lilac/OpenOrca-100k/response/near_dup/data-00000-of-00001.parquet DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:cc30679d1a2e6f2d3d45b2f145932daebf8a3f6ae4b73cfa9da3dbf5c495967d
3
- size 3902985
 
 
 
 
data/datasets/lilac/OpenOrca-100k/response/near_dup/signal_manifest.json DELETED
@@ -1,33 +0,0 @@
1
- {
2
- "files": [
3
- "data-00000-of-00001.parquet"
4
- ],
5
- "parquet_id": "response.near_dup",
6
- "data_schema": {
7
- "fields": {
8
- "response": {
9
- "fields": {
10
- "near_dup": {
11
- "fields": {
12
- "cluster_id": {
13
- "dtype": "uint32",
14
- "categorical": true
15
- }
16
- },
17
- "signal": {
18
- "threshold": 0.85,
19
- "signal_name": "near_dup"
20
- }
21
- }
22
- }
23
- }
24
- }
25
- },
26
- "signal": {
27
- "threshold": 0.85,
28
- "signal_name": "near_dup"
29
- },
30
- "enriched_path": [
31
- "response"
32
- ]
33
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
data/datasets/lilac/OpenOrca-100k/response/pii/data-00000-of-00001.parquet DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f3b5830b33a8ddbe000b1b4403ef882731243075acc6416b5f673c90d4bf25ac
3
- size 3313965
 
 
 
 
data/datasets/lilac/OpenOrca-100k/response/pii/signal_manifest.json DELETED
@@ -1,42 +0,0 @@
1
- {
2
- "files": [
3
- "data-00000-of-00001.parquet"
4
- ],
5
- "parquet_id": "response.pii",
6
- "data_schema": {
7
- "fields": {
8
- "response": {
9
- "fields": {
10
- "pii": {
11
- "fields": {
12
- "emails": {
13
- "repeated_field": {
14
- "dtype": "string_span"
15
- }
16
- },
17
- "ip_addresses": {
18
- "repeated_field": {
19
- "dtype": "string_span"
20
- }
21
- },
22
- "secrets": {
23
- "repeated_field": {
24
- "dtype": "string_span"
25
- }
26
- }
27
- },
28
- "signal": {
29
- "signal_name": "pii"
30
- }
31
- }
32
- }
33
- }
34
- }
35
- },
36
- "signal": {
37
- "signal_name": "pii"
38
- },
39
- "enriched_path": [
40
- "response"
41
- ]
42
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
data/datasets/lilac/OpenOrca-100k/response/text_statistics/data-00000-of-00001.parquet DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:0e5bae031d37b7df9a3df49a616d58a8f9962307750039c1736b8faa56d8501a
3
- size 4281305
 
 
 
 
data/datasets/lilac/OpenOrca-100k/response/text_statistics/signal_manifest.json DELETED
@@ -1,56 +0,0 @@
1
- {
2
- "files": [
3
- "data-00000-of-00001.parquet"
4
- ],
5
- "parquet_id": "response.text_statistics",
6
- "data_schema": {
7
- "fields": {
8
- "response": {
9
- "fields": {
10
- "text_statistics": {
11
- "fields": {
12
- "num_characters": {
13
- "dtype": "int32"
14
- },
15
- "readability": {
16
- "dtype": "float32"
17
- },
18
- "log(type_token_ratio)": {
19
- "dtype": "float32"
20
- },
21
- "frac_non_ascii": {
22
- "dtype": "float32",
23
- "bins": [
24
- [
25
- "Low",
26
- null,
27
- 0.15
28
- ],
29
- [
30
- "Medium",
31
- 0.15,
32
- 0.3
33
- ],
34
- [
35
- "High",
36
- 0.3,
37
- null
38
- ]
39
- ]
40
- }
41
- },
42
- "signal": {
43
- "signal_name": "text_statistics"
44
- }
45
- }
46
- }
47
- }
48
- }
49
- },
50
- "signal": {
51
- "signal_name": "text_statistics"
52
- },
53
- "enriched_path": [
54
- "response"
55
- ]
56
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
data/datasets/lilac/databricks-dolly-15k-curated-en/config.yml DELETED
@@ -1,67 +0,0 @@
1
- embeddings:
2
- - embedding: gte-small
3
- path: [new-context, value, '*']
4
- - {embedding: gte-small, path: original-context}
5
- name: databricks-dolly-15k-curated-en
6
- namespace: lilac
7
- settings:
8
- preferred_embedding: gte-small
9
- ui:
10
- media_paths:
11
- - original-instruction
12
- - original-context
13
- - original-response
14
- - [new-instruction, value, '*']
15
- - [new-context, value, '*']
16
- - [new-response, value, '*']
17
- signals:
18
- - path: original-instruction
19
- signal: {signal_name: near_dup}
20
- - path: original-instruction
21
- signal: {signal_name: text_statistics}
22
- - path: original-instruction
23
- signal: {signal_name: pii}
24
- - path: original-instruction
25
- signal: {signal_name: lang_detection}
26
- - path: original-context
27
- signal: {signal_name: near_dup}
28
- - path: original-context
29
- signal: {signal_name: text_statistics}
30
- - path: original-context
31
- signal: {signal_name: lang_detection}
32
- - path: original-context
33
- signal: {signal_name: pii}
34
- - path: original-response
35
- signal: {signal_name: near_dup}
36
- - path: original-response
37
- signal: {signal_name: text_statistics}
38
- - path: original-response
39
- signal: {signal_name: pii}
40
- - path: original-response
41
- signal: {signal_name: lang_detection}
42
- - path: [new-instruction, value, '*']
43
- signal: {signal_name: near_dup}
44
- - path: [new-instruction, value, '*']
45
- signal: {signal_name: text_statistics}
46
- - path: [new-instruction, value, '*']
47
- signal: {signal_name: pii}
48
- - path: [new-instruction, value, '*']
49
- signal: {signal_name: lang_detection}
50
- - path: [new-context, value, '*']
51
- signal: {signal_name: near_dup}
52
- - path: [new-context, value, '*']
53
- signal: {signal_name: text_statistics}
54
- - path: [new-context, value, '*']
55
- signal: {signal_name: lang_detection}
56
- - path: [new-context, value, '*']
57
- signal: {signal_name: pii}
58
- - path: [new-response, value, '*']
59
- signal: {signal_name: near_dup}
60
- - path: [new-response, value, '*']
61
- signal: {signal_name: text_statistics}
62
- - path: [new-response, value, '*']
63
- signal: {signal_name: pii}
64
- - path: [new-response, value, '*']
65
- signal: {signal_name: lang_detection}
66
- source: {dataset_name: argilla/databricks-dolly-15k-curated-en, source_name: huggingface}
67
- tags: [machine-learning]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
data/datasets/lilac/databricks-dolly-15k-curated-en/data-00000-of-00001.parquet DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ad225b50d5880a097ea66eb4ca70fc529c0321cf8a5652bd8fbe7a638d016851
3
- size 15882489
 
 
 
 
data/datasets/lilac/databricks-dolly-15k-curated-en/manifest.json DELETED
@@ -1,87 +0,0 @@
1
- {
2
- "files": [
3
- "data-00000-of-00001.parquet"
4
- ],
5
- "data_schema": {
6
- "fields": {
7
- "id": {
8
- "dtype": "string"
9
- },
10
- "category": {
11
- "dtype": "string"
12
- },
13
- "original-instruction": {
14
- "dtype": "string"
15
- },
16
- "original-context": {
17
- "dtype": "string"
18
- },
19
- "original-response": {
20
- "dtype": "string"
21
- },
22
- "new-instruction": {
23
- "fields": {
24
- "user_id": {
25
- "repeated_field": {
26
- "dtype": "string"
27
- }
28
- },
29
- "value": {
30
- "repeated_field": {
31
- "dtype": "string"
32
- }
33
- },
34
- "status": {
35
- "repeated_field": {
36
- "dtype": "string"
37
- }
38
- }
39
- }
40
- },
41
- "new-context": {
42
- "fields": {
43
- "user_id": {
44
- "repeated_field": {
45
- "dtype": "string"
46
- }
47
- },
48
- "value": {
49
- "repeated_field": {
50
- "dtype": "string"
51
- }
52
- },
53
- "status": {
54
- "repeated_field": {
55
- "dtype": "string"
56
- }
57
- }
58
- }
59
- },
60
- "new-response": {
61
- "fields": {
62
- "user_id": {
63
- "repeated_field": {
64
- "dtype": "string"
65
- }
66
- },
67
- "value": {
68
- "repeated_field": {
69
- "dtype": "string"
70
- }
71
- },
72
- "status": {
73
- "repeated_field": {
74
- "dtype": "string"
75
- }
76
- }
77
- }
78
- },
79
- "external_id": {
80
- "dtype": "string"
81
- },
82
- "__hfsplit__": {
83
- "dtype": "string"
84
- }
85
- }
86
- }
87
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
data/datasets/lilac/databricks-dolly-15k-curated-en/new-context/value/gte-small/hnsw.hnswlib.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:c879460250e68b6195eed6b48afa2fa2a7b8127483a299818a13f82ed7fea8dc
3
- size 32553584
 
 
 
 
data/datasets/lilac/databricks-dolly-15k-curated-en/new-context/value/gte-small/hnsw.lookup.pkl DELETED
Binary file (522 kB)
 
data/datasets/lilac/databricks-dolly-15k-curated-en/new-context/value/gte-small/signal_manifest.json DELETED
@@ -1,40 +0,0 @@
1
- {
2
- "files": [],
3
- "parquet_id": "new-context.value.gte-small",
4
- "data_schema": {
5
- "fields": {
6
- "new-context": {
7
- "fields": {
8
- "value": {
9
- "repeated_field": {
10
- "fields": {
11
- "gte-small": {
12
- "repeated_field": {
13
- "fields": {
14
- "embedding": {
15
- "dtype": "embedding"
16
- }
17
- },
18
- "dtype": "string_span"
19
- },
20
- "signal": {
21
- "signal_name": "gte-small"
22
- }
23
- }
24
- }
25
- }
26
- }
27
- }
28
- }
29
- }
30
- },
31
- "signal": {
32
- "signal_name": "gte-small"
33
- },
34
- "enriched_path": [
35
- "new-context",
36
- "value",
37
- "*"
38
- ],
39
- "vector_store": "hnsw"
40
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
data/datasets/lilac/databricks-dolly-15k-curated-en/new-context/value/gte-small/spans.pkl DELETED
Binary file (351 kB)