nsthorat-lilac committed on
Commit
62e7cbc
1 Parent(s): 815971e

5e0c8617b3f7c000255740d8cc83e9b97f3540affbf45d9107328eec05f9afca

Browse files
.gitattributes ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ data/datasets/local/spotify/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
2
+ data/datasets/local/spotify/text/lang_detection/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
3
+ data/datasets/local/spotify/text/sbert/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
4
+ data/datasets/local/spotify/text/sbert/embedding/local/outerspace/v34/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
5
+ data/datasets/local/spotify/text/sbert/embeddings-00000-of-00001.keys.pkl filter=lfs diff=lfs merge=lfs -text
6
+ data/datasets/local/spotify/text/sbert/embeddings-00000-of-00001.npy filter=lfs diff=lfs merge=lfs -text
data/datasets/local/spotify/data-00000-of-00001.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32224657332b09187a737c73ab634f9d14c9ba9a240bd105f1b9819cde2afcef
3
+ size 37128682
data/datasets/local/spotify/manifest.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "files": [
3
+ "data-00000-of-00001.parquet"
4
+ ],
5
+ "data_schema": {
6
+ "fields": {
7
+ "artist": {
8
+ "dtype": "string"
9
+ },
10
+ "song": {
11
+ "dtype": "string"
12
+ },
13
+ "link": {
14
+ "dtype": "string"
15
+ },
16
+ "text": {
17
+ "dtype": "string"
18
+ },
19
+ "__line_number__": {
20
+ "dtype": "int64"
21
+ },
22
+ "__rowid__": {
23
+ "dtype": "string"
24
+ }
25
+ }
26
+ }
27
+ }
data/datasets/local/spotify/settings.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"ui": {"media_paths": [["text"]]}}
data/datasets/local/spotify/text/lang_detection/data-00000-of-00001.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f1555427c8dc3b2f1e9310f5e71b46297e607f710365e107c73c894d5a8e1b0
3
+ size 2033407
data/datasets/local/spotify/text/lang_detection/signal_manifest.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "files": [
3
+ "data-00000-of-00001.parquet"
4
+ ],
5
+ "parquet_id": "lang_detection(text)",
6
+ "data_schema": {
7
+ "fields": {
8
+ "__rowid__": {
9
+ "dtype": "string"
10
+ },
11
+ "text": {
12
+ "fields": {
13
+ "lang_detection": {
14
+ "repeated_field": {
15
+ "fields": {
16
+ "lang_code": {
17
+ "dtype": "string"
18
+ }
19
+ },
20
+ "dtype": "string_span"
21
+ },
22
+ "signal": {
23
+ "signal_name": "lang_detection"
24
+ }
25
+ }
26
+ }
27
+ }
28
+ }
29
+ },
30
+ "signal": {
31
+ "signal_name": "lang_detection"
32
+ },
33
+ "enriched_path": [
34
+ "text"
35
+ ]
36
+ }
data/datasets/local/spotify/text/sbert/data-00000-of-00001.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9796beb630cc3503f3c2ac9db8f71e4c1604570836d78bbf364e801cd427c39e
3
+ size 2709987
data/datasets/local/spotify/text/sbert/embedding/local/outerspace/v34/data-00000-of-00001.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1ba0fe68cc02849b0a20b7f72047c8e9cb8e5ef5b57b0cd642fa0b0be8a6e06
3
+ size 3340135
data/datasets/local/spotify/text/sbert/embedding/local/outerspace/v34/signal_manifest.json ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "files": [
3
+ "data-00000-of-00001.parquet"
4
+ ],
5
+ "parquet_id": "local/outerspace/v34(text.sbert.*.embedding)",
6
+ "data_schema": {
7
+ "fields": {
8
+ "__rowid__": {
9
+ "dtype": "string"
10
+ },
11
+ "text": {
12
+ "fields": {
13
+ "sbert": {
14
+ "repeated_field": {
15
+ "fields": {
16
+ "embedding": {
17
+ "fields": {
18
+ "local/outerspace/v34": {
19
+ "dtype": "float32",
20
+ "signal": {
21
+ "signal_name": "concept_score",
22
+ "embedding": "sbert",
23
+ "namespace": "local",
24
+ "concept_name": "outerspace",
25
+ "draft": "main",
26
+ "num_negative_examples": 100
27
+ },
28
+ "bins": [
29
+ [
30
+ "Not in concept",
31
+ null,
32
+ 0.5
33
+ ],
34
+ [
35
+ "In concept",
36
+ 0.5,
37
+ null
38
+ ]
39
+ ]
40
+ }
41
+ }
42
+ }
43
+ }
44
+ }
45
+ }
46
+ }
47
+ }
48
+ }
49
+ },
50
+ "signal": {
51
+ "signal_name": "concept_score",
52
+ "embedding": "sbert",
53
+ "namespace": "local",
54
+ "concept_name": "outerspace",
55
+ "draft": "main",
56
+ "num_negative_examples": 100
57
+ },
58
+ "enriched_path": [
59
+ "text",
60
+ "sbert",
61
+ "*",
62
+ "embedding"
63
+ ]
64
+ }
data/datasets/local/spotify/text/sbert/embeddings-00000-of-00001.keys.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5df43291782b8c731d4ce56537946654c642a01dc9a4e37de394836362f6b45
3
+ size 3727400
data/datasets/local/spotify/text/sbert/embeddings-00000-of-00001.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94e10c23d7229541e1f60b791a659d13673b10a03649abf0ae092e0e18c5aee3
3
+ size 170446976
data/datasets/local/spotify/text/sbert/signal_manifest.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "files": [
3
+ "data-00000-of-00001.parquet"
4
+ ],
5
+ "parquet_id": "sbert(text)",
6
+ "data_schema": {
7
+ "fields": {
8
+ "__rowid__": {
9
+ "dtype": "string"
10
+ },
11
+ "text": {
12
+ "fields": {
13
+ "sbert": {
14
+ "repeated_field": {
15
+ "fields": {
16
+ "embedding": {
17
+ "dtype": "embedding"
18
+ }
19
+ },
20
+ "dtype": "string_span"
21
+ },
22
+ "signal": {
23
+ "signal_name": "sbert"
24
+ }
25
+ }
26
+ }
27
+ }
28
+ }
29
+ },
30
+ "signal": {
31
+ "signal_name": "sbert"
32
+ },
33
+ "enriched_path": [
34
+ "text"
35
+ ],
36
+ "embedding_filename_prefix": "embeddings-00000-of-00001"
37
+ }