davidmezzetti committed on
Commit
c926447
1 Parent(s): 8bea2db
Files changed (6) hide show
  1. .gitattributes +3 -0
  2. README.md +41 -0
  3. config.json +27 -0
  4. documents +3 -0
  5. embeddings +3 -0
  6. graph +3 -0
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ documents filter=lfs diff=lfs merge=lfs -text
37
+ embeddings filter=lfs diff=lfs merge=lfs -text
38
+ graph filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ inference: false
3
+ language: en
4
+ license: apache-2.0
5
+ library_name: txtai
6
+ tags:
7
+ - sentence-similarity
8
+ datasets:
9
+ - maxiw/hf-posts
10
+ ---
11
+
12
+ # Hugging Face Posts txtai embeddings index
13
+
14
+ This is a [txtai](https://github.com/neuml/txtai) embeddings index for the [Hugging Face Posts dataset](https://huggingface.co/datasets/maxiw/hf-posts).
15
+
16
+ txtai must be [installed](https://neuml.github.io/txtai/install/) to use this index.
17
+
18
+ ## Example
19
+
20
+ This index can be loaded from the Hugging Face Hub with txtai as shown below.
21
+
22
+ ```python
23
+ from txtai import Embeddings
24
+
25
+ # Load the index from the HF Hub
26
+ embeddings = Embeddings()
27
+ embeddings.load(provider="huggingface-hub", container="neuml/txtai-hfposts")
28
+
29
+ # Search for posts discussing transformers
30
+ embeddings.search("transformers")
31
+ ```
32
+
33
+ ## Use Cases
34
+
35
+ Hugging Face Posts is an exploratory dataset to analyze what is being discussed on the [Hugging Face Posts](https://huggingface.co/posts) platform.
36
+
37
+ An embeddings index generated by txtai is a fully encapsulated index format. It doesn't require a database server or dependencies outside of the Python install.
38
+
39
+ ## More information
40
+
41
+ Read more about this index and how it was built in [this article](https://neuml.hashnode.dev/analyzing-hugging-face-posts-with-graphs-and-agents).
config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "autoid": "uuid5",
3
+ "path": "intfloat/e5-large",
4
+ "instructions": {
5
+ "query": "query: ",
6
+ "data": "passage: "
7
+ },
8
+ "content": true,
9
+ "graph": {
10
+ "approximate": false,
11
+ "minscore": 0.7,
12
+ "backend": "networkx"
13
+ },
14
+ "dimensions": 1024,
15
+ "backend": "faiss",
16
+ "offset": 2454,
17
+ "build": {
18
+ "create": "2024-11-21T16:05:41Z",
19
+ "python": "3.9.20",
20
+ "settings": {
21
+ "components": "IDMap,Flat"
22
+ },
23
+ "system": "Linux (x86_64)",
24
+ "txtai": "8.1.0"
25
+ },
26
+ "update": "2024-11-21T16:05:41Z"
27
+ }
documents ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42e4b5c483a9d53376a2f3fb4923181cf3f5af8c8c58f688605768ede1a15dc6
3
+ size 5894144
embeddings ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72b4e4b8a160ffe186296a2a9f76df9f7016b0a138cf5baec6beb9db3869fc52
3
+ size 10071306
graph ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b3c68c09d72da18e813117f28f2b940534b1cf8719f6b78b969a97bb472ce02
3
+ size 2707445