oceansweep committed
Commit e88c10c
1 Parent(s): 74df9f9
Update Config_Files/config.txt
Files changed: Config_Files/config.txt (+10 -6)
Config_Files/config.txt  CHANGED
@@ -61,18 +61,22 @@ chroma_db_path = chroma_db
 
 [Embeddings]
 embedding_provider = openai
-
-
-
-
-
-#
+embedding_model = text-embedding-3-small
+embedding_api_url = http://localhost:8080/v1/embeddings
+embedding_api_key = your_api_key_here
+chunk_size = 400
+overlap = 200
+# 'embedding_provider' Can be 'openai', 'local', or 'huggingface'
+# `embedding_model` Set to the model name you want to use for embeddings. For OpenAI, this can be 'text-embedding-3-small', or 'text-embedding-3-large'.
+# huggingface: model = dunzhang/stella_en_400M_v5
 
 [Chunking]
 method = words
+# 'method' Can be 'words' / 'sentences' / 'paragraphs' / 'semantic' / 'tokens'
 max_size = 400
 overlap = 200
 adaptive = false
+# Use ntlk+punkt to split text into sentences and then ID average sentence length and set that as the chunk size
 multi_level = false
 language = english
 
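
For context on how the new [Embeddings] keys fit together, here is a minimal sketch (not code from this repo; the embed() helper and request payload shape are assumptions based on the standard OpenAI-style embeddings API) that reads config.txt with Python's configparser and posts to the configured endpoint:

import configparser
import requests

config = configparser.ConfigParser()
config.read("Config_Files/config.txt")

emb = config["Embeddings"]
provider = emb.get("embedding_provider", "openai")   # 'openai', 'local', or 'huggingface'
model = emb.get("embedding_model", "text-embedding-3-small")
api_url = emb.get("embedding_api_url", "http://localhost:8080/v1/embeddings")
api_key = emb.get("embedding_api_key", "")

def embed(texts):
    # POST an OpenAI-style embeddings request and return one vector per input string.
    resp = requests.post(
        api_url,
        headers={"Authorization": f"Bearer {api_key}"},
        json={"model": model, "input": texts},
        timeout=60,
    )
    resp.raise_for_status()
    data = resp.json()["data"]
    # Each item carries an 'index'; sort so vectors line up with the inputs.
    return [item["embedding"] for item in sorted(data, key=lambda d: d["index"])]

vectors = embed(["hello world"])
print(len(vectors), len(vectors[0]))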
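
The comment "# huggingface: model = dunzhang/stella_en_400M_v5" points at a Hugging Face provider path; a similarly rough sketch using the sentence-transformers library (whether the project actually goes through sentence-transformers is an assumption, and stella_en_400M_v5 needs trust_remote_code):

from sentence_transformers import SentenceTransformer

# Load the model named in the config comment and embed a couple of strings locally.
st_model = SentenceTransformer("dunzhang/stella_en_400M_v5", trust_remote_code=True)
vectors = st_model.encode(["hello world", "another passage"])
print(vectors.shape)  # (2, embedding_dim)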
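
And for the [Chunking] section, a hypothetical word-window splitter matching the settings and the new comments: method = words with max_size/overlap, plus an adaptive branch that, per the comment, uses nltk's punkt sentence tokenizer to derive the chunk size from the average sentence length (chunk_words and its exact behaviour are assumptions, not the repo's implementation):

import nltk

def chunk_words(text, max_size=400, overlap=200, adaptive=False, language="english"):
    # Split into whitespace-delimited words, then emit overlapping windows of `size` words.
    words = text.split()
    size = max_size
    if adaptive:
        # Adaptive mode per the comment: average sentence length (in words) becomes the chunk size.
        nltk.download("punkt", quiet=True)
        sentences = nltk.sent_tokenize(text, language=language)
        if sentences:
            avg = sum(len(s.split()) for s in sentences) // len(sentences)
            size = max(1, min(max_size, avg))
    overlap = min(overlap, size - 1) if size > 1 else 0  # keep the window moving forward
    step = size - overlap
    return [" ".join(words[i:i + size]) for i in range(0, len(words), step)]

chunks = chunk_words("First sentence of a long document. Second sentence. ...", adaptive=False)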