---
# Configuration for the MediaWiki import tooling.
# Storage locations.
database:
  # Path to the SQLite database file.
  sqlite_path: './Databases/media_summary.db'
  # Path of the Chroma store.
  # NOTE(review): relative path; presumably resolved against the working
  # directory — confirm with the loader.
  chroma_db_path: 'chroma_db'

# Text chunking defaults.
chunking:
  # Matches an entry in `methods` below.
  default_method: 'sentences'
  # NOTE(review): the unit of size/overlap presumably depends on the chosen
  # method (sentences, words, ...) — confirm with the consumer.
  default_size: 1000
  default_overlap: 100
  adaptive: true
  language: 'en'
  # Chunking methods offered by this configuration.
  methods:
    - 'sentences'
    - 'words'
    - 'paragraphs'
    - 'tokens'

# Import defaults.
import:
  batch_size: 1000
  default_skip_redirects: true
  # Flow style kept for this short leaf list.
  # NOTE(review): 0 is presumably the MediaWiki main/article namespace —
  # confirm.
  default_namespaces: [0]
  single_item_default: false

# Processing settings.
processing:
  # Upper bound on workers.
  # NOTE(review): threads vs. processes is not visible here — confirm.
  max_workers: 4

# Embedding backend settings.
embeddings:
  provider: 'openai'
  model: 'text-embedding-ada-002'
  # Placeholder value. Do not commit a real key to version control; prefer
  # an environment variable or secret store if the loader supports one.
  api_key: 'your_openai_api_key_here'
  # NOTE(review): presumably only used when `provider` selects a local
  # embedding server — confirm.
  local_url: 'http://localhost:8080/embeddings'

# ChromaDB settings.
chromadb:
  # Prefix for collection names.
  collection_prefix: 'mediawiki_'

# Logging settings.
logging:
  level: 'INFO'
  # Log file name (relative path).
  file: 'mediawiki_import.log'

# Import checkpoint settings.
checkpoints:
  enabled: true
  # Directory name for checkpoint data (relative path).
  directory: 'import_checkpoints'

# Retry policy.
error_handling:
  max_retries: 3
  # NOTE(review): unit is presumably seconds — confirm with the consumer.
  retry_delay: 5

# UI defaults and bounds for chunk settings.
ui:
  default_chunk_size: 1000
  # The default above lies within these min/max bounds.
  min_chunk_size: 100
  max_chunk_size: 2000
  default_chunk_overlap: 100