# h2ogpt-chatbot / requirements.txt
# Last commit ae8a3db (pseudotensor): Update with h2oGPT hash 880439992dce589c865d5ba3a4f183902f6fc8ec
# File size at that commit: 2.09 kB
# Core dependencies, needed both for generate (gradio server) and for finetune.
# Every package here is pinned to an exact version with `==`, except peft, which is
# installed from one specific git commit.
# Keep comments on their own lines: the constraints recipe later in this file filters
# with `grep -v '#'`, which drops every line containing `#`, so a requirement with a
# trailing comment would silently vanish from the generated constraints file.
datasets==2.12.0
sentencepiece==0.1.97
accelerate==0.18.0
gradio==3.31.0
huggingface_hub==0.14.1
appdirs==1.4.4
fire==0.5.0
docutils==0.19
torch==2.0.1
evaluate==0.4.0
rouge_score==0.1.2
sacrebleu==2.3.1
scikit-learn==1.2.2
alt-profanity-check==1.2.2
better-profanity==0.6.1
numpy==1.24.2
pandas==2.0.0
matplotlib==3.7.1
loralib==0.1.1
bitsandbytes==0.38.1
# peft is taken from a fixed upstream commit instead of a PyPI release; the constraints
# recipe later in this file filters this line out by matching `peft`.
git+https://github.com/huggingface/peft.git@098962fa6515f2e4fe83a757f5995d3ffbb1c373
transformers==4.28.1
tokenizers==0.13.3
APScheduler==3.10.1
# Optional for generate.
# pynvml: Python bindings for the NVIDIA Management Library; psutil: process and system
# information. NOTE(review): presumably used for GPU/CPU/memory reporting - confirm in the app code.
pynvml==11.5.0
psutil==5.9.4
# boto3 is the AWS SDK and botocore is its low-level core.
# NOTE(review): boto3 declares a narrow botocore range, so bump the two together - confirm on upgrade.
boto3==1.26.101
botocore==1.29.101
# Optional for finetune.
# NOTE(review): tensorboard and neptune are presumably the experiment logging/tracking
# backends for training runs - confirm against the finetune code.
tensorboard==2.12.1
neptune==1.1.1
# For the gradio client.
# NOTE(review): gradio itself is pinned (gradio==3.31.0) in the core section of this file;
# when bumping either package, check that the gradio_client pin is still accepted by gradio.
gradio_client==0.2.5
beautifulsoup4==4.12.2
markdown==3.4.1
# Data preparation and testing.
pytest==7.2.2
pytest-xdist==3.2.1
nltk==3.8.1
textstat==0.7.3
pandoc==2.3
# Two alternative pandoc wrappers; only one is active. pypandoc_binary is chosen here and
# plain pypandoc is left commented out.
# NOTE(review): pypandoc_binary is the variant that ships a pandoc executable in the wheel - confirm.
#pypandoc==1.11
pypandoc_binary==1.11
openpyxl==3.1.2
lm_dataformat==0.0.20
bioc==2.0
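# To install the optional langchain requirements with the pinned versions applied as pip constraints.
# The grep drops every line containing `#` (comments) or `peft` (the git-based entry) to build the constraints file:
# grep -v '#\|peft' requirements.txt > req_constraints.txt ; pip install -r requirements_optional_langchain.txt -c req_constraints.txt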
# Optional: chat with PDF (langchain-based document question answering).
langchain==0.0.178
pypdf==3.8.1
tiktoken==0.3.3
# Avoid textract: it requires an old version of six.
#textract==1.6.5
# Choose exactly one FAISS build: faiss-cpu (commented out, unpinned) or faiss-gpu (active).
#faiss-cpu
faiss-gpu==1.7.2
# For Hugging Face embeddings.
sentence_transformers==2.2.2
# For OpenAI embeddings (requires an API key).
openai==0.27.6
# Local vector database.
chromadb==0.3.23
# Server-based vector database (disabled).
#pymilvus==2.2.8
# Weak URL support, for when opencv etc. cannot be installed. If you uncomment this one,
# then comment out unstructured[local-inference]==0.6.6 below.
# unstructured==0.6.6
# Strong support for images.
# Requires on Ubuntu: sudo apt-get install libmagic-dev poppler-utils tesseract-ocr libreoffice
unstructured[local-inference]==0.6.6
#pdf2image==1.16.3
#pytesseract==0.3.10
# NOTE(review): pillow is the only active requirement in this file without a version pin,
# so installs are not reproducible for it - consider pinning once a known-good version is confirmed.
pillow
pdfminer.six==20221105
urllib3==1.26.6
requests_file==1.5.1
# NOTE(review): the next two commented-out entries repeat the pdf2image/pytesseract pair
# listed a few lines above - one copy can probably be removed.
#pdf2image==1.16.3
#pytesseract==0.3.10
tabulate==0.9.0
# FYI: pandoc is already part of requirements.txt (pinned as pandoc==2.3).
jq==1.4.1
# To check licenses of the installed packages.
# The command below filters out every pip-licenses output line that mentions BSD, Apache
# or MIT, leaving the packages whose licenses need a manual look.
# Run: pip-licenses|grep -v 'BSD\|Apache\|MIT'
pip-licenses==4.3.0