diff --git "a/gpt_langchain.py" "b/gpt_langchain.py"
new file mode 100644
--- /dev/null
+++ "b/gpt_langchain.py"
@@ -0,0 +1,5443 @@
+import ast
+import asyncio
+import copy
+import functools
+import glob
+import gzip
+import inspect
+import json
+import os
+import pathlib
+import pickle
+import shutil
+import subprocess
+import tempfile
+import time
+import traceback
+import types
+import typing
+import urllib.error
+import uuid
+import zipfile
+from collections import defaultdict
+from datetime import datetime
+from functools import reduce
+from operator import concat
+import filelock
+import tabulate
+import yaml
+
+from joblib import delayed
+from langchain.callbacks import streaming_stdout
+from langchain.embeddings import HuggingFaceInstructEmbeddings
+from langchain.llms.huggingface_pipeline import VALID_TASKS
+from langchain.llms.utils import enforce_stop_tokens
+from langchain.schema import LLMResult, Generation
+from langchain.tools import PythonREPLTool
+from langchain.tools.json.tool import JsonSpec
+from tqdm import tqdm
+
+from utils import wrapped_partial, EThread, import_matplotlib, sanitize_filename, makedirs, get_url, flatten_list, \
+    get_device, ProgressParallel, remove, hash_file, clear_torch_cache, NullContext, get_hf_server, FakeTokenizer, \
+    have_libreoffice, have_arxiv, have_playwright, have_selenium, have_tesseract, have_doctr, have_pymupdf, set_openai, \
+    get_list_or_str, have_pillow, only_selenium, only_playwright, only_unstructured_urls, get_sha, get_short_name, \
+    get_accordion, have_jq, get_doc, get_source, have_chromamigdb, get_token_count, reverse_ucurve_list
+from enums import DocumentSubset, no_lora_str, model_token_mapping, source_prefix, source_postfix, non_query_commands, \
+    LangChainAction, LangChainMode, DocumentChoice, LangChainTypes, font_size, head_acc, super_source_prefix, \
+    super_source_postfix, langchain_modes_intrinsic, get_langchain_prompts, LangChainAgent
+from evaluate_params import gen_hyper, gen_hyper0
+from gen import get_model, SEED, get_limited_prompt, get_docs_tokens
+from prompter import non_hf_types, PromptType, Prompter
+from src.serpapi import H2OSerpAPIWrapper
+from utils_langchain import StreamingGradioCallbackHandler, _chunk_sources, _add_meta, add_parser, fix_json_meta
+
+import_matplotlib()
+
+import numpy as np
+import pandas as pd
+import requests
+from langchain.chains.qa_with_sources import load_qa_with_sources_chain
+# , GCSDirectoryLoader, GCSFileLoader
+# , OutlookMessageLoader # GPL3
+# ImageCaptionLoader, # use our own wrapper
+#  ReadTheDocsLoader,  # no special file, some path, so have to give as special option
+from langchain.document_loaders import PyPDFLoader, TextLoader, CSVLoader, PythonLoader, TomlLoader, \
+    UnstructuredURLLoader, UnstructuredHTMLLoader, UnstructuredWordDocumentLoader, UnstructuredMarkdownLoader, \
+    EverNoteLoader, UnstructuredEmailLoader, UnstructuredODTLoader, UnstructuredPowerPointLoader, \
+    UnstructuredEPubLoader, UnstructuredImageLoader, UnstructuredRTFLoader, ArxivLoader, UnstructuredPDFLoader, \
+    UnstructuredExcelLoader, JSONLoader
+from langchain.text_splitter import Language
+from langchain.chains.question_answering import load_qa_chain
+from langchain.docstore.document import Document
+from langchain import PromptTemplate, HuggingFaceTextGenInference, HuggingFacePipeline
+from langchain.vectorstores import Chroma
+from chromamig import ChromaMig
+
+
+def split_list(input_list, split_size):
+    """
+    Lazily yield consecutive slices of input_list, each at most split_size items long.
+
+    :param input_list: sliceable sequence with a length
+    :param split_size: distance between slice starts, which is also the maximum slice length
+    :return: generator over the slices; the last slice may be shorter than split_size
+    """
+    chunk_starts = range(0, len(input_list), split_size)
+    yield from (input_list[start:start + split_size] for start in chunk_starts)
+
+
+def get_db(sources, use_openai_embedding=False, db_type='faiss',
+           persist_directory=None, load_db_if_exists=True,
+           langchain_mode='notset',
+           langchain_mode_paths=None,
+           langchain_mode_types=None,
+           collection_name=None,
+           hf_embedding_model=None,
+           migrate_embedding_model=False,
+           auto_migrate_db=False,
+           n_jobs=-1):
+    """
+    Build a vector database from sources, or add them to an already-persisted chroma database.
+
+    :param sources: documents to embed; if empty, nothing is done and None is returned
+    :param use_openai_embedding: passed to get_embedding() to select OpenAI embeddings
+    :param db_type: one of 'faiss', 'weaviate', 'chroma', 'chroma_old'
+    :param persist_directory: on-disk location; if None, obtained from get_persist_directory()
+    :param load_db_if_exists: passed to get_existing_db() for the chroma types
+    :param langchain_mode: mode name; also the default collection name (spaces replaced by '_')
+    :param langchain_mode_paths: dict of mode -> path, passed to get_existing_db(); None means empty
+    :param langchain_mode_types: dict of mode -> type.  When persist_directory is None, the entry for
+           langchain_mode is updated in place with the type returned by get_persist_directory().
+           None means a fresh empty dict for this call.
+    :param collection_name: explicit collection name; required if langchain_mode is 'notset'
+    :param hf_embedding_model: embedding model (name or preloaded object), must not be None
+    :param migrate_embedding_model: passed to get_existing_db()
+    :param auto_migrate_db: passed to get_existing_db()
+    :param n_jobs: thread count for chroma's hnsw index; None or -1 means use OMP_NUM_THREADS
+           (else half the CPU count), limited to the range 1..8
+    :return: the database object, or None if sources is empty
+    :raises RuntimeError: if db_type is not one of the supported values
+    """
+    if not sources:
+        return None
+    # Use fresh dicts per call: a shared mutable default would leak the langchain_mode_types
+    # update below from one call into every later call that relies on the default.
+    if langchain_mode_paths is None:
+        langchain_mode_paths = {}
+    if langchain_mode_types is None:
+        langchain_mode_types = {}
+    if persist_directory is None:
+        langchain_type = langchain_mode_types.get(langchain_mode, LangChainTypes.EITHER.value)
+        persist_directory, langchain_type = get_persist_directory(langchain_mode, langchain_type=langchain_type)
+        langchain_mode_types[langchain_mode] = langchain_type
+    assert hf_embedding_model is not None
+
+    # get freshly-determined embedding model
+    embedding = get_embedding(use_openai_embedding, hf_embedding_model=hf_embedding_model)
+    assert collection_name is not None or langchain_mode != 'notset'
+    if collection_name is None:
+        collection_name = langchain_mode.replace(' ', '_')
+
+    # Create vector database
+    if db_type == 'faiss':
+        from langchain.vectorstores import FAISS
+        db = FAISS.from_documents(sources, embedding)
+    elif db_type == 'weaviate':
+        import weaviate
+        from weaviate.embedded import EmbeddedOptions
+        from langchain.vectorstores import Weaviate
+
+        # WEAVIATE_URL set means use the client from _create_local_weaviate_client(), else embedded instance
+        if os.getenv('WEAVIATE_URL', None):
+            client = _create_local_weaviate_client()
+        else:
+            client = weaviate.Client(
+                embedded_options=EmbeddedOptions(persistence_data_path=persist_directory)
+            )
+        index_name = collection_name.capitalize()
+        db = Weaviate.from_documents(documents=sources, embedding=embedding, client=client, by_text=False,
+                                     index_name=index_name)
+    elif db_type in ['chroma', 'chroma_old']:
+        assert persist_directory is not None
+        # use_base already handled when making persist_directory, unless was passed into get_db()
+        makedirs(persist_directory, exist_ok=True)
+
+        # see if already actually have persistent db, and deal with possible changes in embedding
+        db, use_openai_embedding, hf_embedding_model = \
+            get_existing_db(None, persist_directory, load_db_if_exists, db_type,
+                            use_openai_embedding,
+                            langchain_mode, langchain_mode_paths, langchain_mode_types,
+                            hf_embedding_model, migrate_embedding_model, auto_migrate_db,
+                            verbose=False,
+                            n_jobs=n_jobs)
+        if db is None:
+            # no existing db: create it from scratch
+            import logging
+            logging.getLogger("chromadb").setLevel(logging.ERROR)
+            if db_type == 'chroma':
+                from chromadb.config import Settings
+                settings_extra_kwargs = dict(is_persistent=True)
+            else:
+                from chromamigdb.config import Settings
+                settings_extra_kwargs = dict(chroma_db_impl="duckdb+parquet")
+            client_settings = Settings(anonymized_telemetry=False,
+                                       persist_directory=persist_directory,
+                                       **settings_extra_kwargs)
+            if n_jobs in [None, -1]:
+                # os.cpu_count() may return None, so fall back to 1 before halving
+                n_jobs = int(os.getenv('OMP_NUM_THREADS', str((os.cpu_count() or 1) // 2)))
+                num_threads = max(1, min(n_jobs, 8))
+            else:
+                num_threads = max(1, n_jobs)
+            collection_metadata = {"hnsw:num_threads": num_threads}
+            from_kwargs = dict(embedding=embedding,
+                               persist_directory=persist_directory,
+                               collection_name=collection_name,
+                               client_settings=client_settings,
+                               collection_metadata=collection_metadata)
+            if db_type == 'chroma':
+                # insert in batches no larger than the batch size reported by the chromadb client
+                import chromadb
+                api = chromadb.PersistentClient(path=persist_directory)
+                max_batch_size = api._producer.max_batch_size
+                sources_batches = split_list(sources, max_batch_size)
+                for sources_batch in sources_batches:
+                    db = Chroma.from_documents(documents=sources_batch, **from_kwargs)
+                    db.persist()
+            else:
+                db = ChromaMig.from_documents(documents=sources, **from_kwargs)
+            clear_embedding(db)
+            save_embed(db, use_openai_embedding, hf_embedding_model)
+        else:
+            # then just add
+            # doesn't check or change embedding, just saves it in case not saved yet, after persisting
+            db, num_new_sources, new_sources_metadata = add_to_db(db, sources, db_type=db_type,
+                                                                  use_openai_embedding=use_openai_embedding,
+                                                                  hf_embedding_model=hf_embedding_model)
+    else:
+        raise RuntimeError("No such db_type=%s" % db_type)
+
+    # once here, db is not changing and embedding choices in calling functions does not matter
+    return db
+
+
+def _get_unique_sources_in_weaviate(db):
+    """
+    Page through every object of the db's weaviate class and collect the distinct 'source' properties.
+
+    :param db: object exposing _client (weaviate client) and _index_name (class name)
+    :return: set of source values found across all objects
+    """
+    page_size = 100
+    found_sources = set()
+    page = db._client.data_object.get(class_name=db._index_name, limit=page_size)
+
+    while True:
+        objects = page['objects']
+        if not objects:
+            break
+        id_source_pairs = [(item['id'], item['properties']['source']) for item in objects]
+        found_sources.update(source for _, source in id_source_pairs)
+        # continue after the id of the last object seen so far
+        cursor = id_source_pairs[-1][0]
+        page = db._client.data_object.get(class_name=db._index_name, limit=page_size, after=cursor)
+
+    return found_sources
+
+
+def del_from_db(db, sources, db_type=None):
+    """
+    Delete from a chroma collection every entry whose 'source' metadata matches one of sources.
+
+    Does nothing unless db_type is 'chroma' or 'chroma_old' and db is not None.
+
+    :param db: database object exposing _client and _collection
+    :param sources: a single source string, or list/tuple/generator of x.metadata['source'] values
+    :param db_type: database type string
+    :return: None
+    """
+    if db_type not in ['chroma', 'chroma_old'] or db is None:
+        return
+    if isinstance(sources, str):
+        sources = [sources]
+    else:
+        assert isinstance(sources, (list, tuple, types.GeneratorType))
+    # de-duplicate so each source is deleted at most once
+    unique_sources = set(sources)
+    collection = db._client.get_collection(name=db._collection.name,
+                                           embedding_function=db._collection._embedding_function)
+    for one_source in unique_sources:
+        where_filter = {'source': one_source}
+        try:
+            collection.delete(where=where_filter)
+        except KeyError:
+            # best effort: a KeyError from delete is ignored
+            pass
+
+
+def add_to_db(db, sources, db_type='faiss',
+              avoid_dup_by_file=False,
+              avoid_dup_by_content=True,
+              use_openai_embedding=False,
+              hf_embedding_model=None):
+    """
+    Add documents to an existing database, skipping or replacing duplicates where supported.
+
+    :param db: existing database object
+    :param sources: documents to add; each has a metadata dict with 'source' and optionally 'hashid'
+    :param db_type: one of 'faiss', 'weaviate', 'chroma', 'chroma_old'
+    :param avoid_dup_by_file: weaviate: skip sources whose file name is already stored.
+           chroma: not supported, raises RuntimeError
+    :param avoid_dup_by_content: weaviate: same as avoid_dup_by_file (no hash support yet).
+           chroma: skip sources whose 'hashid' is already stored, and delete stored entries that
+           share a 'source' with a document being added
+    :param use_openai_embedding: saved alongside the chroma db via save_embed()
+    :param hf_embedding_model: saved alongside the chroma db via save_embed(); must not be None
+    :return: tuple of (db, number of sources actually added, list of their metadata dicts)
+    :raises RuntimeError: for unknown db_type, or avoid_dup_by_file=True with a chroma db
+    """
+    assert hf_embedding_model is not None
+    num_new_sources = len(sources)
+    if not sources:
+        return db, num_new_sources, []
+    if db_type == 'faiss':
+        db.add_documents(sources)
+    elif db_type == 'weaviate':
+        # FIXME: only control by file name, not hash yet
+        if avoid_dup_by_file or avoid_dup_by_content:
+            unique_sources = _get_unique_sources_in_weaviate(db)
+            sources = [x for x in sources if x.metadata['source'] not in unique_sources]
+        num_new_sources = len(sources)
+        if num_new_sources == 0:
+            return db, num_new_sources, []
+        db.add_documents(documents=sources)
+    elif db_type in ['chroma', 'chroma_old']:
+        collection = get_documents(db)
+        # files we already have:
+        metadata_files = {x['source'] for x in collection['metadatas']}
+        if avoid_dup_by_file:
+            # Too weak in case file changed content, assume parent shouldn't pass true for this for now
+            raise RuntimeError("Not desired code path")
+        if avoid_dup_by_content:
+            # look at hash, instead of page_content
+            # migration: If no hash previously, avoid updating,
+            #  since don't know if need to update and may be expensive to redo all unhashed files
+            metadata_hash_ids = {x['hashid'] for x in collection['metadatas']
+                                 if 'hashid' in x and x['hashid'] not in ["None", None]}
+            # avoid sources with same hash
+            sources = [x for x in sources if x.metadata.get('hashid') not in metadata_hash_ids]
+            num_nohash = len([x for x in sources if not x.metadata.get('hashid')])
+            print("Found %s new sources (%d have no hash in original source,"
+                  " so have to reprocess for migration to sources with hash)" % (len(sources), num_nohash), flush=True)
+            # get new file names that match existing file names.  delete existing files we are overriding
+            dup_metadata_files = {x.metadata['source'] for x in sources if x.metadata['source'] in metadata_files}
+            print("Removing %s duplicate files from db because ingesting those as new documents" % len(
+                dup_metadata_files), flush=True)
+            client_collection = db._client.get_collection(name=db._collection.name,
+                                                          embedding_function=db._collection._embedding_function)
+            for dup_file in dup_metadata_files:
+                dup_file_meta = dict(source=dup_file)
+                try:
+                    client_collection.delete(where=dup_file_meta)
+                except KeyError:
+                    pass
+        num_new_sources = len(sources)
+        if num_new_sources == 0:
+            return db, num_new_sources, []
+        has_persist_directory = hasattr(db, '_persist_directory')
+        if has_persist_directory:
+            print("Existing db, adding to %s" % db._persist_directory, flush=True)
+            # chroma only
+            lock_file = get_db_lock_file(db)
+            context = filelock.FileLock
+        else:
+            lock_file = None
+            context = NullContext
+        with context(lock_file):
+            # this is place where add to db, but others maybe accessing db, so lock access.
+            # else see RuntimeError: Index seems to be corrupted or unsupported
+            if db_type == 'chroma' and has_persist_directory:
+                # batch by the limit the chromadb client reports
+                import chromadb
+                api = chromadb.PersistentClient(path=db._persist_directory)
+                max_batch_size = api._producer.max_batch_size
+                sources_batches = split_list(sources, max_batch_size)
+            else:
+                # chroma_old, or db without a persist directory: do not open a chromadb
+                # PersistentClient on it (get_db() only does so for 'chroma'); add in one batch.
+                sources_batches = [sources]
+            for sources_batch in sources_batches:
+                db.add_documents(documents=sources_batch)
+                db.persist()
+            clear_embedding(db)
+            # save here is for migration, in case old db directory without embedding saved
+            save_embed(db, use_openai_embedding, hf_embedding_model)
+    else:
+        raise RuntimeError("No such db_type=%s" % db_type)
+
+    new_sources_metadata = [x.metadata for x in sources]
+
+    return db, num_new_sources, new_sources_metadata
+
+
+def create_or_update_db(db_type, persist_directory, collection_name,
+                        user_path, langchain_type,
+                        sources, use_openai_embedding, add_if_exists, verbose,
+                        hf_embedding_model, migrate_embedding_model, auto_migrate_db,
+                        n_jobs=-1):
+    """
+    Create a database from sources, or update the existing one when add_if_exists is set.
+
+    If add_if_exists is false, an existing persist_directory is removed first and, for weaviate,
+    an existing class for the collection is deleted.  The database itself is then built or
+    updated by get_db().
+
+    :param db_type: database type string; 'weaviate' gets the extra schema handling
+    :param persist_directory: on-disk location of the database
+    :param collection_name: collection name, also passed to get_db() as langchain_mode
+    :param user_path: path registered for the collection in langchain_mode_paths
+    :param langchain_type: type registered for the collection in langchain_mode_types
+    :param sources: documents to store
+    :param use_openai_embedding: passed to get_db()
+    :param add_if_exists: whether to keep and extend an existing database
+    :param verbose: whether to print progress messages
+    :param hf_embedding_model: passed to get_db()
+    :param migrate_embedding_model: passed to get_db()
+    :param auto_migrate_db: passed to get_db()
+    :param n_jobs: passed to get_db()
+    :return: whatever get_db() returns
+    """
+    if os.path.isdir(persist_directory):
+        if not add_if_exists:
+            if verbose:
+                print("Removing %s" % persist_directory, flush=True)
+            remove(persist_directory)
+            if verbose:
+                print("Generating db", flush=True)
+    elif verbose:
+        print("Generating db", flush=True)
+
+    if db_type == 'weaviate':
+        import weaviate
+        from weaviate.embedded import EmbeddedOptions
+
+        if os.getenv('WEAVIATE_URL', None):
+            client = _create_local_weaviate_client()
+        else:
+            embedded = EmbeddedOptions(persistence_data_path=persist_directory)
+            client = weaviate.Client(embedded_options=embedded)
+
+        index_name = collection_name.replace(' ', '_').capitalize()
+        if client.schema.exists(index_name) and not add_if_exists:
+            client.schema.delete_class(index_name)
+            if verbose:
+                print("Removing %s" % index_name, flush=True)
+
+    if verbose:
+        print("Loading and updating db" if add_if_exists else "Generating db", flush=True)
+
+    get_db_kwargs = dict(use_openai_embedding=use_openai_embedding,
+                         db_type=db_type,
+                         persist_directory=persist_directory,
+                         langchain_mode=collection_name,
+                         langchain_mode_paths={collection_name: user_path},
+                         langchain_mode_types={collection_name: langchain_type},
+                         hf_embedding_model=hf_embedding_model,
+                         migrate_embedding_model=migrate_embedding_model,
+                         auto_migrate_db=auto_migrate_db,
+                         n_jobs=n_jobs)
+    return get_db(sources, **get_db_kwargs)
+
+
+from langchain.embeddings import FakeEmbeddings
+
+
+class H2OFakeEmbeddings(FakeEmbeddings):
+    """Fake embedding model whose vectors are constant (all ones) instead of random"""
+
+    # The size of the embedding vector.
+    size: int
+
+    def _get_embedding(self) -> typing.List[float]:
+        """Return a vector of `size` ones."""
+        return [1 for _ in range(self.size)]
+
+    def embed_documents(self, texts: typing.List[str]) -> typing.List[typing.List[float]]:
+        """Return one constant vector per input text."""
+        vectors = []
+        for _ in texts:
+            vectors.append(self._get_embedding())
+        return vectors
+
+    def embed_query(self, text: str) -> typing.List[float]:
+        """Return the constant vector, regardless of the query text."""
+        vector = self._get_embedding()
+        return vector
+
+
+def get_embedding(use_openai_embedding, hf_embedding_model=None, preload=False):
+    """
+    Return the embedding object selected by the arguments.
+
+    :param use_openai_embedding: if true, return OpenAIEmbeddings (requires ENV OPENAI_API_KEY)
+    :param hf_embedding_model: must not be None.  'fake' gives H2OFakeEmbeddings; another str is
+           treated as a HuggingFace model name; a dict yields its 'model' entry; any other
+           object is returned as-is
+    :param preload: stored on the client of a newly created HuggingFace embedding
+    :return: embedding object
+    """
+    assert hf_embedding_model is not None
+    if use_openai_embedding:
+        assert os.getenv("OPENAI_API_KEY") is not None, "Set ENV OPENAI_API_KEY"
+        from langchain.embeddings import OpenAIEmbeddings
+        return OpenAIEmbeddings(disallowed_special=())
+
+    if hf_embedding_model == 'fake':
+        return H2OFakeEmbeddings(size=1)
+
+    if isinstance(hf_embedding_model, dict):
+        # embedding itself preloaded globally
+        return hf_embedding_model['model']
+    if not isinstance(hf_embedding_model, str):
+        # already an embedding object
+        return hf_embedding_model
+
+    # import late, to ensure can fork without deadlock
+    from langchain.embeddings import HuggingFaceEmbeddings
+
+    device, torch_dtype, context_class = get_device_dtype()
+    model_kwargs = {'device': device}
+    if 'instructor' in hf_embedding_model:
+        embedding = HuggingFaceInstructEmbeddings(model_name=hf_embedding_model,
+                                                  model_kwargs=model_kwargs,
+                                                  encode_kwargs={'normalize_embeddings': True})
+    else:
+        embedding = HuggingFaceEmbeddings(model_name=hf_embedding_model, model_kwargs=model_kwargs)
+    embedding.client.preload = preload
+    return embedding
+
+
+def get_answer_from_sources(chain, sources, question):
+    """
+    Call chain with the documents and question, and return the "output_text" entry of its result.
+
+    :param chain: callable taking an inputs dict and return_only_outputs keyword
+    :param sources: documents passed as "input_documents"
+    :param question: passed as "question"
+    :return: value stored under "output_text" in the chain's result
+    """
+    chain_inputs = dict(input_documents=sources, question=question)
+    chain_outputs = chain(chain_inputs, return_only_outputs=True)
+    return chain_outputs["output_text"]
+
+
+"""Wrapper around Huggingface text generation inference API."""
+from functools import partial
+from typing import Any, Dict, List, Optional, Set, Iterable
+
+from pydantic import Extra, Field, root_validator
+
+from langchain.callbacks.manager import CallbackManagerForLLMRun, AsyncCallbackManagerForLLMRun
+from langchain.llms.base import LLM
+
+
+class GradioInference(LLM):
+    """
+    Gradio generation inference API.
+
+    LangChain LLM that sends each prompt to the '/submit_nochat_api' endpoint of a gradio
+    server through a gradio_client Client, in either blocking or streaming mode.
+    """
+    # URL used to create the gradio client when no client is passed in
+    inference_server_url: str = ""
+
+    # generation parameters, forwarded as-is to the server
+    temperature: float = 0.8
+    top_p: Optional[float] = 0.95
+    top_k: Optional[int] = None
+    num_beams: Optional[int] = 1
+    max_new_tokens: int = 512
+    min_new_tokens: int = 1
+    early_stopping: bool = False
+    max_time: int = 180
+    repetition_penalty: Optional[float] = None
+    num_return_sequences: Optional[int] = 1
+    do_sample: bool = False
+    # whether prompt goes into chat fields (instruction/iinput) or the *_nochat fields
+    chat_client: bool = False
+
+    return_full_text: bool = False
+    stream_output: bool = False
+    sanitize_bot_response: bool = False
+
+    # prompter supplies prompt_type and extracts the response from prompt + generated text
+    prompter: Any = None
+    context: Any = ''
+    iinput: Any = ''
+    client: Any = None
+    # tokenizer used by get_token_ids() for token counting
+    tokenizer: Any = None
+
+    system_prompt: Any = None
+    visible_models: Any = None
+    h2ogpt_key: Any = None
+
+    # running token totals, updated by each _call()
+    count_input_tokens: Any = 0
+    count_output_tokens: Any = 0
+
+    min_max_new_tokens: Any = 256
+
+    class Config:
+        """Configuration for this pydantic object."""
+
+        extra = Extra.forbid
+
+    @root_validator()
+    def validate_environment(cls, values: Dict) -> Dict:
+        """
+        Validate that python package exists in environment.
+
+        If no client was given, create a gradio_client.Client for inference_server_url.
+
+        :raises ImportError: if gradio_client is not installed
+        """
+
+        try:
+            if values['client'] is None:
+                import gradio_client
+                values["client"] = gradio_client.Client(
+                    values["inference_server_url"]
+                )
+        except ImportError:
+            raise ImportError(
+                "Could not import gradio_client python package. "
+                "Please install it with `pip install gradio_client`."
+            )
+        return values
+
+    @property
+    def _llm_type(self) -> str:
+        """Return type of llm."""
+        return "gradio_inference"
+
+    def _call(
+            self,
+            prompt: str,
+            stop: Optional[List[str]] = None,
+            run_manager: Optional[CallbackManagerForLLMRun] = None,
+            **kwargs: Any,
+    ) -> str:
+        """
+        Send prompt to the gradio server and return the response extracted by the prompter.
+
+        With stream_output=False a single blocking predict() call is made.  Otherwise the job
+        is submitted and polled; each newly seen piece of text is passed to
+        run_manager.on_llm_new_token when a run_manager is given.
+
+        :param prompt: prompt text, without prompt_type templating applied
+        :param stop: not used by this method
+        :param run_manager: optional callback manager used for streaming new tokens
+        :param kwargs: not used by this method
+        :return: response text
+        """
+        # NOTE: prompt here has no prompt_type (e.g. human: bot:) prompt injection,
+        # so server should get prompt_type or '', not plain
+        # This is good, so gradio server can also handle stopping.py conditions
+        # this is different than TGI server that uses prompter to inject prompt_type prompting
+        stream_output = self.stream_output
+        gr_client = self.client
+        client_langchain_mode = 'Disabled'
+        client_add_chat_history_to_context = True
+        client_add_search_to_context = False
+        client_chat_conversation = []
+        client_langchain_action = LangChainAction.QUERY.value
+        client_langchain_agents = []
+        top_k_docs = 1
+        chunk = True
+        chunk_size = 512
+        client_kwargs = dict(instruction=prompt if self.chat_client else '',  # only for chat=True
+                             iinput=self.iinput if self.chat_client else '',  # only for chat=True
+                             context=self.context,
+                             # streaming output is supported, loops over and outputs each generation in streaming mode
+                             # but leave stream_output=False for simple input/output mode
+                             stream_output=stream_output,
+                             prompt_type=self.prompter.prompt_type,
+                             prompt_dict='',
+
+                             temperature=self.temperature,
+                             top_p=self.top_p,
+                             top_k=self.top_k,
+                             num_beams=self.num_beams,
+                             max_new_tokens=self.max_new_tokens,
+                             min_new_tokens=self.min_new_tokens,
+                             early_stopping=self.early_stopping,
+                             max_time=self.max_time,
+                             repetition_penalty=self.repetition_penalty,
+                             num_return_sequences=self.num_return_sequences,
+                             do_sample=self.do_sample,
+                             chat=self.chat_client,
+
+                             instruction_nochat=prompt if not self.chat_client else '',
+                             iinput_nochat=self.iinput if not self.chat_client else '',
+                             langchain_mode=client_langchain_mode,
+                             add_chat_history_to_context=client_add_chat_history_to_context,
+                             langchain_action=client_langchain_action,
+                             langchain_agents=client_langchain_agents,
+                             top_k_docs=top_k_docs,
+                             chunk=chunk,
+                             chunk_size=chunk_size,
+                             document_subset=DocumentSubset.Relevant.name,
+                             document_choice=[DocumentChoice.ALL.value],
+                             pre_prompt_query=None,
+                             prompt_query=None,
+                             pre_prompt_summary=None,
+                             prompt_summary=None,
+                             system_prompt=self.system_prompt,
+                             image_loaders=None,  # don't need to further do doc specific things
+                             pdf_loaders=None,  # don't need to further do doc specific things
+                             url_loaders=None,  # don't need to further do doc specific things
+                             jq_schema=None,  # don't need to further do doc specific things
+                             visible_models=self.visible_models,
+                             h2ogpt_key=self.h2ogpt_key,
+                             add_search_to_context=client_add_search_to_context,
+                             chat_conversation=client_chat_conversation,
+                             text_context_list=None,
+                             docs_ordering_type=None,
+                             min_max_new_tokens=self.min_max_new_tokens,
+                             )
+        api_name = '/submit_nochat_api'  # NOTE: like submit_nochat but stable API for string dict passing
+        self.count_input_tokens += self.get_num_tokens(prompt)
+
+        if not stream_output:
+            # kwargs travel as the str() of a dict; the reply is a dict literal with a 'response' key
+            res = gr_client.predict(str(dict(client_kwargs)), api_name=api_name)
+            res_dict = ast.literal_eval(res)
+            text = res_dict['response']
+            ret = self.prompter.get_response(prompt + text, prompt=prompt,
+                                             sanitize_bot_response=self.sanitize_bot_response)
+            self.count_output_tokens += self.get_num_tokens(ret)
+            return ret
+        else:
+            text_callback = None
+            if run_manager:
+                text_callback = partial(
+                    run_manager.on_llm_new_token, verbose=self.verbose
+                )
+
+            job = gr_client.submit(str(dict(client_kwargs)), api_name=api_name)
+            # text0 holds the response text already passed to the callback
+            text0 = ''
+            while not job.done():
+                if job.communicator.job.latest_status.code.name == 'FINISHED':
+                    break
+                e = job.future._exception
+                if e is not None:
+                    break
+                outputs_list = job.communicator.job.outputs
+                if outputs_list:
+                    res = job.communicator.job.outputs[-1]
+                    res_dict = ast.literal_eval(res)
+                    text = res_dict['response']
+                    text = self.prompter.get_response(prompt + text, prompt=prompt,
+                                                      sanitize_bot_response=self.sanitize_bot_response)
+                    # FIXME: derive chunk from full for now
+                    text_chunk = text[len(text0):]
+                    if not text_chunk:
+                        # nothing new yet: still sleep, else this polling loop spins the CPU
+                        time.sleep(0.01)
+                        continue
+                    # save old
+                    text0 = text
+
+                    if text_callback:
+                        text_callback(text_chunk)
+
+                time.sleep(0.01)
+
+            # ensure get last output to avoid race
+            res_all = job.outputs()
+            if len(res_all) > 0:
+                res = res_all[-1]
+                res_dict = ast.literal_eval(res)
+                text = res_dict['response']
+                # FIXME: derive chunk from full for now
+            else:
+                # go with old if failure
+                text = text0
+            text_chunk = text[len(text0):]
+            if text_callback:
+                text_callback(text_chunk)
+            ret = self.prompter.get_response(prompt + text, prompt=prompt,
+                                             sanitize_bot_response=self.sanitize_bot_response)
+            self.count_output_tokens += self.get_num_tokens(ret)
+            return ret
+
+    def get_token_ids(self, text: str) -> List[int]:
+        """Return token ids for text using this object's tokenizer."""
+        return self.tokenizer.encode(text)
+        # avoid base method that is not aware of how to properly tokenize (uses GPT2)
+        # return _get_token_ids_default_method(text)
+
+
+class H2OHuggingFaceTextGenInference(HuggingFaceTextGenInference):
+    max_new_tokens: int = 512
+    do_sample: bool = False
+    top_k: Optional[int] = None
+    top_p: Optional[float] = 0.95
+    typical_p: Optional[float] = 0.95
+    temperature: float = 0.8
+    repetition_penalty: Optional[float] = None
+    return_full_text: bool = False
+    stop_sequences: List[str] = Field(default_factory=list)
+    seed: Optional[int] = None
+    inference_server_url: str = ""
+    timeout: int = 300
+    headers: dict = None
+    stream_output: bool = False
+    sanitize_bot_response: bool = False
+    prompter: Any = None
+    context: Any = ''
+    iinput: Any = ''
+    tokenizer: Any = None
+    async_sem: Any = None
+    count_input_tokens: Any = 0
+    count_output_tokens: Any = 0
+
+    def _call(
+            self,
+            prompt: str,
+            stop: Optional[List[str]] = None,
+            run_manager: Optional[CallbackManagerForLLMRun] = None,
+            **kwargs: Any,
+    ) -> str:
+        """
+        Generate text for prompt with the text-generation-inference client.
+
+        The prompt is first limited by H2OTextGenerationPipeline.limit_prompt() and then
+        wrapped by the prompter.  With stream_output=False one generate() call is made and the
+        text is cut at the first occurrence of any stop sequence.  Otherwise tokens are streamed,
+        non-special tokens are passed to run_manager.on_llm_new_token, and streaming ends at
+        the first token containing a stop sequence.
+
+        :param prompt: instruction text, without prompt_type templating applied
+        :param stop: extra stop sequences, combined with self.stop_sequences; not modified
+        :param run_manager: optional callback manager used for streaming new tokens
+        :param kwargs: extra generation arguments that override the defaults sent to the server
+        :return: response text as extracted by the prompter
+        """
+        # Combine into a new list so the caller's list is never extended in place,
+        # and drop duplicates while keeping first-seen order.
+        stop = list(dict.fromkeys(list(stop or []) + list(self.stop_sequences)))
+
+        # HF inference server needs control over input tokens
+        assert self.tokenizer is not None
+        from h2oai_pipeline import H2OTextGenerationPipeline
+        prompt, num_prompt_tokens = H2OTextGenerationPipeline.limit_prompt(prompt, self.tokenizer)
+
+        # NOTE: TGI server does not add prompting, so must do here
+        data_point = dict(context=self.context, instruction=prompt, input=self.iinput)
+        prompt = self.prompter.generate_prompt(data_point)
+        self.count_input_tokens += self.get_num_tokens(prompt)
+
+        gen_server_kwargs = dict(do_sample=self.do_sample,
+                                 stop_sequences=stop,
+                                 max_new_tokens=self.max_new_tokens,
+                                 top_k=self.top_k,
+                                 top_p=self.top_p,
+                                 typical_p=self.typical_p,
+                                 temperature=self.temperature,
+                                 repetition_penalty=self.repetition_penalty,
+                                 return_full_text=self.return_full_text,
+                                 seed=self.seed,
+                                 )
+        gen_server_kwargs.update(kwargs)
+
+        # lower bound because client is re-used if multi-threading
+        self.client.timeout = max(300, self.timeout)
+
+        if not self.stream_output:
+            res = self.client.generate(
+                prompt,
+                **gen_server_kwargs,
+            )
+            if self.return_full_text:
+                gen_text = res.generated_text[len(prompt):]
+            else:
+                gen_text = res.generated_text
+            # cut the generated text at the first occurrence of each stop sequence
+            for stop_seq in stop:
+                if stop_seq in gen_text:
+                    gen_text = gen_text[:gen_text.index(stop_seq)]
+            text = prompt + gen_text
+            text = self.prompter.get_response(text, prompt=prompt,
+                                              sanitize_bot_response=self.sanitize_bot_response)
+        else:
+            text_callback = None
+            if run_manager:
+                text_callback = partial(
+                    run_manager.on_llm_new_token, verbose=self.verbose
+                )
+            # parent handler of streamer expects to see prompt first else output="" and lose if prompt=None in prompter
+            if text_callback:
+                text_callback(prompt)
+            text = ""
+            # Note: Streaming ignores return_full_text=True
+            for response in self.client.generate_stream(prompt, **gen_server_kwargs):
+                text_chunk = response.token.text
+                text += text_chunk
+                text = self.prompter.get_response(prompt + text, prompt=prompt,
+                                                  sanitize_bot_response=self.sanitize_bot_response)
+                # stream part: stop as soon as the new token contains a stop sequence
+                if any(stop_seq in text_chunk for stop_seq in stop):
+                    break
+                if not response.token.special:
+                    if text_callback:
+                        text_callback(text_chunk)
+        self.count_output_tokens += self.get_num_tokens(text)
+        return text
+
+    async def _acall(
+            self,
+            prompt: str,
+            stop: Optional[List[str]] = None,
+            run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
+            **kwargs: Any,
+    ) -> str:
+        # print("acall", flush=True)
+        if stop is None:
+            stop = self.stop_sequences.copy()
+        else:
+            stop += self.stop_sequences.copy()
+        stop_tmp = stop.copy()
+        stop = []
+        [stop.append(x) for x in stop_tmp if x not in stop]
+
+        # HF inference server needs control over input tokens
+        assert self.tokenizer is not None
+        from h2oai_pipeline import H2OTextGenerationPipeline
+        prompt, num_prompt_tokens = H2OTextGenerationPipeline.limit_prompt(prompt, self.tokenizer)
+
+        # NOTE: TGI server does not add prompting, so must do here
+        data_point = dict(context=self.context, instruction=prompt, input=self.iinput)
+        prompt = self.prompter.generate_prompt(data_point)
+
+        gen_text = await super()._acall(prompt, stop=stop, run_manager=run_manager, **kwargs)
+
+        # remove stop sequences from the end of the generated text
+        for stop_seq in stop:
+            if stop_seq in gen_text:
+                gen_text = gen_text[:gen_text.index(stop_seq)]
+        text = prompt + gen_text
+        text = self.prompter.get_response(text, prompt=prompt,
+                                          sanitize_bot_response=self.sanitize_bot_response)
+        # print("acall done", flush=True)
+        return text
+
+    async def _agenerate(
+            self,
+            prompts: List[str],
+            stop: Optional[List[str]] = None,
+            run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
+            **kwargs: Any,
+    ) -> LLMResult:
+        """Run the LLM on the given prompt and input."""
+        generations = []
+        new_arg_supported = inspect.signature(self._acall).parameters.get("run_manager")
+        self.count_input_tokens += sum([self.get_num_tokens(prompt) for prompt in prompts])
+        tasks = [
+            asyncio.ensure_future(self._agenerate_one(prompt, stop=stop, run_manager=run_manager,
+                                                      new_arg_supported=new_arg_supported, **kwargs))
+            for prompt in prompts
+        ]
+        texts = await asyncio.gather(*tasks)
+        self.count_output_tokens += sum([self.get_num_tokens(text) for text in texts])
+        [generations.append([Generation(text=text)]) for text in texts]
+        return LLMResult(generations=generations)
+
+    async def _agenerate_one(
+            self,
+            prompt: str,
+            stop: Optional[List[str]] = None,
+            run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
+            new_arg_supported=None,
+            **kwargs: Any,
+    ) -> str:
+        async with self.async_sem:  # semaphore limits num of simultaneous downloads
+            return await self._acall(prompt, stop=stop, run_manager=run_manager, **kwargs) \
+                if new_arg_supported else \
+                await self._acall(prompt, stop=stop, **kwargs)
+
+    def get_token_ids(self, text: str) -> List[int]:
+        return self.tokenizer.encode(text)
+        # avoid base method that is not aware of how to properly tokenize (uses GPT2)
+        # return _get_token_ids_default_method(text)
+
+
+from langchain.chat_models import ChatOpenAI, AzureChatOpenAI
+from langchain.llms import OpenAI, AzureOpenAI, Replicate
+from langchain.llms.openai import _streaming_response_template, completion_with_retry, _update_response, \
+    update_token_usage
+
+
+class H2OOpenAI(OpenAI):
+    """
+    New class to handle vLLM's use of OpenAI, no vllm_chat supported, so only need here
+    Handles prompting that OpenAI doesn't need, stopping as well
+    """
+    # Extra fields read by _generate() / get_token_ids() below
+    # stop_sequences: merged with the per-call `stop` argument
+    stop_sequences: Any = None
+    # sanitize_bot_response: forwarded to prompter.get_response()
+    sanitize_bot_response: bool = False
+    # prompter: must provide generate_prompt() and get_response()
+    prompter: Any = None
+    # context / iinput: placed into the data_point handed to prompter.generate_prompt()
+    context: Any = ''
+    iinput: Any = ''
+    # tokenizer: must provide encode(); _generate() asserts it is not None
+    tokenizer: Any = None
+
+    @classmethod
+    def _all_required_field_names(cls) -> Set:
+        # super(OpenAI, cls) starts the attribute lookup *after* OpenAI in the MRO;
+        # the set it returns is extended with the extra field names used by this class
+        _all_required_field_names = super(OpenAI, cls)._all_required_field_names()
+        _all_required_field_names.update(
+            {'top_p', 'frequency_penalty', 'presence_penalty', 'stop_sequences', 'sanitize_bot_response', 'prompter',
+             'tokenizer', 'logit_bias'})
+        return _all_required_field_names
+
+    def _generate(
+            self,
+            prompts: List[str],
+            stop: Optional[List[str]] = None,
+            run_manager: Optional[CallbackManagerForLLMRun] = None,
+            **kwargs: Any,
+    ) -> LLMResult:
+        # Wraps every prompt with the prompter, then sends the completion request(s),
+        # either streaming (single prompt only) or not, and returns create_llm_result(...)
+
+        # merge instance and per-call stop sequences, dropping duplicates but keeping order
+        # NOTE(review): raises TypeError if self.stop_sequences is still the declared default None
+        stop_tmp = self.stop_sequences if not stop else self.stop_sequences + stop
+        stop = []
+        [stop.append(x) for x in stop_tmp if x not in stop]
+
+        # HF inference server needs control over input tokens
+        assert self.tokenizer is not None
+        from h2oai_pipeline import H2OTextGenerationPipeline
+        # NOTE(review): the wrapped prompt is written back into the caller's `prompts` list (in-place)
+        for prompti, prompt in enumerate(prompts):
+            prompt, num_prompt_tokens = H2OTextGenerationPipeline.limit_prompt(prompt, self.tokenizer)
+            # NOTE: OpenAI/vLLM server does not add prompting, so must do here
+            data_point = dict(context=self.context, instruction=prompt, input=self.iinput)
+            prompt = self.prompter.generate_prompt(data_point)
+            prompts[prompti] = prompt
+
+        # per-call kwargs override the instance's invocation params
+        params = self._invocation_params
+        params = {**params, **kwargs}
+        sub_prompts = self.get_sub_prompts(params, prompts, stop)
+        choices = []
+        token_usage: Dict[str, int] = {}
+        # Get the token usage from the response.
+        # Includes prompt, completion, and total tokens used.
+        _keys = {"completion_tokens", "prompt_tokens", "total_tokens"}
+        # last prompter-processed text seen while streaming, written into choices[0] at the end
+        text = ''
+        for _prompts in sub_prompts:
+            if self.streaming:
+                text_with_prompt = ""
+                prompt = _prompts[0]
+                if len(_prompts) > 1:
+                    raise ValueError("Cannot stream results with multiple prompts.")
+                params["stream"] = True
+                response = _streaming_response_template()
+                first = True
+                for stream_resp in completion_with_retry(
+                        self, prompt=_prompts, **params
+                ):
+                    if first:
+                        # prefix the first chunk with the prompt, so the accumulated text starts with
+                        # the prompt that get_response() is given as prompt= below;
+                        # the callback therefore also receives the prompt as part of the first chunk
+                        stream_resp["choices"][0]["text"] = prompt + stream_resp["choices"][0]["text"]
+                        first = False
+                    text_chunk = stream_resp["choices"][0]["text"]
+                    text_with_prompt += text_chunk
+                    text = self.prompter.get_response(text_with_prompt, prompt=prompt,
+                                                      sanitize_bot_response=self.sanitize_bot_response)
+                    if run_manager:
+                        run_manager.on_llm_new_token(
+                            text_chunk,
+                            verbose=self.verbose,
+                            logprobs=stream_resp["choices"][0]["logprobs"],
+                        )
+                    _update_response(response, stream_resp)
+                choices.extend(response["choices"])
+            else:
+                response = completion_with_retry(self, prompt=_prompts, **params)
+                choices.extend(response["choices"])
+            if not self.streaming:
+                # Can't update token usage if streaming
+                update_token_usage(_keys, response, token_usage)
+        if self.streaming:
+            # replace the accumulated raw text of the first choice by the prompter-processed response
+            choices[0]['text'] = text
+        return self.create_llm_result(choices, prompts, token_usage)
+
+    def get_token_ids(self, text: str) -> List[int]:
+        # use our own tokenizer when one was given, else defer to the parent implementation
+        if self.tokenizer is not None:
+            return self.tokenizer.encode(text)
+        else:
+            # OpenAI uses tiktoken
+            return super().get_token_ids(text)
+
+
+class H2OReplicate(Replicate):
+    stop_sequences: Any = None
+    sanitize_bot_response: bool = False
+    prompter: Any = None
+    context: Any = ''
+    iinput: Any = ''
+    tokenizer: Any = None
+
+    def _call(
+            self,
+            prompt: str,
+            stop: Optional[List[str]] = None,
+            run_manager: Optional[CallbackManagerForLLMRun] = None,
+            **kwargs: Any,
+    ) -> str:
+        """Call to replicate endpoint."""
+        stop_tmp = self.stop_sequences if not stop else self.stop_sequences + stop
+        stop = []
+        [stop.append(x) for x in stop_tmp if x not in stop]
+
+        # HF inference server needs control over input tokens
+        assert self.tokenizer is not None
+        from h2oai_pipeline import H2OTextGenerationPipeline
+        prompt, num_prompt_tokens = H2OTextGenerationPipeline.limit_prompt(prompt, self.tokenizer)
+        # Note Replicate handles the prompting of the specific model
+        return super()._call(prompt, stop=stop, run_manager=run_manager, **kwargs)
+
+    def get_token_ids(self, text: str) -> List[int]:
+        return self.tokenizer.encode(text)
+        # avoid base method that is not aware of how to properly tokenize (uses GPT2)
+        # return _get_token_ids_default_method(text)
+
+
+class H2OChatOpenAI(ChatOpenAI):
+    @classmethod
+    def _all_required_field_names(cls) -> Set:
+        _all_required_field_names = super(ChatOpenAI, cls)._all_required_field_names()
+        _all_required_field_names.update({'top_p', 'frequency_penalty', 'presence_penalty', 'logit_bias'})
+        return _all_required_field_names
+
+
+class H2OAzureChatOpenAI(AzureChatOpenAI):
+    @classmethod
+    def _all_required_field_names(cls) -> Set:
+        _all_required_field_names = super(AzureChatOpenAI, cls)._all_required_field_names()
+        _all_required_field_names.update({'top_p', 'frequency_penalty', 'presence_penalty', 'logit_bias'})
+        return _all_required_field_names
+
+
+class H2OAzureOpenAI(AzureOpenAI):
+    @classmethod
+    def _all_required_field_names(cls) -> Set:
+        _all_required_field_names = super(AzureOpenAI, cls)._all_required_field_names()
+        _all_required_field_names.update({'top_p', 'frequency_penalty', 'presence_penalty', 'logit_bias'})
+        return _all_required_field_names
+
+
+class H2OHuggingFacePipeline(HuggingFacePipeline):
+    def _call(
+            self,
+            prompt: str,
+            stop: Optional[List[str]] = None,
+            run_manager: Optional[CallbackManagerForLLMRun] = None,
+            **kwargs: Any,
+    ) -> str:
+        response = self.pipeline(prompt, stop=stop)
+        if self.pipeline.task == "text-generation":
+            # Text generation return includes the starter text.
+            text = response[0]["generated_text"][len(prompt):]
+        elif self.pipeline.task == "text2text-generation":
+            text = response[0]["generated_text"]
+        elif self.pipeline.task == "summarization":
+            text = response[0]["summary_text"]
+        else:
+            raise ValueError(
+                f"Got invalid task {self.pipeline.task}, "
+                f"currently only {VALID_TASKS} are supported"
+            )
+        if stop:
+            # This is a bit hacky, but I can't figure out a better way to enforce
+            # stop tokens when making calls to huggingface_hub.
+            text = enforce_stop_tokens(text, stop)
+        return text
+
+
+def get_llm(use_openai_model=False,
+            model_name=None,
+            model=None,
+            tokenizer=None,
+            inference_server=None,
+            langchain_only_model=None,
+            stream_output=False,
+            async_output=True,
+            num_async=3,
+            do_sample=False,
+            temperature=0.1,
+            top_k=40,
+            top_p=0.7,
+            num_beams=1,
+            max_new_tokens=512,
+            min_new_tokens=1,
+            early_stopping=False,
+            max_time=180,
+            repetition_penalty=1.0,
+            num_return_sequences=1,
+            prompt_type=None,
+            prompt_dict=None,
+            prompter=None,
+            context=None,
+            iinput=None,
+            sanitize_bot_response=False,
+            system_prompt='',
+            visible_models=0,
+            h2ogpt_key=None,
+            min_max_new_tokens=None,
+            n_jobs=None,
+            cli=False,
+            llamacpp_dict=None,
+            verbose=False,
+            ):
+    # Build the LangChain LLM object for the requested backend.
+    # The backend is picked by the first matching case:
+    #   inference_server starts with 'replicate'          -> H2OReplicate
+    #   use_openai_model, or server 'openai...'/'vllm...' -> H2OOpenAI / H2OChatOpenAI / H2OAzureOpenAI /
+    #                                                        H2OAzureChatOpenAI (by inf_type from set_openai)
+    #   inference_server starts with 'sagemaker'          -> H2OSagemakerEndpoint
+    #   any other non-empty inference_server (http...)    -> GradioInference or H2OHuggingFaceTextGenInference,
+    #                                                        depending on the type of `model` (a client object)
+    #   model_name in non_hf_types                        -> get_llm_gpt4all()
+    #   model.is_exlama()                                 -> Exllama
+    #   otherwise                                         -> H2OTextGenerationPipeline in H2OHuggingFacePipeline
+    # Returns the tuple (llm, model_name, streamer, prompt_type, async_output, only_new_text).
+    # async_output is forced to False in the branches that do not implement async yet.
+
+    # currently all but h2oai_pipeline case return prompt + new text, but could change
+    only_new_text = False
+
+    # NOTE(review): os.cpu_count() may return None, in which case the fallback expression fails -- confirm ok
+    if n_jobs in [None, -1]:
+        n_jobs = int(os.getenv('OMP_NUM_THREADS', str(os.cpu_count() // 2)))
+    if inference_server is None:
+        inference_server = ''
+    if inference_server.startswith('replicate'):
+        # everything after the first ':' is the replicate model string
+        model_string = ':'.join(inference_server.split(':')[1:])
+        if 'meta/llama' in model_string:
+            # for these models temperature is kept at or above 0.01
+            temperature = max(0.01, temperature if do_sample else 0)
+        else:
+            temperature =temperature if do_sample else 0
+        gen_kwargs = dict(temperature=temperature,
+                          seed=1234,
+                          max_length=max_new_tokens,  # langchain
+                          max_new_tokens=max_new_tokens,  # replicate docs
+                          top_p=top_p if do_sample else 1,
+                          top_k=top_k,  # not always supported
+                          repetition_penalty=repetition_penalty)
+        # fall back to the prompter's system prompt if none was explicitly chosen
+        if system_prompt in [None, 'None', 'auto']:
+            if prompter.system_prompt:
+                system_prompt = prompter.system_prompt
+            else:
+                system_prompt = ''
+        if system_prompt:
+            gen_kwargs.update(dict(system_prompt=system_prompt))
+
+        # replicate handles prompting, so avoid get_response() filter
+        # NOTE(review): this mutates the caller's prompter object, and requires prompter to be not None
+        prompter.prompt_type = 'plain'
+        if stream_output:
+            callbacks = [StreamingGradioCallbackHandler()]
+            streamer = callbacks[0] if stream_output else None
+            llm = H2OReplicate(
+                streaming=True,
+                callbacks=callbacks,
+                model=model_string,
+                input=gen_kwargs,
+                stop=prompter.stop_sequences,
+                stop_sequences=prompter.stop_sequences,
+                sanitize_bot_response=sanitize_bot_response,
+                prompter=prompter,
+                context=context,
+                iinput=iinput,
+                tokenizer=tokenizer,
+            )
+        else:
+            streamer = None
+            llm = H2OReplicate(
+                model=model_string,
+                input=gen_kwargs,
+                stop=prompter.stop_sequences,
+                stop_sequences=prompter.stop_sequences,
+                sanitize_bot_response=sanitize_bot_response,
+                prompter=prompter,
+                context=context,
+                iinput=iinput,
+                tokenizer=tokenizer,
+            )
+    elif use_openai_model or inference_server.startswith('openai') or inference_server.startswith('vllm'):
+        if use_openai_model and model_name is None:
+            model_name = "gpt-3.5-turbo"
+        # FIXME: Will later import be ignored?  I think so, so should be fine
+        openai, inf_type, deployment_name, base_url, api_version = set_openai(inference_server)
+        kwargs_extra = {}
+        # choose the LLM class and the API key by inf_type
+        if inf_type == 'openai_chat' or inf_type == 'vllm_chat':
+            cls = H2OChatOpenAI
+            # FIXME: Support context, iinput
+            # if inf_type == 'vllm_chat':
+            #    kwargs_extra.update(dict(tokenizer=tokenizer))
+            openai_api_key = openai.api_key
+        elif inf_type == 'openai_azure_chat':
+            cls = H2OAzureChatOpenAI
+            kwargs_extra.update(dict(openai_api_type='azure'))
+            # FIXME: Support context, iinput
+            # env OPENAI_AZURE_KEY takes precedence over openai.api_key
+            if os.getenv('OPENAI_AZURE_KEY') is not None:
+                openai_api_key = os.getenv('OPENAI_AZURE_KEY')
+            else:
+                openai_api_key = openai.api_key
+        elif inf_type == 'openai_azure':
+            cls = H2OAzureOpenAI
+            kwargs_extra.update(dict(openai_api_type='azure'))
+            # FIXME: Support context, iinput
+            # env OPENAI_AZURE_KEY takes precedence over openai.api_key
+            if os.getenv('OPENAI_AZURE_KEY') is not None:
+                openai_api_key = os.getenv('OPENAI_AZURE_KEY')
+            else:
+                openai_api_key = openai.api_key
+        else:
+            cls = H2OOpenAI
+            if inf_type == 'vllm':
+                # only the vllm case passes the prompter/tokenizer fields that H2OOpenAI._generate uses
+                kwargs_extra.update(dict(stop_sequences=prompter.stop_sequences,
+                                         sanitize_bot_response=sanitize_bot_response,
+                                         prompter=prompter,
+                                         context=context,
+                                         iinput=iinput,
+                                         tokenizer=tokenizer,
+                                         openai_api_base=openai.api_base,
+                                         client=None))
+            else:
+                assert inf_type == 'openai' or use_openai_model
+            openai_api_key = openai.api_key
+
+        if deployment_name:
+            kwargs_extra.update(dict(deployment_name=deployment_name))
+        # api version precedence: from inference_server, then openai.api_version, then fixed default for azure
+        if api_version:
+            kwargs_extra.update(dict(openai_api_version=api_version))
+        elif openai.api_version:
+            kwargs_extra.update(dict(openai_api_version=openai.api_version))
+        elif inf_type in ['openai_azure', 'openai_azure_chat']:
+            kwargs_extra.update(dict(openai_api_version="2023-05-15"))
+        # base url from inference_server overrides openai.api_base (including the one set for vllm above)
+        if base_url:
+            kwargs_extra.update(dict(openai_api_base=base_url))
+        else:
+            kwargs_extra.update(dict(openai_api_base=openai.api_base))
+
+        callbacks = [StreamingGradioCallbackHandler()]
+        llm = cls(model_name=model_name,
+                  temperature=temperature if do_sample else 0,
+                  # FIXME: Need to count tokens and reduce max_new_tokens to fit like in generate.py
+                  max_tokens=max_new_tokens,
+                  top_p=top_p if do_sample else 1,
+                  frequency_penalty=0,
+                  presence_penalty=1.07 - repetition_penalty + 0.6,  # so good default
+                  callbacks=callbacks if stream_output else None,
+                  openai_api_key=openai_api_key,
+                  logit_bias=None if inf_type == 'vllm' else {},
+                  max_retries=6,
+                  streaming=stream_output,
+                  **kwargs_extra
+                  )
+        streamer = callbacks[0] if stream_output else None
+        if inf_type in ['openai', 'openai_chat', 'openai_azure', 'openai_azure_chat']:
+            # for OpenAI-hosted models the returned prompt_type is the inference_server string itself
+            prompt_type = inference_server
+        else:
+            # vllm goes here
+            prompt_type = prompt_type or 'plain'
+    elif inference_server and inference_server.startswith('sagemaker'):
+        # NOTE(review): callbacks is created but not passed to H2OSagemakerEndpoint below; streamer stays None
+        callbacks = [StreamingGradioCallbackHandler()]  # FIXME
+        streamer = None
+
+        # inference_server is split on ':' as <prefix>:<endpoint_name>:<region_name>
+        endpoint_name = ':'.join(inference_server.split(':')[1:2])
+        region_name = ':'.join(inference_server.split(':')[2:])
+
+        from sagemaker import H2OSagemakerEndpoint, ChatContentHandler, BaseContentHandler
+        if inference_server.startswith('sagemaker_chat'):
+            content_handler = ChatContentHandler()
+        else:
+            content_handler = BaseContentHandler()
+        model_kwargs = dict(temperature=temperature if do_sample else 1E-10,
+                            return_full_text=False, top_p=top_p, max_new_tokens=max_new_tokens)
+        llm = H2OSagemakerEndpoint(
+            endpoint_name=endpoint_name,
+            region_name=region_name,
+            aws_access_key_id=os.environ.get('AWS_ACCESS_KEY_ID'),
+            aws_secret_access_key=os.environ.get('AWS_SECRET_ACCESS_KEY'),
+            model_kwargs=model_kwargs,
+            content_handler=content_handler,
+            endpoint_kwargs={'CustomAttributes': 'accept_eula=true'},
+        )
+    elif inference_server:
+        assert inference_server.startswith(
+            'http'), "Malformed inference_server=%s.  Did you add http:// in front?" % inference_server
+
+        # here `model` is an already-created client object: either a GradioClient or a text_generation Client
+        from gradio_utils.grclient import GradioClient
+        from text_generation import Client as HFClient
+        if isinstance(model, GradioClient):
+            gr_client = model
+            hf_client = None
+        else:
+            gr_client = None
+            hf_client = model
+            assert isinstance(hf_client, HFClient)
+
+        inference_server, headers = get_hf_server(inference_server)
+
+        # quick sanity check to avoid long timeouts, just see if can reach server
+        requests.get(inference_server, timeout=int(os.getenv('REQUEST_TIMEOUT_FAST', '10')))
+        callbacks = [StreamingGradioCallbackHandler()]
+
+        if gr_client:
+            async_output = False  # FIXME: not implemented yet
+            chat_client = False
+            llm = GradioInference(
+                inference_server_url=inference_server,
+                return_full_text=False,
+
+                temperature=temperature,
+                top_p=top_p,
+                top_k=top_k,
+                num_beams=num_beams,
+                max_new_tokens=max_new_tokens,
+                min_new_tokens=min_new_tokens,
+                early_stopping=early_stopping,
+                max_time=max_time,
+                repetition_penalty=repetition_penalty,
+                num_return_sequences=num_return_sequences,
+                do_sample=do_sample,
+                chat_client=chat_client,
+
+                callbacks=callbacks if stream_output else None,
+                stream_output=stream_output,
+                prompter=prompter,
+                context=context,
+                iinput=iinput,
+                client=gr_client,
+                sanitize_bot_response=sanitize_bot_response,
+                tokenizer=tokenizer,
+                system_prompt=system_prompt,
+                visible_models=visible_models,
+                h2ogpt_key=h2ogpt_key,
+                min_max_new_tokens=min_max_new_tokens,
+            )
+        elif hf_client:
+            # no need to pass original client, no state and fast, so can use same validate_environment from base class
+            # semaphore caps concurrent async calls at num_async; NullContext is used when async output is off
+            async_sem = asyncio.Semaphore(num_async) if async_output else NullContext()
+            llm = H2OHuggingFaceTextGenInference(
+                inference_server_url=inference_server,
+                do_sample=do_sample,
+                max_new_tokens=max_new_tokens,
+                repetition_penalty=repetition_penalty,
+                return_full_text=False,  # this only controls internal behavior, still returns processed text
+                seed=SEED,
+
+                stop_sequences=prompter.stop_sequences,
+                temperature=temperature,
+                top_k=top_k,
+                top_p=top_p,
+                # typical_p=top_p,
+                callbacks=callbacks if stream_output else None,
+                stream_output=stream_output,
+                prompter=prompter,
+                context=context,
+                iinput=iinput,
+                tokenizer=tokenizer,
+                timeout=max_time,
+                sanitize_bot_response=sanitize_bot_response,
+                async_sem=async_sem,
+            )
+        else:
+            raise RuntimeError("No defined client")
+        streamer = callbacks[0] if stream_output else None
+    elif model_name in non_hf_types:
+        async_output = False  # FIXME: not implemented yet
+        assert langchain_only_model
+        if model_name == 'llama':
+            callbacks = [StreamingGradioCallbackHandler()]
+            streamer = callbacks[0] if stream_output else None
+        else:
+            # stream_output = False
+            # doesn't stream properly as generator, but at least
+            callbacks = [streaming_stdout.StreamingStdOutCallbackHandler()]
+            streamer = None
+        # a given prompter decides the prompt_type; otherwise one is built from prompt_type/prompt_dict
+        if prompter:
+            prompt_type = prompter.prompt_type
+        else:
+            prompter = Prompter(prompt_type, prompt_dict, debug=False, chat=False, stream_output=stream_output)
+            pass  # assume inputted prompt_type is correct
+        from gpt4all_llm import get_llm_gpt4all
+        max_max_tokens = tokenizer.model_max_length
+        llm = get_llm_gpt4all(model_name,
+                              model=model,
+                              max_new_tokens=max_new_tokens,
+                              temperature=temperature,
+                              repetition_penalty=repetition_penalty,
+                              top_k=top_k,
+                              top_p=top_p,
+                              callbacks=callbacks,
+                              n_jobs=n_jobs,
+                              verbose=verbose,
+                              streaming=stream_output,
+                              prompter=prompter,
+                              context=context,
+                              iinput=iinput,
+                              max_seq_len=max_max_tokens,
+                              llamacpp_dict=llamacpp_dict,
+                              )
+    elif hasattr(model, 'is_exlama') and model.is_exlama():
+        async_output = False  # FIXME: not implemented yet
+        assert langchain_only_model
+        callbacks = [StreamingGradioCallbackHandler()]
+        streamer = callbacks[0] if stream_output else None
+        max_max_tokens = tokenizer.model_max_length
+
+        from src.llm_exllama import Exllama
+        llm = Exllama(streaming=stream_output,
+                      model_path=None,
+                      model=model,
+                      lora_path=None,
+                      temperature=temperature,
+                      top_k=top_k,
+                      top_p=top_p,
+                      typical=.7,
+                      beams=1,
+                      # beam_length = 40,
+                      stop_sequences=prompter.stop_sequences,
+                      callbacks=callbacks,
+                      verbose=verbose,
+                      max_seq_len=max_max_tokens,
+                      fused_attn=False,
+                      # alpha_value = 1.0, #For use with any models
+                      # compress_pos_emb = 4.0, #For use with superhot
+                      # set_auto_map = "3, 2" #Gpu split, this will split 3gigs/2gigs
+                      prompter=prompter,
+                      context=context,
+                      iinput=iinput,
+                      )
+    else:
+        # local HF model run through H2OTextGenerationPipeline
+        async_output = False  # FIXME: not implemented yet
+        if model is None:
+            # only used if didn't pass model in
+            assert tokenizer is None
+            prompt_type = 'human_bot'
+            if model_name is None:
+                model_name = 'h2oai/h2ogpt-oasst1-512-12b'
+                # model_name = 'h2oai/h2ogpt-oig-oasst1-512-6_9b'
+                # model_name = 'h2oai/h2ogpt-oasst1-512-20b'
+            inference_server = ''
+            model, tokenizer, device = get_model(load_8bit=True, base_model=model_name,
+                                                 inference_server=inference_server, gpu_id=0)
+
+        max_max_tokens = tokenizer.model_max_length
+        # this is the only branch that reports only_new_text=True to the caller
+        only_new_text = True
+        gen_kwargs = dict(do_sample=do_sample,
+                          num_beams=num_beams,
+                          max_new_tokens=max_new_tokens,
+                          min_new_tokens=min_new_tokens,
+                          early_stopping=early_stopping,
+                          max_time=max_time,
+                          repetition_penalty=repetition_penalty,
+                          num_return_sequences=num_return_sequences,
+                          return_full_text=not only_new_text,
+                          handle_long_generation=None)
+        # sampling parameters are only passed when sampling; the asserts check that every name in
+        # gen_hyper (sampling) or gen_hyper0 (no sampling) is present in gen_kwargs
+        if do_sample:
+            gen_kwargs.update(dict(temperature=temperature,
+                                   top_k=top_k,
+                                   top_p=top_p))
+            assert len(set(gen_hyper).difference(gen_kwargs.keys())) == 0
+        else:
+            assert len(set(gen_hyper0).difference(gen_kwargs.keys())) == 0
+
+        if stream_output:
+            skip_prompt = only_new_text
+            from gen import H2OTextIteratorStreamer
+            decoder_kwargs = {}
+            streamer = H2OTextIteratorStreamer(tokenizer, skip_prompt=skip_prompt, block=False, **decoder_kwargs)
+            gen_kwargs.update(dict(streamer=streamer))
+        else:
+            streamer = None
+
+        from h2oai_pipeline import H2OTextGenerationPipeline
+        pipe = H2OTextGenerationPipeline(model=model, use_prompter=True,
+                                         prompter=prompter,
+                                         context=context,
+                                         iinput=iinput,
+                                         prompt_type=prompt_type,
+                                         prompt_dict=prompt_dict,
+                                         sanitize_bot_response=sanitize_bot_response,
+                                         chat=False, stream_output=stream_output,
+                                         tokenizer=tokenizer,
+                                         # leave some room for 1 paragraph, even if min_new_tokens=0
+                                         max_input_tokens=max_max_tokens - max(min_new_tokens, 256),
+                                         base_model=model_name,
+                                         **gen_kwargs)
+        # pipe.task = "text-generation"
+        # below makes it listen only to our prompt removal,
+        # not built in prompt removal that is less general and not specific for our model
+        pipe.task = "text2text-generation"
+
+        llm = H2OHuggingFacePipeline(pipeline=pipe)
+    return llm, model_name, streamer, prompt_type, async_output, only_new_text
+
+
+def get_device_dtype():
+    # torch.device("cuda") leads to cuda:x cuda:y mismatches for multi-GPU consistently
+    import torch
+    n_gpus = torch.cuda.device_count() if torch.cuda.is_available else 0
+    device = 'cpu' if n_gpus == 0 else 'cuda'
+    # from utils import NullContext
+    # context_class = NullContext if n_gpus > 1 or n_gpus == 0 else context_class
+    context_class = torch.device
+    torch_dtype = torch.float16 if device == 'cuda' else torch.float32
+    return device, torch_dtype, context_class
+
+
+def get_wiki_data(title, first_paragraph_only, text_limit=None, take_head=True):
+    """
+    Get wikipedia data from online
+    :param title:
+    :param first_paragraph_only:
+    :param text_limit:
+    :param take_head:
+    :return:
+    """
+    filename = 'wiki_%s_%s_%s_%s.data' % (first_paragraph_only, title, text_limit, take_head)
+    url = f"https://en.wikipedia.org/w/api.php?format=json&action=query&prop=extracts&explaintext=1&titles={title}"
+    if first_paragraph_only:
+        url += "&exintro=1"
+    import json
+    if not os.path.isfile(filename):
+        data = requests.get(url).json()
+        json.dump(data, open(filename, 'wt'))
+    else:
+        data = json.load(open(filename, "rt"))
+    page_content = list(data["query"]["pages"].values())[0]["extract"]
+    if take_head is not None and text_limit is not None:
+        page_content = page_content[:text_limit] if take_head else page_content[-text_limit:]
+    title_url = str(title).replace(' ', '_')
+    return Document(
+        page_content=str(page_content),
+        metadata={"source": f"https://en.wikipedia.org/wiki/{title_url}"},
+    )
+
+
+def get_wiki_sources(first_para=True, text_limit=None):
+    """
+    Get specific named sources from wikipedia
+    :param first_para:
+    :param text_limit:
+    :return:
+    """
+    default_wiki_sources = ['Unix', 'Microsoft_Windows', 'Linux']
+    wiki_sources = list(os.getenv('WIKI_SOURCES', default_wiki_sources))
+    return [get_wiki_data(x, first_para, text_limit=text_limit) for x in wiki_sources]
+
+
+def get_github_docs(repo_owner, repo_name):
+    """
+    Access github from specific repo
+    :param repo_owner:
+    :param repo_name:
+    :return:
+    """
+    with tempfile.TemporaryDirectory() as d:
+        subprocess.check_call(
+            f"git clone --depth 1 https://github.com/{repo_owner}/{repo_name}.git .",
+            cwd=d,
+            shell=True,
+        )
+        git_sha = (
+            subprocess.check_output("git rev-parse HEAD", shell=True, cwd=d)
+            .decode("utf-8")
+            .strip()
+        )
+        repo_path = pathlib.Path(d)
+        markdown_files = list(repo_path.glob("*/*.md")) + list(
+            repo_path.glob("*/*.mdx")
+        )
+        for markdown_file in markdown_files:
+            with open(markdown_file, "r") as f:
+                relative_path = markdown_file.relative_to(repo_path)
+                github_url = f"https://github.com/{repo_owner}/{repo_name}/blob/{git_sha}/{relative_path}"
+                yield Document(page_content=str(f.read()), metadata={"source": github_url})
+
+
+def get_dai_pickle(dest="."):
+    from huggingface_hub import hf_hub_download
+    # True for case when locally already logged in with correct token, so don't have to set key
+    token = os.getenv('HUGGING_FACE_HUB_TOKEN', True)
+    path_to_zip_file = hf_hub_download('h2oai/dai_docs', 'dai_docs.pickle', token=token, repo_type='dataset')
+    shutil.copy(path_to_zip_file, dest)
+
+
+def get_dai_docs(from_hf=False, get_pickle=True):
+    """
+    Consume DAI documentation, or consume from public pickle
+    :param from_hf: get DAI docs from HF, then generate pickle for later use by LangChain
+    :param get_pickle: Avoid raw DAI docs, just get pickle directly from HF
+    :return:
+    """
+    import pickle
+
+    if get_pickle:
+        get_dai_pickle()
+
+    dai_store = 'dai_docs.pickle'
+    dst = "working_dir_docs"
+    if not os.path.isfile(dai_store):
+        from create_data import setup_dai_docs
+        dst = setup_dai_docs(dst=dst, from_hf=from_hf)
+
+        import glob
+        files = list(glob.glob(os.path.join(dst, '*rst'), recursive=True))
+
+        basedir = os.path.abspath(os.getcwd())
+        from create_data import rst_to_outputs
+        new_outputs = rst_to_outputs(files)
+        os.chdir(basedir)
+
+        pickle.dump(new_outputs, open(dai_store, 'wb'))
+    else:
+        new_outputs = pickle.load(open(dai_store, 'rb'))
+
+    sources = []
+    for line, file in new_outputs:
+        # gradio requires any linked file to be with app.py
+        sym_src = os.path.abspath(os.path.join(dst, file))
+        sym_dst = os.path.abspath(os.path.join(os.getcwd(), file))
+        if os.path.lexists(sym_dst):
+            os.remove(sym_dst)
+        os.symlink(sym_src, sym_dst)
+        itm = Document(page_content=str(line), metadata={"source": file})
+        # NOTE: yield has issues when going into db, loses metadata
+        # yield itm
+        sources.append(itm)
+    return sources
+
+
+def get_supported_types():
+    non_image_types0 = ["pdf", "txt", "csv", "toml", "py", "rst", "xml", "rtf",
+                        "md",
+                        "html", "mhtml", "htm",
+                        "enex", "eml", "epub", "odt", "pptx", "ppt",
+                        "zip",
+                        "gz",
+                        "gzip",
+                        "urls",
+                        ]
+    # "msg",  GPL3
+
+    video_types0 = ['WEBM',
+                    'MPG', 'MP2', 'MPEG', 'MPE', '.PV',
+                    'OGG',
+                    'MP4', 'M4P', 'M4V',
+                    'AVI', 'WMV',
+                    'MOV', 'QT',
+                    'FLV', 'SWF',
+                    'AVCHD']
+    video_types0 = [x.lower() for x in video_types0]
+    if have_pillow:
+        from PIL import Image
+        exts = Image.registered_extensions()
+        image_types0 = {ex for ex, f in exts.items() if f in Image.OPEN if ex not in video_types0 + non_image_types0}
+        image_types0 = sorted(image_types0)
+        image_types0 = [x[1:] if x.startswith('.') else x for x in image_types0]
+    else:
+        image_types0 = []
+    return non_image_types0, image_types0, video_types0
+
+
+non_image_types, image_types, video_types = get_supported_types()  # supported extension lists, built once at import
+set_image_types = set(image_types)  # set form, e.g. for intersecting with a caller's selected file types
+
+if have_libreoffice or True:
+    # "or True" makes this branch unconditional: office types are always registered so loading is attempted,
+    # e.g. on MAC/Windows, even if don't have libreoffice since works without that
+    non_image_types.extend(["docx", "doc", "xls", "xlsx"])
+if have_jq:
+    non_image_types.extend(["json", "jsonl"])  # json types are only registered when have_jq is truthy
+
+file_types = non_image_types + image_types  # all registered extensions: non-image first, then image
+
+
+def try_as_html(file):
+    # try treating as html as occurs when scraping websites
+    from bs4 import BeautifulSoup
+    with open(file, "rt") as f:
+        try:
+            is_html = bool(BeautifulSoup(f.read(), "html.parser").find())
+        except:  # FIXME
+            is_html = False
+    if is_html:
+        file_url = 'file://' + file
+        doc1 = UnstructuredURLLoader(urls=[file_url]).load()
+        doc1 = [x for x in doc1 if x.page_content]
+    else:
+        doc1 = []
+    return doc1
+
+
+def json_metadata_func(record: dict, metadata: dict) -> dict:
+    # Define the metadata extraction function.
+
+    if isinstance(record, dict):
+        metadata["sender_name"] = record.get("sender_name")
+        metadata["timestamp_ms"] = record.get("timestamp_ms")
+
+    if "source" in metadata:
+        metadata["source_json"] = metadata['source']
+    if "seq_num" in metadata:
+        metadata["seq_num_json"] = metadata['seq_num']
+
+    return metadata
+
+
+def file_to_doc(file,
+                filei=0,
+                base_path=None, verbose=False, fail_any_exception=False,
+                chunk=True, chunk_size=512, n_jobs=-1,
+                is_url=False, is_txt=False,
+
+                # urls
+                use_unstructured=True,
+                use_playwright=False,
+                use_selenium=False,
+
+                # pdfs
+                use_pymupdf='auto',
+                use_unstructured_pdf='auto',
+                use_pypdf='auto',
+                enable_pdf_ocr='auto',
+                try_pdf_as_html='auto',
+                enable_pdf_doctr='auto',
+
+                # images
+                enable_ocr=False,
+                enable_doctr=False,
+                enable_pix2struct=False,
+                enable_captions=True,
+                captions_model=None,
+                model_loaders=None,
+
+                # json
+                jq_schema='.[]',
+
+                headsize=50,  # see also H2OSerpAPIWrapper
+                db_type=None,
+                selected_file_types=None):
+    assert isinstance(model_loaders, dict)
+    if selected_file_types is not None:
+        set_image_types1 = set_image_types.intersection(set(selected_file_types))
+    else:
+        set_image_types1 = set_image_types
+
+    assert db_type is not None
+    chunk_sources = functools.partial(_chunk_sources, chunk=chunk, chunk_size=chunk_size, db_type=db_type)
+    add_meta = functools.partial(_add_meta, headsize=headsize, filei=filei)
+    # FIXME: if zip, file index order will not be correct if other files involved
+    path_to_docs_func = functools.partial(path_to_docs,
+                                          verbose=verbose,
+                                          fail_any_exception=fail_any_exception,
+                                          n_jobs=n_jobs,
+                                          chunk=chunk, chunk_size=chunk_size,
+                                          # url=file if is_url else None,
+                                          # text=file if is_txt else None,
+
+                                          # urls
+                                          use_unstructured=use_unstructured,
+                                          use_playwright=use_playwright,
+                                          use_selenium=use_selenium,
+
+                                          # pdfs
+                                          use_pymupdf=use_pymupdf,
+                                          use_unstructured_pdf=use_unstructured_pdf,
+                                          use_pypdf=use_pypdf,
+                                          enable_pdf_ocr=enable_pdf_ocr,
+                                          enable_pdf_doctr=enable_pdf_doctr,
+                                          try_pdf_as_html=try_pdf_as_html,
+
+                                          # images
+                                          enable_ocr=enable_ocr,
+                                          enable_doctr=enable_doctr,
+                                          enable_pix2struct=enable_pix2struct,
+                                          enable_captions=enable_captions,
+                                          captions_model=captions_model,
+
+                                          caption_loader=model_loaders['caption'],
+                                          doctr_loader=model_loaders['doctr'],
+                                          pix2struct_loader=model_loaders['pix2struct'],
+
+                                          # json
+                                          jq_schema=jq_schema,
+
+                                          db_type=db_type,
+                                          )
+
+    if file is None:
+        if fail_any_exception:
+            raise RuntimeError("Unexpected None file")
+        else:
+            return []
+    doc1 = []  # in case no support, or disabled support
+    if base_path is None and not is_txt and not is_url:
+        # then assume want to persist but don't care which path used
+        # can't be in base_path
+        dir_name = os.path.dirname(file)
+        base_name = os.path.basename(file)
+        # if from gradio, will have its own temp uuid too, but that's ok
+        base_name = sanitize_filename(base_name) + "_" + str(uuid.uuid4())[:10]
+        base_path = os.path.join(dir_name, base_name)
+    if is_url:
+        file = file.strip()  # in case accidental spaces in front or at end
+        file_lower = file.lower()
+        case1 = file_lower.startswith('arxiv:') and len(file_lower.split('arxiv:')) == 2
+        case2 = file_lower.startswith('https://arxiv.org/abs') and len(file_lower.split('https://arxiv.org/abs')) == 2
+        case3 = file_lower.startswith('http://arxiv.org/abs') and len(file_lower.split('http://arxiv.org/abs')) == 2
+        case4 = file_lower.startswith('arxiv.org/abs/') and len(file_lower.split('arxiv.org/abs/')) == 2
+        if case1 or case2 or case3 or case4:
+            if case1:
+                query = file.lower().split('arxiv:')[1].strip()
+            elif case2:
+                query = file.lower().split('https://arxiv.org/abs/')[1].strip()
+            elif case2:
+                query = file.lower().split('http://arxiv.org/abs/')[1].strip()
+            elif case3:
+                query = file.lower().split('arxiv.org/abs/')[1].strip()
+            else:
+                raise RuntimeError("Unexpected arxiv error for %s" % file)
+            if have_arxiv:
+                trials = 3
+                docs1 = []
+                for trial in range(trials):
+                    try:
+                        docs1 = ArxivLoader(query=query, load_max_docs=20, load_all_available_meta=True).load()
+                        break
+                    except urllib.error.URLError:
+                        pass
+                if not docs1:
+                    print("Failed to get arxiv %s" % query, flush=True)
+                # ensure string, sometimes None
+                [[x.metadata.update({k: str(v)}) for k, v in x.metadata.items()] for x in docs1]
+                query_url = f"https://arxiv.org/abs/{query}"
+                [x.metadata.update(
+                    dict(source=x.metadata.get('entry_id', query_url), query=query_url,
+                         input_type='arxiv', head=x.metadata.get('Title', ''), date=str(datetime.now))) for x in
+                    docs1]
+            else:
+                docs1 = []
+        else:
+            if not (file.startswith("http://") or file.startswith("file://") or file.startswith("https://")):
+                file = 'http://' + file
+            docs1 = []
+            do_unstructured = only_unstructured_urls or use_unstructured
+            if only_selenium or only_playwright:
+                do_unstructured = False
+            do_playwright = have_playwright and (use_playwright or only_playwright)
+            if only_unstructured_urls or only_selenium:
+                do_playwright = False
+            do_selenium = have_selenium and (use_selenium or only_selenium)
+            if only_unstructured_urls or only_playwright:
+                do_selenium = False
+            if do_unstructured or use_unstructured:
+                docs1a = UnstructuredURLLoader(urls=[file]).load()
+                docs1a = [x for x in docs1a if x.page_content]
+                add_parser(docs1a, 'UnstructuredURLLoader')
+                docs1.extend(docs1a)
+            if len(docs1) == 0 and have_playwright or do_playwright:
+                # then something went wrong, try another loader:
+                from langchain.document_loaders import PlaywrightURLLoader
+                docs1a = asyncio.run(PlaywrightURLLoader(urls=[file]).aload())
+                # docs1 = PlaywrightURLLoader(urls=[file]).load()
+                docs1a = [x for x in docs1a if x.page_content]
+                add_parser(docs1a, 'PlaywrightURLLoader')
+                docs1.extend(docs1a)
+            if len(docs1) == 0 and have_selenium or do_selenium:
+                # then something went wrong, try another loader:
+                # but requires Chrome binary, else get: selenium.common.exceptions.WebDriverException:
+                # Message: unknown error: cannot find Chrome binary
+                from langchain.document_loaders import SeleniumURLLoader
+                from selenium.common.exceptions import WebDriverException
+                try:
+                    docs1a = SeleniumURLLoader(urls=[file]).load()
+                    docs1a = [x for x in docs1a if x.page_content]
+                    add_parser(docs1a, 'SeleniumURLLoader')
+                    docs1.extend(docs1a)
+                except WebDriverException as e:
+                    print("No web driver: %s" % str(e), flush=True)
+            [x.metadata.update(dict(input_type='url', date=str(datetime.now))) for x in docs1]
+        add_meta(docs1, file, parser="is_url")
+        docs1 = clean_doc(docs1)
+        doc1 = chunk_sources(docs1)
+    elif is_txt:
+        base_path = "user_paste"
+        base_path = makedirs(base_path, exist_ok=True, tmp_ok=True, use_base=True)
+        source_file = os.path.join(base_path, "_%s" % str(uuid.uuid4())[:10])
+        with open(source_file, "wt") as f:
+            f.write(file)
+        metadata = dict(source=source_file, date=str(datetime.now()), input_type='pasted txt')
+        doc1 = Document(page_content=str(file), metadata=metadata)
+        add_meta(doc1, file, parser="f.write")
+        # Bit odd to change if was original text
+        # doc1 = clean_doc(doc1)
+    elif file.lower().endswith('.html') or file.lower().endswith('.mhtml') or file.lower().endswith('.htm'):
+        docs1 = UnstructuredHTMLLoader(file_path=file).load()
+        add_meta(docs1, file, parser='UnstructuredHTMLLoader')
+        docs1 = clean_doc(docs1)
+        doc1 = chunk_sources(docs1, language=Language.HTML)
+    elif (file.lower().endswith('.docx') or file.lower().endswith('.doc')) and (have_libreoffice or True):
+        docs1 = UnstructuredWordDocumentLoader(file_path=file).load()
+        add_meta(docs1, file, parser='UnstructuredWordDocumentLoader')
+        doc1 = chunk_sources(docs1)
+    elif (file.lower().endswith('.xlsx') or file.lower().endswith('.xls')) and (have_libreoffice or True):
+        docs1 = UnstructuredExcelLoader(file_path=file).load()
+        add_meta(docs1, file, parser='UnstructuredExcelLoader')
+        doc1 = chunk_sources(docs1)
+    elif file.lower().endswith('.odt'):
+        docs1 = UnstructuredODTLoader(file_path=file).load()
+        add_meta(docs1, file, parser='UnstructuredODTLoader')
+        doc1 = chunk_sources(docs1)
+    elif file.lower().endswith('pptx') or file.lower().endswith('ppt'):
+        docs1 = UnstructuredPowerPointLoader(file_path=file).load()
+        add_meta(docs1, file, parser='UnstructuredPowerPointLoader')
+        docs1 = clean_doc(docs1)
+        doc1 = chunk_sources(docs1)
+    elif file.lower().endswith('.txt'):
+        # use UnstructuredFileLoader ?
+        docs1 = TextLoader(file, encoding="utf8", autodetect_encoding=True).load()
+        # makes just one, but big one
+        doc1 = chunk_sources(docs1)
+        # Bit odd to change if was original text
+        # doc1 = clean_doc(doc1)
+        add_meta(doc1, file, parser='TextLoader')
+    elif file.lower().endswith('.rtf'):
+        docs1 = UnstructuredRTFLoader(file).load()
+        add_meta(docs1, file, parser='UnstructuredRTFLoader')
+        doc1 = chunk_sources(docs1)
+    elif file.lower().endswith('.md'):
+        docs1 = UnstructuredMarkdownLoader(file).load()
+        add_meta(docs1, file, parser='UnstructuredMarkdownLoader')
+        docs1 = clean_doc(docs1)
+        doc1 = chunk_sources(docs1, language=Language.MARKDOWN)
+    elif file.lower().endswith('.enex'):
+        docs1 = EverNoteLoader(file).load()
+        add_meta(doc1, file, parser='EverNoteLoader')
+        doc1 = chunk_sources(docs1)
+    elif file.lower().endswith('.epub'):
+        docs1 = UnstructuredEPubLoader(file).load()
+        add_meta(docs1, file, parser='UnstructuredEPubLoader')
+        doc1 = chunk_sources(docs1)
+    elif any(file.lower().endswith(x) for x in set_image_types1):
+        docs1 = []
+        if verbose:
+            print("BEGIN: Tesseract", flush=True)
+        if have_tesseract and enable_ocr:
+            # OCR, somewhat works, but not great
+            docs1a = UnstructuredImageLoader(file, strategy='ocr_only').load()
+            # docs1a = UnstructuredImageLoader(file, strategy='hi_res').load()
+            docs1a = [x for x in docs1a if x.page_content]
+            add_meta(docs1a, file, parser='UnstructuredImageLoader')
+            docs1.extend(docs1a)
+        if verbose:
+            print("END: Tesseract", flush=True)
+        if have_doctr and enable_doctr:
+            if verbose:
+                print("BEGIN: DocTR", flush=True)
+            if model_loaders['doctr'] is not None and not isinstance(model_loaders['doctr'], (str, bool)):
+                if verbose:
+                    print("Reuse DocTR", flush=True)
+                model_loaders['doctr'].load_model()
+            else:
+                if verbose:
+                    print("Fresh DocTR", flush=True)
+                from image_doctr import H2OOCRLoader
+                model_loaders['doctr'] = H2OOCRLoader()
+            model_loaders['doctr'].set_document_paths([file])
+            docs1c = model_loaders['doctr'].load()
+            docs1c = [x for x in docs1c if x.page_content]
+            add_meta(docs1c, file, parser='H2OOCRLoader: %s' % 'DocTR')
+            # caption didn't set source, so fix-up meta
+            for doci in docs1c:
+                doci.metadata['source'] = doci.metadata.get('document_path', file)
+                doci.metadata['hashid'] = hash_file(doci.metadata['source'])
+            docs1.extend(docs1c)
+            if verbose:
+                print("END: DocTR", flush=True)
+        if enable_captions:
+            # BLIP
+            if verbose:
+                print("BEGIN: BLIP", flush=True)
+            if model_loaders['caption'] is not None and not isinstance(model_loaders['caption'], (str, bool)):
+                # assumes didn't fork into this process with joblib, else can deadlock
+                if verbose:
+                    print("Reuse BLIP", flush=True)
+                model_loaders['caption'].load_model()
+            else:
+                if verbose:
+                    print("Fresh BLIP", flush=True)
+                from image_captions import H2OImageCaptionLoader
+                model_loaders['caption'] = H2OImageCaptionLoader(caption_gpu=model_loaders['caption'] == 'gpu',
+                                                                 blip_model=captions_model,
+                                                                 blip_processor=captions_model)
+            model_loaders['caption'].set_image_paths([file])
+            docs1c = model_loaders['caption'].load()
+            docs1c = [x for x in docs1c if x.page_content]
+            add_meta(docs1c, file, parser='H2OImageCaptionLoader: %s' % captions_model)
+            # caption didn't set source, so fix-up meta
+            for doci in docs1c:
+                doci.metadata['source'] = doci.metadata.get('image_path', file)
+                doci.metadata['hashid'] = hash_file(doci.metadata['source'])
+            docs1.extend(docs1c)
+
+            if verbose:
+                print("END: BLIP", flush=True)
+        if enable_pix2struct:
+            # BLIP
+            if verbose:
+                print("BEGIN: Pix2Struct", flush=True)
+            if model_loaders['pix2struct'] is not None and not isinstance(model_loaders['pix2struct'], (str, bool)):
+                if verbose:
+                    print("Reuse pix2struct", flush=True)
+                model_loaders['pix2struct'].load_model()
+            else:
+                if verbose:
+                    print("Fresh pix2struct", flush=True)
+                from image_pix2struct import H2OPix2StructLoader
+                model_loaders['pix2struct'] = H2OPix2StructLoader()
+            model_loaders['pix2struct'].set_image_paths([file])
+            docs1c = model_loaders['pix2struct'].load()
+            docs1c = [x for x in docs1c if x.page_content]
+            add_meta(docs1c, file, parser='H2OPix2StructLoader: %s' % model_loaders['pix2struct'])
+            # caption didn't set source, so fix-up meta
+            for doci in docs1c:
+                doci.metadata['source'] = doci.metadata.get('image_path', file)
+                doci.metadata['hashid'] = hash_file(doci.metadata['source'])
+            docs1.extend(docs1c)
+            if verbose:
+                print("END: Pix2Struct", flush=True)
+        doc1 = chunk_sources(docs1)
+    elif file.lower().endswith('.msg'):
+        raise RuntimeError("Not supported, GPL3 license")
+        # docs1 = OutlookMessageLoader(file).load()
+        # docs1[0].metadata['source'] = file
+    elif file.lower().endswith('.eml'):
+        try:
+            docs1 = UnstructuredEmailLoader(file).load()
+            add_meta(docs1, file, parser='UnstructuredEmailLoader')
+            doc1 = chunk_sources(docs1)
+        except ValueError as e:
+            if 'text/html content not found in email' in str(e):
+                pass
+            else:
+                raise
+        doc1 = [x for x in doc1 if x.page_content]
+        if len(doc1) == 0:
+            # e.g. plain/text dict key exists, but not
+            # doc1 = TextLoader(file, encoding="utf8").load()
+            docs1 = UnstructuredEmailLoader(file, content_source="text/plain").load()
+            docs1 = [x for x in docs1 if x.page_content]
+            add_meta(docs1, file, parser='UnstructuredEmailLoader text/plain')
+            doc1 = chunk_sources(docs1)
+    # elif file.lower().endswith('.gcsdir'):
+    #    doc1 = GCSDirectoryLoader(project_name, bucket, prefix).load()
+    # elif file.lower().endswith('.gcsfile'):
+    # doc1 = GCSFileLoader(project_name, bucket, blob).load()
+    elif file.lower().endswith('.rst'):
+        with open(file, "r") as f:
+            doc1 = Document(page_content=str(f.read()), metadata={"source": file})
+        add_meta(doc1, file, parser='f.read()')
+        doc1 = chunk_sources(doc1, language=Language.RST)
+    elif file.lower().endswith('.json'):
+        # 10k rows, 100 columns-like parts 4 bytes each
+        JSON_SIZE_LIMIT = int(os.getenv('JSON_SIZE_LIMIT', str(10 * 10 * 1024 * 10 * 4)))
+        if os.path.getsize(file) > JSON_SIZE_LIMIT:
+            raise ValueError(
+                "JSON file sizes > %s not supported for naive parsing and embedding, requires Agents enabled" % JSON_SIZE_LIMIT)
+        loader = JSONLoader(
+            file_path=file,
+            # jq_schema='.messages[].content',
+            jq_schema=jq_schema,
+            text_content=False,
+            metadata_func=json_metadata_func)
+        doc1 = loader.load()
+        add_meta(doc1, file, parser='JSONLoader: %s' % jq_schema)
+        fix_json_meta(doc1)
+    elif file.lower().endswith('.jsonl'):
+        loader = JSONLoader(
+            file_path=file,
+            # jq_schema='.messages[].content',
+            jq_schema=jq_schema,
+            json_lines=True,
+            text_content=False,
+            metadata_func=json_metadata_func)
+        doc1 = loader.load()
+        add_meta(doc1, file, parser='JSONLoader: %s' % jq_schema)
+        fix_json_meta(doc1)
+    elif file.lower().endswith('.pdf'):
+        # migration
+        if isinstance(use_pymupdf, bool):
+            if use_pymupdf == False:
+                use_pymupdf = 'off'
+            if use_pymupdf == True:
+                use_pymupdf = 'on'
+        if isinstance(use_unstructured_pdf, bool):
+            if use_unstructured_pdf == False:
+                use_unstructured_pdf = 'off'
+            if use_unstructured_pdf == True:
+                use_unstructured_pdf = 'on'
+        if isinstance(use_pypdf, bool):
+            if use_pypdf == False:
+                use_pypdf = 'off'
+            if use_pypdf == True:
+                use_pypdf = 'on'
+        if isinstance(enable_pdf_ocr, bool):
+            if enable_pdf_ocr == False:
+                enable_pdf_ocr = 'off'
+            if enable_pdf_ocr == True:
+                enable_pdf_ocr = 'on'
+        if isinstance(try_pdf_as_html, bool):
+            if try_pdf_as_html == False:
+                try_pdf_as_html = 'off'
+            if try_pdf_as_html == True:
+                try_pdf_as_html = 'on'
+
+        doc1 = []
+        tried_others = False
+        handled = False
+        did_pymupdf = False
+        did_unstructured = False
+        e = None
+        if have_pymupdf and (len(doc1) == 0 and use_pymupdf == 'auto' or use_pymupdf == 'on'):
+            # GPL, only use if installed
+            from langchain.document_loaders import PyMuPDFLoader
+            # load() still chunks by pages, but every page has title at start to help
+            try:
+                doc1a = PyMuPDFLoader(file).load()
+                did_pymupdf = True
+            except BaseException as e0:
+                doc1a = []
+                print("PyMuPDFLoader: %s" % str(e0), flush=True)
+                e = e0
+            # remove empty documents
+            handled |= len(doc1a) > 0
+            doc1a = [x for x in doc1a if x.page_content]
+            doc1a = clean_doc(doc1a)
+            add_parser(doc1a, 'PyMuPDFLoader')
+            doc1.extend(doc1a)
+        if len(doc1) == 0 and use_unstructured_pdf == 'auto' or use_unstructured_pdf == 'on':
+            tried_others = True
+            try:
+                doc1a = UnstructuredPDFLoader(file).load()
+                did_unstructured = True
+            except BaseException as e0:
+                doc1a = []
+                print("UnstructuredPDFLoader: %s" % str(e0), flush=True)
+                e = e0
+            handled |= len(doc1a) > 0
+            # remove empty documents
+            doc1a = [x for x in doc1a if x.page_content]
+            add_parser(doc1a, 'UnstructuredPDFLoader')
+            # seems to not need cleaning in most cases
+            doc1.extend(doc1a)
+        if len(doc1) == 0 and use_pypdf == 'auto' or use_pypdf == 'on':
+            tried_others = True
+            # open-source fallback
+            # load() still chunks by pages, but every page has title at start to help
+            try:
+                doc1a = PyPDFLoader(file).load()
+            except BaseException as e0:
+                doc1a = []
+                print("PyPDFLoader: %s" % str(e0), flush=True)
+                e = e0
+            handled |= len(doc1a) > 0
+            # remove empty documents
+            doc1a = [x for x in doc1a if x.page_content]
+            doc1a = clean_doc(doc1a)
+            add_parser(doc1a, 'PyPDFLoader')
+            doc1.extend(doc1a)
+        if not did_pymupdf and ((have_pymupdf and len(doc1) == 0) and tried_others):
+            # try again in case only others used, but only if didn't already try (2nd part of and)
+            # GPL, only use if installed
+            from langchain.document_loaders import PyMuPDFLoader
+            # load() still chunks by pages, but every page has title at start to help
+            try:
+                doc1a = PyMuPDFLoader(file).load()
+            except BaseException as e0:
+                doc1a = []
+                print("PyMuPDFLoader: %s" % str(e0), flush=True)
+                e = e0
+            handled |= len(doc1a) > 0
+            # remove empty documents
+            doc1a = [x for x in doc1a if x.page_content]
+            doc1a = clean_doc(doc1a)
+            add_parser(doc1a, 'PyMuPDFLoader2')
+            doc1.extend(doc1a)
+        did_pdf_ocr = False
+        if len(doc1) == 0 and (enable_pdf_ocr == 'auto' and enable_pdf_doctr != 'on') or enable_pdf_ocr == 'on':
+            did_pdf_ocr = True
+            # no did_unstructured condition here because here we do OCR, and before we did not
+            # try OCR in end since slowest, but works on pure image pages well
+            doc1a = UnstructuredPDFLoader(file, strategy='ocr_only').load()
+            handled |= len(doc1a) > 0
+            # remove empty documents
+            doc1a = [x for x in doc1a if x.page_content]
+            add_parser(doc1a, 'UnstructuredPDFLoader ocr_only')
+            # seems to not need cleaning in most cases
+            doc1.extend(doc1a)
+        # Some PDFs return nothing or junk from PDFMinerLoader
+        if len(doc1) == 0 and enable_pdf_doctr == 'auto' or enable_pdf_doctr == 'on':
+            if verbose:
+                print("BEGIN: DocTR", flush=True)
+            if model_loaders['doctr'] is not None and not isinstance(model_loaders['doctr'], (str, bool)):
+                model_loaders['doctr'].load_model()
+            else:
+                from image_doctr import H2OOCRLoader
+                model_loaders['doctr'] = H2OOCRLoader()
+            model_loaders['doctr'].set_document_paths([file])
+            doc1a = model_loaders['doctr'].load()
+            doc1a = [x for x in doc1a if x.page_content]
+            add_meta(doc1a, file, parser='H2OOCRLoader: %s' % 'DocTR')
+            handled |= len(doc1a) > 0
+            # caption didn't set source, so fix-up meta
+            for doci in doc1a:
+                doci.metadata['source'] = doci.metadata.get('document_path', file)
+                doci.metadata['hashid'] = hash_file(doci.metadata['source'])
+            doc1.extend(doc1a)
+            if verbose:
+                print("END: DocTR", flush=True)
+        if try_pdf_as_html in ['auto', 'on']:
+            doc1a = try_as_html(file)
+            add_parser(doc1a, 'try_as_html')
+            doc1.extend(doc1a)
+
+        if len(doc1) == 0:
+            # if literally nothing, show failed to parse so user knows, since unlikely nothing in PDF at all.
+            if handled:
+                raise ValueError("%s had no valid text, but meta data was parsed" % file)
+            else:
+                raise ValueError("%s had no valid text and no meta data was parsed: %s" % (file, str(e)))
+        add_meta(doc1, file, parser='pdf')
+        doc1 = chunk_sources(doc1)
+    elif file.lower().endswith('.csv'):
+        CSV_SIZE_LIMIT = int(os.getenv('CSV_SIZE_LIMIT', str(10 * 1024 * 10 * 4)))
+        if os.path.getsize(file) > CSV_SIZE_LIMIT:
+            raise ValueError(
+                "CSV file sizes > %s not supported for naive parsing and embedding, requires Agents enabled" % CSV_SIZE_LIMIT)
+        doc1 = CSVLoader(file).load()
+        add_meta(doc1, file, parser='CSVLoader')
+        if isinstance(doc1, list):
+            # each row is a Document, identify
+            [x.metadata.update(dict(chunk_id=chunk_id)) for chunk_id, x in enumerate(doc1)]
+            if db_type in ['chroma', 'chroma_old']:
+                # then separate summarize list
+                sdoc1 = clone_documents(doc1)
+                [x.metadata.update(dict(chunk_id=-1)) for chunk_id, x in enumerate(sdoc1)]
+                doc1 = sdoc1 + doc1
+    elif file.lower().endswith('.py'):
+        doc1 = PythonLoader(file).load()
+        add_meta(doc1, file, parser='PythonLoader')
+        doc1 = chunk_sources(doc1, language=Language.PYTHON)
+    elif file.lower().endswith('.toml'):
+        doc1 = TomlLoader(file).load()
+        add_meta(doc1, file, parser='TomlLoader')
+        doc1 = chunk_sources(doc1)
+    elif file.lower().endswith('.xml'):
+        from langchain.document_loaders import UnstructuredXMLLoader
+        loader = UnstructuredXMLLoader(file_path=file)
+        doc1 = loader.load()
+        add_meta(doc1, file, parser='UnstructuredXMLLoader')
+    elif file.lower().endswith('.urls'):
+        with open(file, "r") as f:
+            urls = f.readlines()
+            # recurse
+            doc1 = path_to_docs_func(None, url=urls)
+    elif file.lower().endswith('.zip'):
+        with zipfile.ZipFile(file, 'r') as zip_ref:
+            # don't put into temporary path, since want to keep references to docs inside zip
+            # so just extract in path where
+            zip_ref.extractall(base_path)
+            # recurse
+            doc1 = path_to_docs_func(base_path)
+    elif file.lower().endswith('.gz') or file.lower().endswith('.gzip'):
+        if file.lower().endswith('.gz'):
+            de_file = file.lower().replace('.gz', '')
+        else:
+            de_file = file.lower().replace('.gzip', '')
+        with gzip.open(file, 'rb') as f_in:
+            with open(de_file, 'wb') as f_out:
+                shutil.copyfileobj(f_in, f_out)
+        # recurse
+        doc1 = file_to_doc(de_file,
+                           filei=filei,  # single file, same file index as outside caller
+                           base_path=base_path, verbose=verbose, fail_any_exception=fail_any_exception,
+                           chunk=chunk, chunk_size=chunk_size, n_jobs=n_jobs,
+                           is_url=is_url, is_txt=is_txt,
+
+                           # urls
+                           use_unstructured=use_unstructured,
+                           use_playwright=use_playwright,
+                           use_selenium=use_selenium,
+
+                           # pdfs
+                           use_pymupdf=use_pymupdf,
+                           use_unstructured_pdf=use_unstructured_pdf,
+                           use_pypdf=use_pypdf,
+                           enable_pdf_ocr=enable_pdf_ocr,
+                           enable_pdf_doctr=enable_pdf_doctr,
+                           try_pdf_as_html=try_pdf_as_html,
+
+                           # images
+                           enable_ocr=enable_ocr,
+                           enable_doctr=enable_doctr,
+                           enable_pix2struct=enable_pix2struct,
+                           enable_captions=enable_captions,
+                           captions_model=captions_model,
+                           model_loaders=model_loaders,
+
+                           # json
+                           jq_schema=jq_schema,
+
+                           headsize=headsize,
+                           db_type=db_type,
+                           selected_file_types=selected_file_types)
+    else:
+        raise RuntimeError("No file handler for %s" % os.path.basename(file))
+
+    # allow doc1 to be list or not.
+    if not isinstance(doc1, list):
+        # If not list, did not chunk yet, so chunk now
+        docs = chunk_sources([doc1])
+    elif isinstance(doc1, list) and len(doc1) == 1:
+        # if list of length one, don't trust and chunk it, chunk_id's will still be correct if repeat
+        docs = chunk_sources(doc1)
+    else:
+        docs = doc1
+
+    assert isinstance(docs, list)
+    return docs
+
+
+def path_to_doc1(file,
+                 filei=0,
+                 verbose=False, fail_any_exception=False, return_file=True,
+                 chunk=True, chunk_size=512,
+                 n_jobs=-1,
+                 is_url=False, is_txt=False,
+
+                 # urls
+                 use_unstructured=True,
+                 use_playwright=False,
+                 use_selenium=False,
+
+                 # pdfs
+                 use_pymupdf='auto',
+                 use_unstructured_pdf='auto',
+                 use_pypdf='auto',
+                 enable_pdf_ocr='auto',
+                 enable_pdf_doctr='auto',
+                 try_pdf_as_html='auto',
+
+                 # images
+                 enable_ocr=False,
+                 enable_doctr=False,
+                 enable_pix2struct=False,
+                 enable_captions=True,
+                 captions_model=None,
+                 model_loaders=None,
+
+                 # json
+                 jq_schema='.[]',
+
+                 db_type=None,
+                 selected_file_types=None):
+    assert db_type is not None
+    if verbose:
+        if is_url:
+            print("Ingesting URL: %s" % file, flush=True)
+        elif is_txt:
+            print("Ingesting Text: %s" % file, flush=True)
+        else:
+            print("Ingesting file: %s" % file, flush=True)
+    res = None
+    try:
+        # don't pass base_path=path, would infinitely recurse
+        res = file_to_doc(file,
+                          filei=filei,
+                          base_path=None, verbose=verbose, fail_any_exception=fail_any_exception,
+                          chunk=chunk, chunk_size=chunk_size,
+                          n_jobs=n_jobs,
+                          is_url=is_url, is_txt=is_txt,
+
+                          # urls
+                          use_unstructured=use_unstructured,
+                          use_playwright=use_playwright,
+                          use_selenium=use_selenium,
+
+                          # pdfs
+                          use_pymupdf=use_pymupdf,
+                          use_unstructured_pdf=use_unstructured_pdf,
+                          use_pypdf=use_pypdf,
+                          enable_pdf_ocr=enable_pdf_ocr,
+                          enable_pdf_doctr=enable_pdf_doctr,
+                          try_pdf_as_html=try_pdf_as_html,
+
+                          # images
+                          enable_ocr=enable_ocr,
+                          enable_doctr=enable_doctr,
+                          enable_pix2struct=enable_pix2struct,
+                          enable_captions=enable_captions,
+                          captions_model=captions_model,
+                          model_loaders=model_loaders,
+
+                          # json
+                          jq_schema=jq_schema,
+
+                          db_type=db_type,
+                          selected_file_types=selected_file_types)
+    except BaseException as e:
+        print("Failed to ingest %s due to %s" % (file, traceback.format_exc()))
+        if fail_any_exception:
+            raise
+        else:
+            exception_doc = Document(
+                page_content='',
+                metadata={"source": file, "exception": '%s Exception: %s' % (file, str(e)),
+                          "traceback": traceback.format_exc()})
+            res = [exception_doc]
+    if verbose:
+        if is_url:
+            print("DONE Ingesting URL: %s" % file, flush=True)
+        elif is_txt:
+            print("DONE Ingesting Text: %s" % file, flush=True)
+        else:
+            print("DONE Ingesting file: %s" % file, flush=True)
+    if return_file:
+        base_tmp = "temp_path_to_doc1"
+        if not os.path.isdir(base_tmp):
+            base_tmp = makedirs(base_tmp, exist_ok=True, tmp_ok=True, use_base=True)
+        filename = os.path.join(base_tmp, str(uuid.uuid4()) + ".tmp.pickle")
+        with open(filename, 'wb') as f:
+            pickle.dump(res, f)
+        return filename
+    return res
+
+
+def path_to_docs(path_or_paths, verbose=False, fail_any_exception=False, n_jobs=-1,
+                 chunk=True, chunk_size=512,
+                 url=None, text=None,
+
+                 # urls
+                 use_unstructured=True,
+                 use_playwright=False,
+                 use_selenium=False,
+
+                 # pdfs
+                 use_pymupdf='auto',
+                 use_unstructured_pdf='auto',
+                 use_pypdf='auto',
+                 enable_pdf_ocr='auto',
+                 enable_pdf_doctr='auto',
+                 try_pdf_as_html='auto',
+
+                 # images
+                 enable_ocr=False,
+                 enable_doctr=False,
+                 enable_pix2struct=False,
+                 enable_captions=True,
+                 captions_model=None,
+
+                 caption_loader=None,
+                 doctr_loader=None,
+                 pix2struct_loader=None,
+
+                 # json
+                 jq_schema='.[]',
+
+                 existing_files=[],
+                 existing_hash_ids={},
+                 db_type=None,
+                 selected_file_types=None,
+                 ):
+    if verbose:
+        print("BEGIN Consuming path_or_paths=%s url=%s text=%s" % (path_or_paths, url, text), flush=True)
+    if selected_file_types is not None:
+        non_image_types1 = [x for x in non_image_types if x in selected_file_types]
+        image_types1 = [x for x in image_types if x in selected_file_types]
+    else:
+        non_image_types1 = non_image_types.copy()
+        image_types1 = image_types.copy()
+
+    assert db_type is not None
+    # path_or_paths could be str, list, tuple, generator
+    globs_image_types = []
+    globs_non_image_types = []
+    if not path_or_paths and not url and not text:
+        return []
+    elif url:
+        url = get_list_or_str(url)
+        globs_non_image_types = url if isinstance(url, (list, tuple, types.GeneratorType)) else [url]
+    elif text:
+        globs_non_image_types = text if isinstance(text, (list, tuple, types.GeneratorType)) else [text]
+    elif isinstance(path_or_paths, str) and os.path.isdir(path_or_paths):
+        # single path, only consume allowed files
+        path = path_or_paths
+        # Below globs should match patterns in file_to_doc()
+        [globs_image_types.extend(glob.glob(os.path.join(path, "./**/*.%s" % ftype), recursive=True))
+         for ftype in image_types1]
+        globs_image_types = [os.path.normpath(x) for x in globs_image_types]
+        [globs_non_image_types.extend(glob.glob(os.path.join(path, "./**/*.%s" % ftype), recursive=True))
+         for ftype in non_image_types1]
+        globs_non_image_types = [os.path.normpath(x) for x in globs_non_image_types]
+    else:
+        if isinstance(path_or_paths, str):
+            if os.path.isfile(path_or_paths) or os.path.isdir(path_or_paths):
+                path_or_paths = [path_or_paths]
+            else:
+                # path was deleted etc.
+                return []
+        # list/tuple of files (consume what can, and exception those that selected but cannot consume so user knows)
+        assert isinstance(path_or_paths, (list, tuple, types.GeneratorType)), \
+            "Wrong type for path_or_paths: %s %s" % (path_or_paths, type(path_or_paths))
+        # reform out of allowed types
+        globs_image_types.extend(
+            flatten_list([[os.path.normpath(x) for x in path_or_paths if x.endswith(y)] for y in image_types1]))
+        # could do below:
+        # globs_non_image_types = flatten_list([[x for x in path_or_paths if x.endswith(y)] for y in non_image_types1])
+        # But instead, allow fail so can collect unsupported too
+        set_globs_image_types = set(globs_image_types)
+        globs_non_image_types.extend([os.path.normpath(x) for x in path_or_paths if x not in set_globs_image_types])
+
+    # filter out any files to skip (e.g. if already processed them)
+    # this is easy, but too aggressive in case a file changed, so parent probably passed existing_files=[]
+    assert not existing_files, "DEV: assume not using this approach"
+    if existing_files:
+        set_skip_files = set(existing_files)
+        globs_image_types = [x for x in globs_image_types if x not in set_skip_files]
+        globs_non_image_types = [x for x in globs_non_image_types if x not in set_skip_files]
+    if existing_hash_ids:
+        # assume consistent with add_meta() use of hash_file(file)
+        # also assume consistent with get_existing_hash_ids for dict creation
+        # assume hashable values
+        existing_hash_ids_set = set(existing_hash_ids.items())
+        hash_ids_all_image = set({x: hash_file(x) for x in globs_image_types}.items())
+        hash_ids_all_non_image = set({x: hash_file(x) for x in globs_non_image_types}.items())
+        # don't use symmetric diff.  If file is gone, ignore and don't remove or something
+        #  just consider existing files (key) having new hash or not (value)
+        new_files_image = set(dict(hash_ids_all_image - existing_hash_ids_set).keys())
+        new_files_non_image = set(dict(hash_ids_all_non_image - existing_hash_ids_set).keys())
+        globs_image_types = [x for x in globs_image_types if x in new_files_image]
+        globs_non_image_types = [x for x in globs_non_image_types if x in new_files_non_image]
+
+    # could use generator, but messes up metadata handling in recursive case
+    if caption_loader and not isinstance(caption_loader, (bool, str)) and caption_loader.device != 'cpu' or \
+            get_device() == 'cuda':
+        # to avoid deadlocks, presume was preloaded and so can't fork due to cuda context
+        # get_device() == 'cuda' because presume faster to process image from (temporarily) preloaded model
+        n_jobs_image = 1
+    else:
+        n_jobs_image = n_jobs
+    if enable_doctr or enable_pdf_doctr in [True, 'auto', 'on']:
+        if doctr_loader and not isinstance(doctr_loader, (bool, str)) and doctr_loader.device != 'cpu':
+            # can't fork cuda context
+            n_jobs = 1
+
+    return_file = True  # local choice
+    is_url = url is not None
+    is_txt = text is not None
+    model_loaders = dict(caption=caption_loader,
+                         doctr=doctr_loader,
+                         pix2struct=pix2struct_loader)
+    model_loaders0 = model_loaders.copy()
+    kwargs = dict(verbose=verbose, fail_any_exception=fail_any_exception,
+                  return_file=return_file,
+                  chunk=chunk, chunk_size=chunk_size,
+                  n_jobs=n_jobs,
+                  is_url=is_url,
+                  is_txt=is_txt,
+
+                  # urls
+                  use_unstructured=use_unstructured,
+                  use_playwright=use_playwright,
+                  use_selenium=use_selenium,
+
+                  # pdfs
+                  use_pymupdf=use_pymupdf,
+                  use_unstructured_pdf=use_unstructured_pdf,
+                  use_pypdf=use_pypdf,
+                  enable_pdf_ocr=enable_pdf_ocr,
+                  enable_pdf_doctr=enable_pdf_doctr,
+                  try_pdf_as_html=try_pdf_as_html,
+
+                  # images
+                  enable_ocr=enable_ocr,
+                  enable_doctr=enable_doctr,
+                  enable_pix2struct=enable_pix2struct,
+                  enable_captions=enable_captions,
+                  captions_model=captions_model,
+                  model_loaders=model_loaders,
+
+                  # json
+                  jq_schema=jq_schema,
+
+                  db_type=db_type,
+                  selected_file_types=selected_file_types,
+                  )
+    if n_jobs != 1 and len(globs_non_image_types) > 1:
+        # avoid nesting, e.g. upload 1 zip and then inside many files
+        # harder to handle if upload many zips with many files, inner parallel one will be disabled by joblib
+        documents = ProgressParallel(n_jobs=n_jobs, verbose=10 if verbose else 0, backend='multiprocessing')(
+            delayed(path_to_doc1)(file, filei=filei, **kwargs) for filei, file in enumerate(globs_non_image_types)
+        )
+    else:
+        documents = [path_to_doc1(file, filei=filei, **kwargs) for filei, file in
+                     enumerate(tqdm(globs_non_image_types))]
+
+    # do images separately since can't fork after cuda in parent, so can't be parallel
+    if n_jobs_image != 1 and len(globs_image_types) > 1:
+        # avoid nesting, e.g. upload 1 zip and then inside many files
+        # harder to handle if upload many zips with many files, inner parallel one will be disabled by joblib
+        image_documents = ProgressParallel(n_jobs=n_jobs, verbose=10 if verbose else 0, backend='multiprocessing')(
+            delayed(path_to_doc1)(file, filei=filei, **kwargs) for filei, file in enumerate(globs_image_types)
+        )
+    else:
+        image_documents = [path_to_doc1(file, filei=filei, **kwargs) for filei, file in
+                           enumerate(tqdm(globs_image_types))]
+
+    # unload loaders (image loaders, includes enable_pdf_doctr that uses same loader)
+    for name, loader in model_loaders.items():
+        loader0 = model_loaders0[name]
+        real_model_initial = loader0 is not None and not isinstance(loader0, (str, bool))
+        real_model_final = model_loaders[name] is not None and not isinstance(model_loaders[name], (str, bool))
+        if not real_model_initial and real_model_final:
+            # clear off GPU newly added model
+            model_loaders[name].unload_model()
+
+    # add image docs in
+    documents += image_documents
+
+    if return_file:
+        # then documents really are files
+        files = documents.copy()
+        documents = []
+        for fil in files:
+            with open(fil, 'rb') as f:
+                documents.extend(pickle.load(f))
+            # remove temp pickle
+            remove(fil)
+    else:
+        documents = reduce(concat, documents)
+
+    if verbose:
+        print("END consuming path_or_paths=%s url=%s text=%s" % (path_or_paths, url, text), flush=True)
+    return documents
+
+
+def prep_langchain(persist_directory,
+                   load_db_if_exists,
+                   db_type, use_openai_embedding,
+                   langchain_mode, langchain_mode_paths, langchain_mode_types,
+                   hf_embedding_model,
+                   migrate_embedding_model,
+                   auto_migrate_db,
+                   n_jobs=-1, kwargs_make_db={},
+                   verbose=False):
+    """
+    Prepare the database for a langchain mode: load the existing one or (re)build it via make_db().
+    The first-time preparation may involve downloads.
+    # FIXME: Add github caching then add here
+
+    :param persist_directory: directory of the persisted database
+    :param langchain_mode: mode to prepare; modes in langchain_modes_intrinsic are skipped
+    :param langchain_mode_paths: dict of mode -> user path; a missing entry is treated as None
+    :param kwargs_make_db: extra keyword arguments for make_db(); copied, never mutated here.
+           For langchain_mode 'wiki' it must contain 'first_para' and 'text_limit'
+    :param verbose: print which preparation path is taken
+    :return: None for intrinsic modes, else the db from get_existing_db() or make_db()
+
+    The remaining parameters are forwarded to get_existing_db() or make_db().
+    """
+    if os.getenv("HARD_ASSERTS"):
+        assert langchain_mode not in ['MyData'], "Should not prep scratch/personal data"
+
+    if langchain_mode in langchain_modes_intrinsic:
+        return None
+
+    db_dir_exists = os.path.isdir(persist_directory)
+    user_path = langchain_mode_paths.get(langchain_mode)
+
+    if db_dir_exists and user_path is None:
+        # database on disk and no user path to scan: just load what is there
+        if verbose:
+            print("Prep: persist_directory=%s exists, using" % persist_directory, flush=True)
+        db, use_openai_embedding, hf_embedding_model = \
+            get_existing_db(None, persist_directory, load_db_if_exists,
+                            db_type, use_openai_embedding,
+                            langchain_mode, langchain_mode_paths, langchain_mode_types,
+                            hf_embedding_model, migrate_embedding_model, auto_migrate_db,
+                            n_jobs=n_jobs)
+    else:
+        if db_dir_exists and user_path is not None:
+            if verbose:
+                print("Prep: persist_directory=%s exists, user_path=%s passed, adding any changed or new documents" % (
+                    persist_directory, user_path), flush=True)
+        elif not db_dir_exists:
+            if verbose:
+                print("Prep: persist_directory=%s does not exist, regenerating" % persist_directory, flush=True)
+        db = None
+        if langchain_mode in ['DriverlessAI docs']:
+            # FIXME: Could also just use dai_docs.pickle directly and upload that
+            get_dai_docs(from_hf=True)
+
+        if langchain_mode in ['wiki']:
+            # plain indexing: raises KeyError if kwargs_make_db lacks these keys
+            get_wiki_sources(first_para=kwargs_make_db['first_para'], text_limit=kwargs_make_db['text_limit'])
+
+        langchain_kwargs = kwargs_make_db.copy()
+        # NOTE: every local name visible here (all parameters plus db, db_dir_exists, user_path, ...)
+        # is passed on by name and overrides same-named entries of kwargs_make_db,
+        # so renaming or adding locals in this function changes what make_db() receives
+        langchain_kwargs.update(locals())
+        db, num_new_sources, new_sources_metadata = make_db(**langchain_kwargs)
+
+    return db
+
+
+import posthog
+
+posthog.disabled = True
+
+
+class FakeConsumer(object):
+    def __init__(self, *args, **kwargs):
+        pass
+
+    def run(self):
+        pass
+
+    def pause(self):
+        pass
+
+    def upload(self):
+        pass
+
+    def next(self):
+        pass
+
+    def request(self, batch):
+        pass
+
+
+posthog.Consumer = FakeConsumer
+
+
+def check_update_chroma_embedding(db,
+                                  db_type,
+                                  use_openai_embedding,
+                                  hf_embedding_model, migrate_embedding_model, auto_migrate_db,
+                                  langchain_mode, langchain_mode_paths, langchain_mode_types,
+                                  n_jobs=-1):
+    changed_db = False
+    embed_tuple = load_embed(db=db)
+    if embed_tuple not in [(True, use_openai_embedding, hf_embedding_model),
+                           (False, use_openai_embedding, hf_embedding_model)]:
+        print("Detected new embedding %s vs. %s %s, updating db: %s" % (
+            use_openai_embedding, hf_embedding_model, embed_tuple, langchain_mode), flush=True)
+        # handle embedding changes
+        db_get = get_documents(db)
+        sources = [Document(page_content=result[0], metadata=result[1] or {})
+                   for result in zip(db_get['documents'], db_get['metadatas'])]
+        # delete index, has to be redone
+        persist_directory = db._persist_directory
+        shutil.move(persist_directory, persist_directory + "_" + str(uuid.uuid4()) + ".bak")
+        assert db_type in ['chroma', 'chroma_old']
+        load_db_if_exists = False
+        db = get_db(sources, use_openai_embedding=use_openai_embedding, db_type=db_type,
+                    persist_directory=persist_directory, load_db_if_exists=load_db_if_exists,
+                    langchain_mode=langchain_mode,
+                    langchain_mode_paths=langchain_mode_paths,
+                    langchain_mode_types=langchain_mode_types,
+                    collection_name=None,
+                    hf_embedding_model=hf_embedding_model,
+                    migrate_embedding_model=migrate_embedding_model,
+                    auto_migrate_db=auto_migrate_db,
+                    n_jobs=n_jobs,
+                    )
+        changed_db = True
+        print("Done updating db for new embedding: %s" % langchain_mode, flush=True)
+
+    return db, changed_db
+
+
+def migrate_meta_func(db, langchain_mode):
+    changed_db = False
+    db_get = get_documents(db)
+    # just check one doc
+    if len(db_get['metadatas']) > 0 and 'chunk_id' not in db_get['metadatas'][0]:
+        print("Detected old metadata, adding additional information", flush=True)
+        t0 = time.time()
+        # handle meta changes
+        [x.update(dict(chunk_id=x.get('chunk_id', 0))) for x in db_get['metadatas']]
+        client_collection = db._client.get_collection(name=db._collection.name,
+                                                      embedding_function=db._collection._embedding_function)
+        client_collection.update(ids=db_get['ids'], metadatas=db_get['metadatas'])
+        # check
+        db_get = get_documents(db)
+        assert 'chunk_id' in db_get['metadatas'][0], "Failed to add meta"
+        changed_db = True
+        print("Done updating db for new meta: %s in %s seconds" % (langchain_mode, time.time() - t0), flush=True)
+
+    return db, changed_db
+
+
+def get_existing_db(db, persist_directory,
+                    load_db_if_exists, db_type, use_openai_embedding,
+                    langchain_mode, langchain_mode_paths, langchain_mode_types,
+                    hf_embedding_model,
+                    migrate_embedding_model,
+                    auto_migrate_db=False,
+                    verbose=False, check_embedding=True, migrate_meta=True,
+                    n_jobs=-1):
+    if load_db_if_exists and db_type in ['chroma', 'chroma_old'] and os.path.isdir(persist_directory):
+        if os.path.isfile(os.path.join(persist_directory, 'chroma.sqlite3')):
+            must_migrate = False
+        elif os.path.isdir(os.path.join(persist_directory, 'index')):
+            must_migrate = True
+        else:
+            return db, use_openai_embedding, hf_embedding_model
+        chroma_settings = dict(is_persistent=True)
+        use_chromamigdb = False
+        if must_migrate:
+            if auto_migrate_db:
+                print("Detected chromadb<0.4 database, require migration, doing now....", flush=True)
+                from chroma_migrate.import_duckdb import migrate_from_duckdb
+                import chromadb
+                api = chromadb.PersistentClient(path=persist_directory)
+                did_migration = migrate_from_duckdb(api, persist_directory)
+                assert did_migration, "Failed to migrate chroma collection at %s, see https://docs.trychroma.com/migration for CLI tool" % persist_directory
+            elif have_chromamigdb:
+                print(
+                    "Detected chroma<0.4 database but --auto_migrate_db=False, but detected chromamigdb package, so using old database that still requires duckdb",
+                    flush=True)
+                chroma_settings = dict(chroma_db_impl="duckdb+parquet")
+                use_chromamigdb = True
+            else:
+                raise ValueError(
+                    "Detected chromadb<0.4 database, require migration, but did not detect chromamigdb package or did not choose auto_migrate_db=False (see FAQ.md)")
+
+        if db is None:
+            if verbose:
+                print("DO Loading db: %s" % langchain_mode, flush=True)
+            got_embedding, use_openai_embedding0, hf_embedding_model0 = load_embed(persist_directory=persist_directory)
+            if got_embedding:
+                use_openai_embedding, hf_embedding_model = use_openai_embedding0, hf_embedding_model0
+            embedding = get_embedding(use_openai_embedding, hf_embedding_model=hf_embedding_model)
+            import logging
+            logging.getLogger("chromadb").setLevel(logging.ERROR)
+            if use_chromamigdb:
+                from chromamigdb.config import Settings
+                chroma_class = ChromaMig
+            else:
+                from chromadb.config import Settings
+                chroma_class = Chroma
+            client_settings = Settings(anonymized_telemetry=False,
+                                       **chroma_settings,
+                                       persist_directory=persist_directory)
+            db = chroma_class(persist_directory=persist_directory, embedding_function=embedding,
+                              collection_name=langchain_mode.replace(' ', '_'),
+                              client_settings=client_settings)
+            try:
+                db.similarity_search('')
+            except BaseException as e:
+                # migration when no embed_info
+                if 'Dimensionality of (768) does not match index dimensionality (384)' in str(e) or \
+                        'Embedding dimension 768 does not match collection dimensionality 384' in str(e):
+                    hf_embedding_model = "sentence-transformers/all-MiniLM-L6-v2"
+                    embedding = get_embedding(use_openai_embedding, hf_embedding_model=hf_embedding_model)
+                    db = chroma_class(persist_directory=persist_directory, embedding_function=embedding,
+                                      collection_name=langchain_mode.replace(' ', '_'),
+                                      client_settings=client_settings)
+                    # should work now, let fail if not
+                    db.similarity_search('')
+                    save_embed(db, use_openai_embedding, hf_embedding_model)
+                else:
+                    raise
+
+            if verbose:
+                print("DONE Loading db: %s" % langchain_mode, flush=True)
+        else:
+            if not migrate_embedding_model:
+                # OVERRIDE embedding choices if could load embedding info when not migrating
+                got_embedding, use_openai_embedding, hf_embedding_model = load_embed(db=db)
+            if verbose:
+                print("USING already-loaded db: %s" % langchain_mode, flush=True)
+        if check_embedding:
+            db_trial, changed_db = check_update_chroma_embedding(db,
+                                                                 db_type,
+                                                                 use_openai_embedding,
+                                                                 hf_embedding_model,
+                                                                 migrate_embedding_model,
+                                                                 auto_migrate_db,
+                                                                 langchain_mode,
+                                                                 langchain_mode_paths,
+                                                                 langchain_mode_types,
+                                                                 n_jobs=n_jobs)
+            if changed_db:
+                db = db_trial
+                # only call persist if really changed db, else takes too long for large db
+                if db is not None:
+                    db.persist()
+                    clear_embedding(db)
+        save_embed(db, use_openai_embedding, hf_embedding_model)
+        if migrate_meta and db is not None:
+            db_trial, changed_db = migrate_meta_func(db, langchain_mode)
+            if changed_db:
+                db = db_trial
+        return db, use_openai_embedding, hf_embedding_model
+    return db, use_openai_embedding, hf_embedding_model
+
+
+def clear_embedding(db):
+    """
+    Push a db's embedding model back to the CPU when that model is not shared.
+
+    Nothing is done when db is None, when the embedding function has no ``client`` with a ``cpu``
+    attribute, or when the client has no ``preload`` attribute or a truthy one.
+
+    :param db: vector db exposing ``_embedding_function``, or None
+    :return: None
+    """
+    if db is None:
+        return
+    # keeping the model on the GPU wastes memory; it is only needed there again at the next embed
+    try:
+        embedder = db._embedding_function
+        if not hasattr(embedder, 'client'):
+            return
+        client = embedder.client
+        if not hasattr(client, 'cpu'):
+            return
+        # only move when each db/user owns its model; a preloaded (shared) model stays where it is
+        if hasattr(client, 'preload') and not client.preload:
+            client.cpu()
+            clear_torch_cache()
+    except RuntimeError as exc:
+        print("clear_embedding error: %s" % ''.join(traceback.format_tb(exc.__traceback__)), flush=True)
+
+
+def make_db(**langchain_kwargs):
+    """
+    Keyword-only front end for _make_db().
+
+    Every _make_db() parameter the caller did not pass is filled from the default of the same-named
+    _run_qa_db() parameter.  Anything still missing afterwards trips the assertion.  Keys that
+    _make_db() does not accept are dropped before the call.
+
+    :param langchain_kwargs: keyword arguments, possibly a superset of what _make_db() accepts
+    :return: whatever _make_db() returns
+    :raises AssertionError: if a _make_db() parameter is neither passed nor has a _run_qa_db() default
+    """
+    func_names = list(inspect.signature(_make_db).parameters)
+    missing_kwargs = [x for x in func_names if x not in langchain_kwargs]
+    # run_qa_db() only declares **kwargs, so its signature carries no per-parameter defaults;
+    # the defaults live on _run_qa_db()
+    defaults_db = {k: v.default for k, v in inspect.signature(_run_qa_db).parameters.items()
+                   if v.default is not inspect.Parameter.empty}
+    for k in missing_kwargs:
+        if k in defaults_db:
+            # copy so a mutable default (e.g. a dict) is never shared with the signature object
+            langchain_kwargs[k] = copy.deepcopy(defaults_db[k])
+    # final check for missing
+    missing_kwargs = [x for x in func_names if x not in langchain_kwargs]
+    assert not missing_kwargs, "Missing kwargs for make_db: %s" % missing_kwargs
+    # only keep actual used
+    langchain_kwargs = {k: v for k, v in langchain_kwargs.items() if k in func_names}
+    return _make_db(**langchain_kwargs)
+
+
+# file name of the lock taken by save_embed()/load_embed() around the embed_info file
+embed_lock_name = 'embed.lock'
+
+
+def get_embed_lock_file(db, persist_directory=None):
+    """
+    Return the path of the embed lock file belonging to a persist directory.
+
+    :param db: db object; its ``_persist_directory`` is used when persist_directory is None
+    :param persist_directory: explicit persist directory, takes precedence over the db's own
+    :return: path of the lock file, or None when db has no ``_persist_directory`` and
+             persist_directory is empty/None
+    """
+    if not (hasattr(db, '_persist_directory') or persist_directory):
+        return None
+    if persist_directory is None:
+        persist_directory = db._persist_directory
+    check_persist_directory(persist_directory)
+    # locks are kept in a 'locks' tree that mirrors the persist directory
+    lock_dir = makedirs(os.path.join('locks', persist_directory), exist_ok=True, tmp_ok=True, use_base=True)
+    lock_file = os.path.join(lock_dir, embed_lock_name)
+    makedirs(os.path.dirname(lock_file))
+    return lock_file
+
+
+def save_embed(db, use_openai_embedding, hf_embedding_model):
+    """
+    Record which embedding a persisted db uses in ``<persist_directory>/embed_info``.
+
+    The file holds a pickled ``(use_openai_embedding, model name)`` tuple and is written under the
+    embed lock.  Nothing is written when db has no ``_persist_directory``.
+
+    :param db: db object (or None)
+    :param use_openai_embedding: stored as given
+    :param hf_embedding_model: model name as str, an object with ``model_name``, or a dict with key
+           'name'; anything else is stored as 'hkunlp/instructor-large'
+    :return: (use_openai_embedding, hf_embedding_model) exactly as passed in
+    :raises RuntimeError: if hf_embedding_model has an unsupported form and HARD_ASSERTS is set
+    """
+    if hasattr(db, '_persist_directory'):
+        persist_directory = db._persist_directory
+        # resolve the name before touching the file, so a failure here cannot leave a truncated embed_info
+        if isinstance(hf_embedding_model, str):
+            hf_embedding_model_save = hf_embedding_model
+        elif hasattr(hf_embedding_model, 'model_name'):
+            hf_embedding_model_save = hf_embedding_model.model_name
+        elif isinstance(hf_embedding_model, dict) and 'name' in hf_embedding_model:
+            hf_embedding_model_save = hf_embedding_model['name']
+        else:
+            # was a duplicate of the dict test above, which left this fallback unreachable and
+            # hf_embedding_model_save unbound for None or any other type
+            if os.getenv('HARD_ASSERTS'):
+                # unexpected in testing or normally
+                raise RuntimeError("HERE")
+            hf_embedding_model_save = 'hkunlp/instructor-large'
+        lock_file = get_embed_lock_file(db)
+        with filelock.FileLock(lock_file):
+            embed_info_file = os.path.join(persist_directory, 'embed_info')
+            with open(embed_info_file, 'wb') as f:
+                pickle.dump((use_openai_embedding, hf_embedding_model_save), f)
+    return use_openai_embedding, hf_embedding_model
+
+
+def load_embed(db=None, persist_directory=None):
+    """
+    Find out which embedding a db was built with.
+
+    If db exposes ``embeddings.model_name``, that name decides the answer and is saved via save_embed().
+    Otherwise the embed_info file in the persist directory is read under the embed lock.  When the file
+    is absent, migration defaults are assumed.
+
+    :param db: db object or None; its ``_persist_directory`` is used when persist_directory is None
+    :param persist_directory: directory that holds embed_info
+    :return: (got_embedding, use_openai_embedding, hf_embedding_model); got_embedding is False when
+             embed_info is missing or empty
+    :raises EOFError: if embed_info is empty and HARD_ASSERTS is set
+    """
+    if hasattr(db, 'embeddings') and hasattr(db.embeddings, 'model_name'):
+        hf_embedding_model = db.embeddings.model_name if 'openai' not in db.embeddings.model_name.lower() else None
+        use_openai_embedding = hf_embedding_model is None
+        save_embed(db, use_openai_embedding, hf_embedding_model)
+        return True, use_openai_embedding, hf_embedding_model
+    if persist_directory is None:
+        persist_directory = db._persist_directory
+    embed_info_file = os.path.join(persist_directory, 'embed_info')
+    if os.path.isfile(embed_info_file):
+        needs_repair = False
+        lock_file = get_embed_lock_file(db, persist_directory=persist_directory)
+        with filelock.FileLock(lock_file):
+            with open(embed_info_file, 'rb') as f:
+                try:
+                    # NOTE: pickle.load can execute arbitrary code; embed_info is expected to be
+                    # written only by save_embed(), never taken from untrusted input
+                    use_openai_embedding, hf_embedding_model = pickle.load(f)
+                    if not isinstance(hf_embedding_model, str):
+                        # work-around bug introduced here: https://github.com/h2oai/h2ogpt/commit/54c4414f1ce3b5b7c938def651c0f6af081c66de
+                        hf_embedding_model = 'hkunlp/instructor-large'
+                        needs_repair = True
+                    got_embedding = True
+                except EOFError:
+                    use_openai_embedding, hf_embedding_model = False, 'hkunlp/instructor-large'
+                    got_embedding = False
+                    if os.getenv('HARD_ASSERTS'):
+                        # unexpected in testing or normally
+                        raise
+        if needs_repair:
+            # fix file, but only once the lock is released and the read handle is closed:
+            # save_embed() takes the same lock file through a separate FileLock object,
+            # which would wait forever while this function still held it
+            save_embed(db, use_openai_embedding, hf_embedding_model)
+    else:
+        # migration, assume defaults
+        use_openai_embedding, hf_embedding_model = False, "sentence-transformers/all-MiniLM-L6-v2"
+        got_embedding = False
+    assert isinstance(hf_embedding_model, str)
+    return got_embedding, use_openai_embedding, hf_embedding_model
+
+
+def get_persist_directory(langchain_mode, langchain_type=None, db1s=None, dbs=None):
+    """
+    Work out where the db for langchain_mode is persisted and which langchain type that implies.
+
+    Locations are tried in this order:
+      1. personal: <USERS_BASE_DIR or 'users'>/<username or userid>/db_dir_<langchain_mode>
+      2. shared: db_dir_<langchain_mode>
+      3. fallback: db_nonusers/db_dir_<random uuid>, reported as personal
+
+    :param langchain_mode: mode name; the disabled and LLM modes get an empty directory
+    :param langchain_type: LangChainTypes value requested by the caller, may be None
+    :param db1s: per-user state handed to get_userid_direct()/get_username_direct(), may be None
+    :param dbs: container of shared dbs, only tested for membership of langchain_mode
+    :return: (persist_directory, langchain_type)
+    """
+    if langchain_mode in (LangChainMode.DISABLED.value, LangChainMode.LLM.value):
+        # empty string, not None, so path joins keep working; no db will be found there
+        return '', langchain_type
+
+    personal = LangChainTypes.PERSONAL.value
+    shared = LangChainTypes.SHARED.value
+    mode_dir = 'db_dir_%s' % langchain_mode
+
+    userid = get_userid_direct(db1s)
+    username = get_username_direct(db1s)
+
+    # sanity for bad code
+    assert userid != 'None'
+    assert username != 'None'
+
+    dirid = username or userid
+    if not dirid:
+        if langchain_type == shared:
+            dirid = './'  # just to avoid error
+        elif langchain_type == personal:
+            # e.g. from client when doing transient calls with MyData
+            if db1s is None:
+                # just trick to get filled locally
+                db1s = {LangChainMode.MY_DATA.value: [None, None, None]}
+            set_userid_direct(db1s, str(uuid.uuid4()), str(uuid.uuid4()))
+            userid = get_userid_direct(db1s)
+            username = get_username_direct(db1s)
+            dirid = username or userid
+            langchain_type = personal
+
+    # 1. personal location: existing directory, known personal db, or explicitly personal
+    users_root = os.getenv('USERS_BASE_DIR', 'users')
+    persist_directory = os.path.join(users_root, dirid, mode_dir)
+    if userid:
+        if os.path.isdir(persist_directory) or \
+                (db1s is not None and langchain_mode in db1s) or \
+                langchain_type == personal:
+            persist_directory = makedirs(persist_directory, use_base=True)
+            check_persist_directory(persist_directory)
+            return persist_directory, personal
+
+    # 2. shared location: existing directory, known shared db, or explicitly shared
+    persist_directory = mode_dir
+    if os.path.isdir(persist_directory) or \
+            (dbs is not None and langchain_mode in dbs) or \
+            langchain_type == shared:
+        # report shared so the type stays consistent with the location
+        persist_directory = makedirs(persist_directory, use_base=True)
+        check_persist_directory(persist_directory)
+        return persist_directory, shared
+
+    # 3. dummy return for prep_langchain() or full personal space
+    persist_directory = makedirs(os.path.join('db_nonusers', 'db_dir_%s' % str(uuid.uuid4())), use_base=True)
+    check_persist_directory(persist_directory)
+    return persist_directory, personal
+
+
+def check_persist_directory(persist_directory):
+    """
+    Refuse a persist directory that is exactly ``db_dir_<mode>`` for one of the intrinsic modes.
+
+    :param persist_directory: directory to validate
+    :raises RuntimeError: if it equals the db_dir name of an entry of langchain_modes_intrinsic
+    """
+    # deal with some cases when see intrinsic names being used as shared
+    if any(persist_directory == 'db_dir_%s' % mode for mode in langchain_modes_intrinsic):
+        raise RuntimeError("Illegal access to %s" % persist_directory)
+
+
+def _make_db(use_openai_embedding=False,
+             hf_embedding_model=None,
+             migrate_embedding_model=False,
+             auto_migrate_db=False,
+             first_para=False, text_limit=None,
+             chunk=True, chunk_size=512,
+
+             # urls
+             use_unstructured=True,
+             use_playwright=False,
+             use_selenium=False,
+
+             # pdfs
+             use_pymupdf='auto',
+             use_unstructured_pdf='auto',
+             use_pypdf='auto',
+             enable_pdf_ocr='auto',
+             enable_pdf_doctr='auto',
+             try_pdf_as_html='auto',
+
+             # images
+             enable_ocr=False,
+             enable_doctr=False,
+             enable_pix2struct=False,
+             enable_captions=True,
+             captions_model=None,
+             caption_loader=None,
+             doctr_loader=None,
+             pix2struct_loader=None,
+
+             # json
+             jq_schema='.[]',
+
+             langchain_mode=None,
+             langchain_mode_paths=None,
+             langchain_mode_types=None,
+             db_type='faiss',
+             load_db_if_exists=True,
+             db=None,
+             n_jobs=-1,
+             verbose=False):
+    """
+    Load or build the vector db for langchain_mode and add new sources found under its user path.
+
+    Steps: resolve the persist directory, try get_existing_db(), collect built-in sources when no db
+    exists (wiki_full, wiki, github h2oGPT, DriverlessAI docs), collect user-path sources via
+    path_to_docs(), then either create the db with get_db() or extend it with add_to_db().
+
+    :param use_openai_embedding: forwarded to get_existing_db()/get_db()/add_to_db(); may be replaced
+           by the value get_existing_db() returns
+    :param hf_embedding_model: required (not None); may be replaced by what get_existing_db() returns
+    :param migrate_embedding_model: forwarded to get_existing_db()/get_db()
+    :param auto_migrate_db: forwarded to get_existing_db()/get_db()
+    :param first_para: forwarded to get_wiki_sources()
+    :param text_limit: forwarded to get_wiki_sources()
+    :param chunk: whether to chunk sources
+    :param chunk_size: chunk size handed to the chunker and path_to_docs()
+    :param langchain_mode: name of the collection/mode
+    :param langchain_mode_paths: dict of langchain_mode -> user path; None is treated as {}
+    :param langchain_mode_types: dict of langchain_mode -> langchain type; the resolved type for
+           langchain_mode is written back into it; None is treated as {}
+    :param db_type: db backend name, forwarded to the helpers
+    :param load_db_if_exists: forwarded to get_existing_db()
+    :param db: already-loaded db, or None
+    :param n_jobs: parallelism forwarded to the helpers
+    :param verbose: print progress details
+
+    The url, pdf, image and json options are passed unchanged to path_to_docs().
+
+    :return: (db, number of new sources, list of the new sources' metadata)
+    """
+    assert hf_embedding_model is not None
+    # the signature defaults are None; tolerate them when called directly instead of via make_db()
+    if langchain_mode_paths is None:
+        langchain_mode_paths = {}
+    if langchain_mode_types is None:
+        langchain_mode_types = {}
+    user_path = langchain_mode_paths.get(langchain_mode)
+    langchain_type = langchain_mode_types.get(langchain_mode, LangChainTypes.EITHER.value)
+    persist_directory, langchain_type = get_persist_directory(langchain_mode, langchain_type=langchain_type)
+    # remember the resolved type for the caller
+    langchain_mode_types[langchain_mode] = langchain_type
+    # see if can get persistent chroma db
+    db_trial, use_openai_embedding, hf_embedding_model = \
+        get_existing_db(db, persist_directory, load_db_if_exists, db_type,
+                        use_openai_embedding,
+                        langchain_mode, langchain_mode_paths, langchain_mode_types,
+                        hf_embedding_model, migrate_embedding_model, auto_migrate_db, verbose=verbose,
+                        n_jobs=n_jobs)
+    if db_trial is not None:
+        db = db_trial
+
+    sources = []
+    if not db:
+        # no db yet: gather the built-in sources that belong to this mode, if any
+        chunk_sources = functools.partial(_chunk_sources, chunk=chunk, chunk_size=chunk_size, db_type=db_type)
+        if langchain_mode in ['wiki_full']:
+            from read_wiki_full import get_all_documents
+            small_test = None
+            print("Generating new wiki", flush=True)
+            sources1 = get_all_documents(small_test=small_test, n_jobs=os.cpu_count() // 2)
+            print("Got new wiki", flush=True)
+            sources1 = chunk_sources(sources1, chunk=chunk)
+            print("Chunked new wiki", flush=True)
+            sources.extend(sources1)
+        elif langchain_mode in ['wiki']:
+            sources1 = get_wiki_sources(first_para=first_para, text_limit=text_limit)
+            sources1 = chunk_sources(sources1, chunk=chunk)
+            sources.extend(sources1)
+        elif langchain_mode in ['github h2oGPT']:
+            # sources = get_github_docs("dagster-io", "dagster")
+            sources1 = get_github_docs("h2oai", "h2ogpt")
+            # FIXME: always chunk for now
+            sources1 = chunk_sources(sources1)
+            sources.extend(sources1)
+        elif langchain_mode in ['DriverlessAI docs']:
+            sources1 = get_dai_docs(from_hf=True)
+            # FIXME: DAI docs are already chunked well, should only chunk more if over limit
+            sources1 = chunk_sources(sources1, chunk=False)
+            sources.extend(sources1)
+    if user_path:
+        # UserData or custom, which has to be from user's disk
+        if db is not None:
+            # NOTE: Ignore file names for now, only go by hash ids
+            # existing_files = get_existing_files(db)
+            existing_files = []
+            existing_hash_ids = get_existing_hash_ids(db)
+        else:
+            # pretend no existing files so won't filter
+            existing_files = []
+            existing_hash_ids = []
+        # chunk internally for speed over multiple docs
+        # FIXME: If first had old Hash=None and switch embeddings,
+        #  then re-embed, and then hit here and reload so have hash, and then re-embed.
+        sources1 = path_to_docs(user_path, n_jobs=n_jobs, chunk=chunk, chunk_size=chunk_size,
+                                # urls
+                                use_unstructured=use_unstructured,
+                                use_playwright=use_playwright,
+                                use_selenium=use_selenium,
+
+                                # pdfs
+                                use_pymupdf=use_pymupdf,
+                                use_unstructured_pdf=use_unstructured_pdf,
+                                use_pypdf=use_pypdf,
+                                enable_pdf_ocr=enable_pdf_ocr,
+                                enable_pdf_doctr=enable_pdf_doctr,
+                                try_pdf_as_html=try_pdf_as_html,
+
+                                # images
+                                enable_ocr=enable_ocr,
+                                enable_doctr=enable_doctr,
+                                enable_pix2struct=enable_pix2struct,
+                                enable_captions=enable_captions,
+                                captions_model=captions_model,
+                                caption_loader=caption_loader,
+                                doctr_loader=doctr_loader,
+                                pix2struct_loader=pix2struct_loader,
+
+                                # json
+                                jq_schema=jq_schema,
+
+                                existing_files=existing_files, existing_hash_ids=existing_hash_ids,
+                                db_type=db_type)
+        new_metadata_sources = set([x.metadata['source'] for x in sources1])
+        if new_metadata_sources:
+            if os.getenv('NO_NEW_FILES') is not None:
+                raise RuntimeError("Expected no new files! %s" % new_metadata_sources)
+            print("Loaded %s new files as sources to add to %s" % (len(new_metadata_sources), langchain_mode),
+                  flush=True)
+            if verbose:
+                print("Files added: %s" % '\n'.join(new_metadata_sources), flush=True)
+        sources.extend(sources1)
+        # these two checks look at all sources gathered so far, not only the user-path ones
+        if len(sources) > 0 and os.getenv('NO_NEW_FILES') is not None:
+            raise RuntimeError("Expected no new files! %s" % langchain_mode)
+        if len(sources) == 0 and os.getenv('SHOULD_NEW_FILES') is not None:
+            raise RuntimeError("Expected new files! %s" % langchain_mode)
+        print("Loaded %s sources for potentially adding to %s" % (len(sources), langchain_mode), flush=True)
+
+        # see if got sources
+        if not sources:
+            if verbose:
+                if db is not None:
+                    print("langchain_mode %s has no new sources, nothing to add to db" % langchain_mode, flush=True)
+                else:
+                    print("langchain_mode %s has no sources, not making new db" % langchain_mode, flush=True)
+            return db, 0, []
+        if verbose:
+            # the two messages were swapped: an existing db is added to, a missing one is generated
+            if db is not None:
+                print("Adding to db", flush=True)
+            else:
+                print("Generating db", flush=True)
+    if not db:
+        if sources:
+            db = get_db(sources, use_openai_embedding=use_openai_embedding, db_type=db_type,
+                        persist_directory=persist_directory,
+                        langchain_mode=langchain_mode,
+                        langchain_mode_paths=langchain_mode_paths,
+                        langchain_mode_types=langchain_mode_types,
+                        hf_embedding_model=hf_embedding_model,
+                        migrate_embedding_model=migrate_embedding_model,
+                        auto_migrate_db=auto_migrate_db,
+                        n_jobs=n_jobs)
+            if verbose:
+                print("Generated db", flush=True)
+        elif langchain_mode not in langchain_modes_intrinsic:
+            print("Did not generate db for %s since no sources" % langchain_mode, flush=True)
+        new_sources_metadata = [x.metadata for x in sources]
+    elif user_path is not None:
+        print("Existing db, potentially adding %s sources from user_path=%s" % (len(sources), user_path), flush=True)
+        db, num_new_sources, new_sources_metadata = add_to_db(db, sources, db_type=db_type,
+                                                              use_openai_embedding=use_openai_embedding,
+                                                              hf_embedding_model=hf_embedding_model)
+        print("Existing db, added %s new sources from user_path=%s" % (num_new_sources, user_path), flush=True)
+    else:
+        new_sources_metadata = [x.metadata for x in sources]
+
+    return db, len(new_sources_metadata), new_sources_metadata
+
+
+def get_metadatas(db):
+    """
+    Collect the metadata of the documents stored in db.
+
+    :param db: FAISS db, Chroma/ChromaMig db, another db offering similarity_search(), or None
+    :return: list of metadata entries; empty list when db is None
+    """
+    from langchain.vectorstores import FAISS
+    if isinstance(db, FAISS):
+        return [doc.metadata for doc in db.docstore._dict.values()]
+    if isinstance(db, Chroma) or isinstance(db, ChromaMig) or ChromaMig.__name__ in str(db):
+        return get_documents(db)['metadatas']
+    if db is not None:
+        # FIXME: Hack due to https://github.com/weaviate/weaviate/issues/1947
+        # seems no way to get all metadata, so need to avoid this approach for weaviate
+        return [hit.metadata for hit in db.similarity_search("", k=10000)]
+    return []
+
+
+def get_db_lock_file(db, lock_type='getdb'):
+    """
+    Return the path of the ``<lock_type>.lock`` file for a persisted db.
+
+    :param db: db object; only dbs with a ``_persist_directory`` get a lock file
+    :param lock_type: base name of the lock file
+    :return: lock file path, or None when db has no ``_persist_directory``
+    """
+    if not hasattr(db, '_persist_directory'):
+        return None
+    persist_directory = db._persist_directory
+    check_persist_directory(persist_directory)
+    # locks are kept in a 'locks' tree that mirrors the persist directory
+    lock_dir = makedirs(os.path.join('locks', persist_directory), exist_ok=True, tmp_ok=True, use_base=True)
+    lock_file = os.path.join(lock_dir, "%s.lock" % lock_type)
+    makedirs(os.path.dirname(lock_file))  # ensure made
+    return lock_file
+
+
+def get_documents(db):
+    """
+    Fetch all documents of db, holding the db file lock when the db is persisted.
+
+    :param db: db object
+    :return: whatever _get_documents() returns for db
+    """
+    if not hasattr(db, '_persist_directory'):
+        return _get_documents(db)
+    # get segfaults and other errors when multiple threads access this
+    with filelock.FileLock(get_db_lock_file(db)):
+        return _get_documents(db)
+
+
+def _get_documents(db):
+    """
+    Fetch all documents of db without taking any lock.
+
+    :param db: FAISS db, Chroma/ChromaMig db, or another db offering similarity_search()
+    :return: for Chroma/ChromaMig the result of ``db.get()``, otherwise a dict with the single key
+             'documents' holding a list
+    """
+    from langchain.vectorstores import FAISS
+    if isinstance(db, FAISS):
+        return dict(documents=list(db.docstore._dict.values()))
+    if isinstance(db, Chroma) or isinstance(db, ChromaMig) or ChromaMig.__name__ in str(db):
+        return db.get()
+    # FIXME: Hack due to https://github.com/weaviate/weaviate/issues/1947
+    # seems no way to get all metadata, so need to avoid this approach for weaviate
+    return dict(documents=list(db.similarity_search("", k=10000)))
+
+
+def get_docs_and_meta(db, top_k_docs, filter_kwargs={}, text_context_list=None):
+    """
+    Locked front end for _get_docs_and_meta(): a persisted db is read under its db file lock.
+
+    :param db: db object or None
+    :param top_k_docs: forwarded to _get_docs_and_meta()
+    :param filter_kwargs: forwarded to _get_docs_and_meta(); not modified here
+    :param text_context_list: forwarded to _get_docs_and_meta()
+    :return: (documents, metadatas) as produced by _get_docs_and_meta()
+    """
+    fetch = functools.partial(_get_docs_and_meta, db, top_k_docs,
+                              filter_kwargs=filter_kwargs, text_context_list=text_context_list)
+    if not hasattr(db, '_persist_directory'):
+        return fetch()
+    with filelock.FileLock(get_db_lock_file(db)):
+        return fetch()
+
+
+def _get_docs_and_meta(db, top_k_docs, filter_kwargs={}, text_context_list=None):
+    """
+    Gather documents and metadata from text_context_list followed by the db.
+
+    :param db: Chroma/ChromaMig db, FAISS db, another db, or None
+    :param top_k_docs: only used for FAISS: -1 takes every stored document, otherwise the first
+           top_k_docs docstore entries
+    :param filter_kwargs: dict whose 'filter' entry is used as the ``where`` clause for Chroma
+    :param text_context_list: optional items; ``page_content``/``metadata`` are used when present,
+           else the item itself and an empty dict
+    :return: (documents, metadatas)
+    """
+    docs_out = []
+    metas_out = []
+
+    if text_context_list:
+        docs_out.extend([item.page_content if hasattr(item, 'page_content') else item
+                         for item in text_context_list])
+        metas_out.extend([item.metadata if hasattr(item, 'metadata') else {}
+                          for item in text_context_list])
+
+    from langchain.vectorstores import FAISS
+    if isinstance(db, Chroma) or isinstance(db, ChromaMig) or ChromaMig.__name__ in str(db):
+        fetched = db._collection.get(where=filter_kwargs.get('filter'))
+        metas_out.extend(fetched['metadatas'])
+        docs_out.extend(fetched['documents'])
+    elif isinstance(db, FAISS):
+        import itertools
+        metas_out.extend(get_metadatas(db))
+        # FIXME: FAISS has no filter
+        store = db.docstore._dict
+        if top_k_docs == -1:
+            docs_out.extend(store.values())
+        else:
+            # take only the first top_k_docs entries of the docstore
+            docs_out.extend(value for _, value in itertools.islice(store.items(), top_k_docs))
+    elif db is not None:
+        metas_out.extend(get_metadatas(db))
+        docs_out.extend(get_documents(db)['documents'])
+
+    return docs_out, metas_out
+
+
+def get_existing_files(db):
+    """
+    Return the set of distinct 'source' values found in the db's metadata.
+
+    :param db: db object handed to get_metadatas()
+    :return: set of sources
+    """
+    return {meta['source'] for meta in get_metadatas(db)}
+
+
+def get_existing_hash_ids(db):
+    """
+    Map each normalized source path in the db's metadata to its 'hashid' (None when absent).
+
+    When a source occurs in several metadata entries, the last entry's hashid is kept.
+
+    :param db: db object handed to get_metadatas()
+    :return: dict of normalized source path -> hashid or None
+    """
+    # assume consistency, that any prior hashed source was single hashed file at the time among all source chunks
+    hash_by_source = {}
+    for meta in get_metadatas(db):
+        hash_by_source[os.path.normpath(meta['source'])] = meta.get('hashid')
+    return hash_by_source
+
+
+def run_qa_db(**kwargs):
+    """
+    Keyword-only front end for _run_qa_db().
+
+    Fills a few hard-coded defaults, forces ``llamacpp_dict`` to an empty dict, asserts that every
+    _run_qa_db() parameter is present, drops keys _run_qa_db() does not accept and then calls it.
+
+    :param kwargs: keyword arguments, possibly a superset of what _run_qa_db() accepts
+    :return: whatever _run_qa_db() returns
+    """
+    accepted = list(inspect.signature(_run_qa_db).parameters)
+    # hard-coded defaults, applied only where the caller gave no value
+    for key, fallback in (('answer_with_sources', True),
+                          ('show_rank', False),
+                          ('show_accordions', True),
+                          ('show_link_in_sources', True),
+                          ('top_k_docs_max_show', 10)):
+        kwargs.setdefault(key, fallback)
+    # NOTE(review): this replaces any llamacpp_dict the caller passed - confirm that is intended
+    kwargs['llamacpp_dict'] = {}  # shouldn't be required unless from test using _run_qa_db
+    missing_kwargs = [name for name in accepted if name not in kwargs]
+    assert not missing_kwargs, "Missing kwargs for run_qa_db: %s" % missing_kwargs
+    # only keep actual used
+    call_kwargs = {name: value for name, value in kwargs.items() if name in accepted}
+    # NOTE(review): _run_qa_db() contains a yield, so the call below only creates a generator and the
+    # finally clause clears the torch cache before any of it has been consumed
+    try:
+        return _run_qa_db(**call_kwargs)
+    finally:
+        clear_torch_cache()
+
+
+def _run_qa_db(query=None,
+               iinput=None,
+               context=None,
+               use_openai_model=False, use_openai_embedding=False,
+               first_para=False, text_limit=None, top_k_docs=4, chunk=True, chunk_size=512,
+
+               # urls
+               use_unstructured=True,
+               use_playwright=False,
+               use_selenium=False,
+
+               # pdfs
+               use_pymupdf='auto',
+               use_unstructured_pdf='auto',
+               use_pypdf='auto',
+               enable_pdf_ocr='auto',
+               enable_pdf_doctr='auto',
+               try_pdf_as_html='auto',
+
+               # images
+               enable_ocr=False,
+               enable_doctr=False,
+               enable_pix2struct=False,
+               enable_captions=True,
+               captions_model=None,
+               caption_loader=None,
+               doctr_loader=None,
+               pix2struct_loader=None,
+
+               # json
+               jq_schema='.[]',
+
+               langchain_mode_paths={},
+               langchain_mode_types={},
+               detect_user_path_changes_every_query=False,
+               db_type=None,
+               model_name=None, model=None, tokenizer=None, inference_server=None,
+               langchain_only_model=False,
+               hf_embedding_model=None,
+               migrate_embedding_model=False,
+               auto_migrate_db=False,
+               stream_output=False,
+               async_output=True,
+               num_async=3,
+               prompter=None,
+               prompt_type=None,
+               prompt_dict=None,
+               answer_with_sources=True,
+               append_sources_to_answer=True,
+               cut_distance=1.64,
+               add_chat_history_to_context=True,
+               add_search_to_context=False,
+               keep_sources_in_context=False,
+               memory_restriction_level=0,
+               system_prompt='',
+               sanitize_bot_response=False,
+               show_rank=False,
+               show_accordions=True,
+               show_link_in_sources=True,
+               top_k_docs_max_show=10,
+               use_llm_if_no_docs=True,
+               load_db_if_exists=False,
+               db=None,
+               do_sample=False,
+               temperature=0.1,
+               top_k=40,
+               top_p=0.7,
+               num_beams=1,
+               max_new_tokens=512,
+               min_new_tokens=1,
+               early_stopping=False,
+               max_time=180,
+               repetition_penalty=1.0,
+               num_return_sequences=1,
+               langchain_mode=None,
+               langchain_action=None,
+               langchain_agents=None,
+               document_subset=DocumentSubset.Relevant.name,
+               document_choice=[DocumentChoice.ALL.value],
+               pre_prompt_query=None,
+               prompt_query=None,
+               pre_prompt_summary=None,
+               prompt_summary=None,
+               text_context_list=None,
+               chat_conversation=None,
+               visible_models=None,
+               h2ogpt_key=None,
+               docs_ordering_type='reverse_ucurve_sort',
+               min_max_new_tokens=256,
+
+               n_jobs=-1,
+               llamacpp_dict=None,
+               verbose=False,
+               cli=False,
+               lora_weights='',
+               auto_reduce_chunks=True,
+               max_chunks=100,
+               total_tokens_for_docs=None,
+               headsize=50,
+               ):
+    """
+
+    :param query:
+    :param use_openai_model:
+    :param use_openai_embedding:
+    :param first_para:
+    :param text_limit:
+    :param top_k_docs:
+    :param chunk:
+    :param chunk_size:
+    :param langchain_mode_paths: dict of langchain_mode -> user path to glob recursively from
+    :param db_type: 'faiss' for in-memory
+                    'chroma' (for chroma >= 0.4)
+                    'chroma_old' (for chroma < 0.4)
+                    'weaviate' for persisted on disk
+    :param model_name: model name, used to switch behaviors
+    :param model: pre-initialized model, else will make new one
+    :param tokenizer: pre-initialized tokenizer, else will make new one.  Required not None if model is not None
+    :param answer_with_sources
+    :return:
+    """
+    t_run = time.time()
+    if stream_output:
+        # threads and asyncio don't mix
+        async_output = False
+    if langchain_action in [LangChainAction.QUERY.value]:
+        # only summarization supported
+        async_output = False
+
+    # in case None, e.g. lazy client, then set based upon actual model
+    pre_prompt_query, prompt_query, pre_prompt_summary, prompt_summary = \
+        get_langchain_prompts(pre_prompt_query, prompt_query,
+                              pre_prompt_summary, prompt_summary,
+                              model_name, inference_server,
+                              llamacpp_dict.get('model_path_llama'))
+
+    assert db_type is not None
+    assert hf_embedding_model is not None
+    assert langchain_mode_paths is not None
+    assert langchain_mode_types is not None
+    if model is not None:
+        assert model_name is not None  # require so can make decisions
+    assert query is not None
+    assert prompter is not None or prompt_type is not None or model is None  # if model is None, then will generate
+    if prompter is not None:
+        prompt_type = prompter.prompt_type
+        prompt_dict = prompter.prompt_dict
+    if model is not None:
+        assert prompt_type is not None
+        if prompt_type == PromptType.custom.name:
+            assert prompt_dict is not None  # should at least be {} or ''
+        else:
+            prompt_dict = ''
+
+    if LangChainAgent.SEARCH.value in langchain_agents and 'llama' in model_name.lower():
+        system_prompt = """You are a zero shot react agent.
+Consider to prompt of Question that was original query from the user.
+Respond to prompt of Thought with a thought that may lead to a reasonable new action choice.
+Respond to prompt of Action with an action to take out of the tools given, giving exactly single word for the tool name.
+Respond to prompt of Action Input with an input to give the tool.
+Consider to prompt of Observation that was response from the tool.
+Repeat this Thought, Action, Action Input, Observation, Thought sequence several times with new and different thoughts and actions each time, do not repeat.
+Once satisfied that the thoughts, responses are sufficient to answer the question, then respond to prompt of Thought with: I now know the final answer
+Respond to prompt of Final Answer with your final high-quality bullet list answer to the original query.
+"""
+        prompter.system_prompt = system_prompt
+
+    assert len(set(gen_hyper).difference(inspect.signature(get_llm).parameters)) == 0
+    # pass in context to LLM directly, since already has prompt_type structure
+    # can't pass through langchain in get_chain() to LLM: https://github.com/hwchase17/langchain/issues/6638
+    llm, model_name, streamer, prompt_type_out, async_output, only_new_text = \
+        get_llm(use_openai_model=use_openai_model, model_name=model_name,
+                model=model,
+                tokenizer=tokenizer,
+                inference_server=inference_server,
+                langchain_only_model=langchain_only_model,
+                stream_output=stream_output,
+                async_output=async_output,
+                num_async=num_async,
+                do_sample=do_sample,
+                temperature=temperature,
+                top_k=top_k,
+                top_p=top_p,
+                num_beams=num_beams,
+                max_new_tokens=max_new_tokens,
+                min_new_tokens=min_new_tokens,
+                early_stopping=early_stopping,
+                max_time=max_time,
+                repetition_penalty=repetition_penalty,
+                num_return_sequences=num_return_sequences,
+                prompt_type=prompt_type,
+                prompt_dict=prompt_dict,
+                prompter=prompter,
+                context=context,
+                iinput=iinput,
+                sanitize_bot_response=sanitize_bot_response,
+                system_prompt=system_prompt,
+                visible_models=visible_models,
+                h2ogpt_key=h2ogpt_key,
+                min_max_new_tokens=min_max_new_tokens,
+                n_jobs=n_jobs,
+                llamacpp_dict=llamacpp_dict,
+                cli=cli,
+                verbose=verbose,
+                )
+    # in case change, override original prompter
+    if hasattr(llm, 'prompter'):
+        prompter = llm.prompter
+    if hasattr(llm, 'pipeline') and hasattr(llm.pipeline, 'prompter'):
+        prompter = llm.pipeline.prompter
+
+    if prompter is None:
+        if prompt_type is None:
+            prompt_type = prompt_type_out
+        # get prompter
+        chat = True  # FIXME?
+        prompter = Prompter(prompt_type, prompt_dict, debug=False, chat=chat, stream_output=stream_output,
+                            system_prompt=system_prompt)
+
+    use_docs_planned = False
+    scores = []
+    chain = None
+
+    # basic version of prompt without docs etc.
+    data_point = dict(context=context, instruction=query, input=iinput)
+    prompt_basic = prompter.generate_prompt(data_point)
+
+    if isinstance(document_choice, str):
+        # support string as well
+        document_choice = [document_choice]
+
+    func_names = list(inspect.signature(get_chain).parameters)
+    sim_kwargs = {k: v for k, v in locals().items() if k in func_names}
+    missing_kwargs = [x for x in func_names if x not in sim_kwargs]
+    assert not missing_kwargs, "Missing: %s" % missing_kwargs
+    docs, chain, scores, \
+        use_docs_planned, num_docs_before_cut, \
+        use_llm_if_no_docs, llm_mode, top_k_docs_max_show = \
+        get_chain(**sim_kwargs)
+    if document_subset in non_query_commands:
+        formatted_doc_chunks = '\n\n'.join([get_url(x) + '\n\n' + x.page_content for x in docs])
+        if not formatted_doc_chunks and not use_llm_if_no_docs:
+            yield dict(prompt=prompt_basic, response="No sources", sources='', num_prompt_tokens=0)
+            return
+        # if no sources, outside gpt_langchain, LLM will be used with '' input
+        scores = [1] * len(docs)
+        get_answer_args = tuple([query, docs, formatted_doc_chunks, scores, show_rank,
+                                 answer_with_sources,
+                                 append_sources_to_answer])
+        get_answer_kwargs = dict(show_accordions=show_accordions,
+                                 show_link_in_sources=show_link_in_sources,
+                                 top_k_docs_max_show=top_k_docs_max_show,
+                                 docs_ordering_type=docs_ordering_type,
+                                 num_docs_before_cut=num_docs_before_cut,
+                                 verbose=verbose)
+        ret, extra = get_sources_answer(*get_answer_args, **get_answer_kwargs)
+        yield dict(prompt=prompt_basic, response=formatted_doc_chunks, sources=extra, num_prompt_tokens=0)
+        return
+    if not use_llm_if_no_docs:
+        if not docs and langchain_action in [LangChainAction.SUMMARIZE_MAP.value,
+                                             LangChainAction.SUMMARIZE_ALL.value,
+                                             LangChainAction.SUMMARIZE_REFINE.value]:
+            ret = 'No relevant documents to summarize.' if num_docs_before_cut else 'No documents to summarize.'
+            extra = ''
+            yield dict(prompt=prompt_basic, response=ret, sources=extra, num_prompt_tokens=0)
+            return
+        if not docs and not llm_mode:
+            ret = 'No relevant documents to query (for chatting with LLM, pick Resources->Collections->LLM).' if num_docs_before_cut else 'No documents to query (for chatting with LLM, pick Resources->Collections->LLM).'
+            extra = ''
+            yield dict(prompt=prompt_basic, response=ret, sources=extra, num_prompt_tokens=0)
+            return
+
+    if chain is None and not langchain_only_model:
+        # here if no docs at all and not HF type
+        # can only return if HF type
+        return
+
+    # context stuff similar to used in evaluate()
+    import torch
+    device, torch_dtype, context_class = get_device_dtype()
+    conditional_type = hasattr(llm, 'pipeline') and hasattr(llm.pipeline, 'model') and hasattr(llm.pipeline.model,
+                                                                                               'conditional_type') and llm.pipeline.model.conditional_type
+    with torch.no_grad():
+        have_lora_weights = lora_weights not in [no_lora_str, '', None]
+        context_class_cast = NullContext if device == 'cpu' or have_lora_weights else torch.autocast
+        if conditional_type:
+            # issues when casting to float16, can mess up t5 model, e.g. only when not streaming, or other odd behaviors
+            context_class_cast = NullContext
+        with context_class_cast(device):
+            if stream_output and streamer:
+                answer = None
+                import queue
+                bucket = queue.Queue()
+                thread = EThread(target=chain, streamer=streamer, bucket=bucket)
+                thread.start()
+                outputs = ""
+                try:
+                    for new_text in streamer:
+                        # print("new_text: %s" % new_text, flush=True)
+                        if bucket.qsize() > 0 or thread.exc:
+                            thread.join()
+                        outputs += new_text
+                        if prompter:  # and False:  # FIXME: pipeline can already use prompter
+                            if conditional_type:
+                                if prompter.botstr:
+                                    prompt = prompter.botstr
+                                    output_with_prompt = prompt + outputs
+                                    only_new_text = False
+                                else:
+                                    prompt = None
+                                    output_with_prompt = outputs
+                                    only_new_text = True
+                            else:
+                                prompt = None  # FIXME
+                                output_with_prompt = outputs
+                                # don't specify only_new_text here, use get_llm() value
+                            output1 = prompter.get_response(output_with_prompt, prompt=prompt,
+                                                            only_new_text=only_new_text,
+                                                            sanitize_bot_response=sanitize_bot_response)
+                            yield dict(prompt=prompt, response=output1, sources='', num_prompt_tokens=0)
+                        else:
+                            yield dict(prompt=prompt, response=outputs, sources='', num_prompt_tokens=0)
+                except BaseException:
+                    # if any exception, raise that exception if was from thread, first
+                    if thread.exc:
+                        raise thread.exc
+                    raise
+                finally:
+                    # in case no exception and didn't join with thread yet, then join
+                    if not thread.exc:
+                        answer = thread.join()
+                        if isinstance(answer, dict):
+                            if 'output_text' in answer:
+                                answer = answer['output_text']
+                            elif 'output' in answer:
+                                answer = answer['output']
+                # in case raise StopIteration or broke queue loop in streamer, but still have exception
+                if thread.exc:
+                    raise thread.exc
+            else:
+                if async_output:
+                    import asyncio
+                    answer = asyncio.run(chain())
+                else:
+                    answer = chain()
+                    if isinstance(answer, dict):
+                        if 'output_text' in answer:
+                            answer = answer['output_text']
+                        elif 'output' in answer:
+                            answer = answer['output']
+
+    get_answer_args = tuple([query, docs, answer, scores, show_rank,
+                             answer_with_sources,
+                             append_sources_to_answer])
+    get_answer_kwargs = dict(show_accordions=show_accordions,
+                             show_link_in_sources=show_link_in_sources,
+                             top_k_docs_max_show=top_k_docs_max_show,
+                             docs_ordering_type=docs_ordering_type,
+                             num_docs_before_cut=num_docs_before_cut,
+                             verbose=verbose,
+                             t_run=t_run,
+                             count_input_tokens=llm.count_input_tokens
+                             if hasattr(llm, 'count_input_tokens') else None,
+                             count_output_tokens=llm.count_output_tokens
+                             if hasattr(llm, 'count_output_tokens') else None)
+
+    t_run = time.time() - t_run
+
+    # for final yield, get real prompt used
+    if hasattr(llm, 'prompter') and llm.prompter.prompt is not None:
+        prompt = llm.prompter.prompt
+    else:
+        prompt = prompt_basic
+    num_prompt_tokens = get_token_count(prompt, tokenizer)
+
+    if not use_docs_planned:
+        ret = answer
+        extra = ''
+        yield dict(prompt=prompt, response=ret, sources=extra, num_prompt_tokens=num_prompt_tokens)
+    elif answer is not None:
+        ret, extra = get_sources_answer(*get_answer_args, **get_answer_kwargs)
+        yield dict(prompt=prompt, response=ret, sources=extra, num_prompt_tokens=num_prompt_tokens)
+    return
+
+
+def get_docs_with_score(query, k_db, filter_kwargs, db, db_type, text_context_list=None, verbose=False):
+    """
+    Collect (document, score) pairs for a query from optional in-memory documents and an optional db.
+
+    :param query: query text passed to db.similarity_search_with_score()
+    :param k_db: number of results requested from the db; for chroma it is reduced and retried on failure
+    :param filter_kwargs: dict of extra keyword arguments forwarded to the db lookup
+    :param db: database object, or None to only use text_context_list
+    :param db_type: database type name; 'chroma' and 'chroma_old' get the retry handling
+    :param text_context_list: optional documents (objects with a .metadata dict) placed ahead of db results,
+           each scored by its metadata 'score' entry (1.0 if absent)
+    :param verbose: whether to print the chroma error on each failed attempt
+    :return: tuple (docs_with_score, got_db_docs); got_db_docs is True only if the db itself gave documents
+    :raises RuntimeError, AttributeError: re-raised from chroma if the search still fails with k_db == 1
+    """
+    docs_with_score = []
+    got_db_docs = False
+
+    if text_context_list:
+        docs_with_score += [(x, x.metadata.get('score', 1.0)) for x in text_context_list]
+
+    if hasattr(db, '_embedding_function') and isinstance(db._embedding_function, FakeEmbeddings):
+        top_k_docs = -1
+        # don't add text_context_list twice
+        db_documents, db_metadatas = get_docs_and_meta(db, top_k_docs, filter_kwargs=filter_kwargs,
+                                                       text_context_list=None)
+        # a metadata entry may be None: normalize once so the sort keys and the Document agree,
+        # instead of failing on None.get() before the Document is even built
+        db_metadatas = [x or {} for x in db_metadatas]
+        # sort by order given to parser (file_id) and any chunk_id if chunked
+        doc_file_ids = [x.get('file_id', 0) for x in db_metadatas]
+        doc_chunk_ids = [x.get('chunk_id', 0) for x in db_metadatas]
+        docs_with_score_fake = [(Document(page_content=text, metadata=meta), 1.0)
+                                for text, meta in zip(db_documents, db_metadatas)]
+        docs_with_score_fake = [x for fx, cx, x in
+                                sorted(zip(doc_file_ids, doc_chunk_ids, docs_with_score_fake),
+                                       key=lambda x: (x[0], x[1]))
+                                ]
+        got_db_docs |= len(docs_with_score_fake) > 0
+        docs_with_score += docs_with_score_fake
+    elif db is not None and db_type in ['chroma', 'chroma_old']:
+        # deal with bug in chroma where if (say) 234 doc chunks and ask for 233+ then fails due to reduction
+        # misbehavior: keep retrying with smaller k_db until the search works or k_db == 1 also fails
+        while True:
+            try:
+                docs_with_score_chroma = db.similarity_search_with_score(query, k=k_db, **filter_kwargs)
+                break
+            except (RuntimeError, AttributeError) as e:
+                # AttributeError is for people with wrong version of langchain
+                if verbose:
+                    print("chroma bug: %s" % str(e), flush=True)
+                if k_db == 1:
+                    # nothing smaller to try
+                    raise
+                # take bigger steps down while k_db is large
+                if k_db > 500:
+                    k_db -= 200
+                elif k_db > 100:
+                    k_db -= 50
+                elif k_db > 10:
+                    k_db -= 5
+                else:
+                    k_db -= 1
+                k_db = max(1, k_db)
+        got_db_docs |= len(docs_with_score_chroma) > 0
+        docs_with_score += docs_with_score_chroma
+    elif db is not None:
+        docs_with_score_other = db.similarity_search_with_score(query, k=k_db, **filter_kwargs)
+        got_db_docs |= len(docs_with_score_other) > 0
+        docs_with_score += docs_with_score_other
+
+    # set in metadata original order of docs
+    for ii, x in enumerate(docs_with_score):
+        x[0].metadata.update(orig_index=ii)
+
+    return docs_with_score, got_db_docs
+
+
+def get_chain(query=None,
+              iinput=None,
+              context=None,  # FIXME: https://github.com/hwchase17/langchain/issues/6638
+              use_openai_model=False, use_openai_embedding=False,
+              first_para=False, text_limit=None, top_k_docs=4, chunk=True, chunk_size=512,
+
+              # urls
+              use_unstructured=True,
+              use_playwright=False,
+              use_selenium=False,
+
+              # pdfs
+              use_pymupdf='auto',
+              use_unstructured_pdf='auto',
+              use_pypdf='auto',
+              enable_pdf_ocr='auto',
+              enable_pdf_doctr='auto',
+              try_pdf_as_html='auto',
+
+              # images
+              enable_ocr=False,
+              enable_doctr=False,
+              enable_pix2struct=False,
+              enable_captions=True,
+              captions_model=None,
+              caption_loader=None,
+              doctr_loader=None,
+              pix2struct_loader=None,
+
+              # json
+              jq_schema='.[]',
+
+              langchain_mode_paths=None,
+              langchain_mode_types=None,
+              detect_user_path_changes_every_query=False,
+              db_type='faiss',
+              model_name=None,
+              inference_server='',
+              max_new_tokens=None,
+              langchain_only_model=False,
+              hf_embedding_model=None,
+              migrate_embedding_model=False,
+              auto_migrate_db=False,
+              prompter=None,
+              prompt_type=None,
+              prompt_dict=None,
+              system_prompt=None,
+              cut_distance=1.1,
+              add_chat_history_to_context=True,  # FIXME: https://github.com/hwchase17/langchain/issues/6638
+              add_search_to_context=False,
+              keep_sources_in_context=False,
+              memory_restriction_level=0,
+              top_k_docs_max_show=10,
+
+              load_db_if_exists=False,
+              db=None,
+              langchain_mode=None,
+              langchain_action=None,
+              langchain_agents=None,
+              document_subset=DocumentSubset.Relevant.name,
+              document_choice=[DocumentChoice.ALL.value],
+              pre_prompt_query=None,
+              prompt_query=None,
+              pre_prompt_summary=None,
+              prompt_summary=None,
+              text_context_list=None,
+              chat_conversation=None,
+
+              n_jobs=-1,
+              # beyond run_db_query:
+              llm=None,
+              tokenizer=None,
+              verbose=False,
+              docs_ordering_type='reverse_ucurve_sort',
+              min_max_new_tokens=256,
+              stream_output=True,
+              async_output=True,
+
+              # local
+              auto_reduce_chunks=True,
+              max_chunks=100,
+              total_tokens_for_docs=None,
+              use_llm_if_no_docs=None,
+              headsize=50,
+              ):
+    if inference_server is None:
+        inference_server = ''
+    assert hf_embedding_model is not None
+    assert langchain_agents is not None  # should be at least []
+    if text_context_list is None:
+        text_context_list = []
+
+    # default value:
+    llm_mode = langchain_mode in ['Disabled', 'LLM'] and len(text_context_list) == 0
+    query_action = langchain_action == LangChainAction.QUERY.value
+    summarize_action = langchain_action in [LangChainAction.SUMMARIZE_MAP.value,
+                                            LangChainAction.SUMMARIZE_ALL.value,
+                                            LangChainAction.SUMMARIZE_REFINE.value]
+
+    if len(text_context_list) > 0:
+        # turn into documents to make easy to manage and add meta
+        # try to account for summarization vs. query
+        chunk_id = 0 if query_action else -1
+        text_context_list = [
+            Document(page_content=x, metadata=dict(source='text_context_list', score=1.0, chunk_id=chunk_id)) for x
+            in text_context_list]
+
+    if add_search_to_context:
+        params = {
+            "engine": "duckduckgo",
+            "gl": "us",
+            "hl": "en",
+        }
+        search = H2OSerpAPIWrapper(params=params)
+        # if doing search, allow more docs
+        docs_search, top_k_docs = search.get_search_documents(query,
+                                                              query_action=query_action,
+                                                              chunk=chunk, chunk_size=chunk_size,
+                                                              db_type=db_type,
+                                                              headsize=headsize,
+                                                              top_k_docs=top_k_docs)
+        text_context_list = docs_search + text_context_list
+        add_search_to_context &= len(docs_search) > 0
+        top_k_docs_max_show = max(top_k_docs_max_show, len(docs_search))
+
+    if len(text_context_list) > 0:
+        llm_mode = False
+    use_llm_if_no_docs = True
+
+    from src.output_parser import H2OMRKLOutputParser
+    from langchain.agents import AgentType, load_tools, initialize_agent, create_vectorstore_agent, \
+        create_pandas_dataframe_agent, create_json_agent, create_csv_agent
+    from langchain.agents.agent_toolkits import VectorStoreInfo, VectorStoreToolkit, create_python_agent, JsonToolkit
+    if LangChainAgent.SEARCH.value in langchain_agents:
+        output_parser = H2OMRKLOutputParser()
+        tools = load_tools(["serpapi"], llm=llm, serpapi_api_key=os.environ.get('SERPAPI_API_KEY'))
+        if inference_server.startswith('openai'):
+            agent_type = AgentType.OPENAI_FUNCTIONS
+            agent_executor_kwargs = {"handle_parsing_errors": True, 'output_parser': output_parser}
+        else:
+            agent_type = AgentType.ZERO_SHOT_REACT_DESCRIPTION
+            agent_executor_kwargs = {'output_parser': output_parser}
+        chain = initialize_agent(tools, llm, agent=agent_type,
+                                 agent_executor_kwargs=agent_executor_kwargs,
+                                 agent_kwargs=dict(output_parser=output_parser,
+                                                   format_instructions=output_parser.get_format_instructions()),
+                                 output_parser=output_parser,
+                                 max_iterations=10,
+                                 verbose=True)
+        chain_kwargs = dict(input=query)
+        target = wrapped_partial(chain, chain_kwargs)
+
+        docs = []
+        scores = []
+        use_docs_planned = False
+        num_docs_before_cut = 0
+        use_llm_if_no_docs = True
+        return docs, target, scores, use_docs_planned, num_docs_before_cut, use_llm_if_no_docs, llm_mode, top_k_docs_max_show
+
+    if LangChainAgent.COLLECTION.value in langchain_agents:
+        output_parser = H2OMRKLOutputParser()
+        vectorstore_info = VectorStoreInfo(
+            name=langchain_mode,
+            description="DataBase of text from PDFs, Image Captions, or web URL content",
+            vectorstore=db,
+        )
+        toolkit = VectorStoreToolkit(vectorstore_info=vectorstore_info)
+        chain = create_vectorstore_agent(llm=llm, toolkit=toolkit,
+                                         agent_executor_kwargs=dict(output_parser=output_parser),
+                                         verbose=True)
+
+        chain_kwargs = dict(input=query)
+        target = wrapped_partial(chain, chain_kwargs)
+
+        docs = []
+        scores = []
+        use_docs_planned = False
+        num_docs_before_cut = 0
+        use_llm_if_no_docs = True
+        return docs, target, scores, use_docs_planned, num_docs_before_cut, use_llm_if_no_docs, llm_mode, top_k_docs_max_show
+
+    if LangChainAgent.PYTHON.value in langchain_agents and inference_server.startswith('openai'):
+        chain = create_python_agent(
+            llm=llm,
+            tool=PythonREPLTool(),
+            verbose=True,
+            agent_type=AgentType.OPENAI_FUNCTIONS,
+            agent_executor_kwargs={"handle_parsing_errors": True},
+        )
+
+        chain_kwargs = dict(input=query)
+        target = wrapped_partial(chain, chain_kwargs)
+
+        docs = []
+        scores = []
+        use_docs_planned = False
+        num_docs_before_cut = 0
+        use_llm_if_no_docs = True
+        return docs, target, scores, use_docs_planned, num_docs_before_cut, use_llm_if_no_docs, llm_mode, top_k_docs_max_show
+
+    if LangChainAgent.PANDAS.value in langchain_agents and inference_server.startswith('openai_chat'):
+        # FIXME: DATA
+        df = pd.DataFrame(None)
+        chain = create_pandas_dataframe_agent(
+            llm,
+            df,
+            verbose=True,
+            agent_type=AgentType.OPENAI_FUNCTIONS,
+        )
+
+        chain_kwargs = dict(input=query)
+        target = wrapped_partial(chain, chain_kwargs)
+
+        docs = []
+        scores = []
+        use_docs_planned = False
+        num_docs_before_cut = 0
+        use_llm_if_no_docs = True
+        return docs, target, scores, use_docs_planned, num_docs_before_cut, use_llm_if_no_docs, llm_mode, top_k_docs_max_show
+
+    if isinstance(document_choice, str):
+        document_choice = [document_choice]
+    if document_choice and document_choice[0] == DocumentChoice.ALL.value:
+        document_choice_agent = document_choice[1:]
+    else:
+        document_choice_agent = document_choice
+    document_choice_agent = [x for x in document_choice_agent if x.endswith('.json')]
+    if LangChainAgent.JSON.value in \
+            langchain_agents and \
+            inference_server.startswith('openai_chat') and \
+            len(document_choice_agent) == 1 and \
+            document_choice_agent[0].endswith('.json'):
+        # with open('src/openai.yaml') as f:
+        #    data = yaml.load(f, Loader=yaml.FullLoader)
+        with open(document_choice[0], 'rt') as f:
+            data = json.loads(f.read())
+        json_spec = JsonSpec(dict_=data, max_value_length=4000)
+        json_toolkit = JsonToolkit(spec=json_spec)
+
+        chain = create_json_agent(
+            llm=llm, toolkit=json_toolkit, verbose=True
+        )
+
+        chain_kwargs = dict(input=query)
+        target = wrapped_partial(chain, chain_kwargs)
+
+        docs = []
+        scores = []
+        use_docs_planned = False
+        num_docs_before_cut = 0
+        use_llm_if_no_docs = True
+        return docs, target, scores, use_docs_planned, num_docs_before_cut, use_llm_if_no_docs, llm_mode, top_k_docs_max_show
+
+    if isinstance(document_choice, str):
+        document_choice = [document_choice]
+    if document_choice and document_choice[0] == DocumentChoice.ALL.value:
+        document_choice_agent = document_choice[1:]
+    else:
+        document_choice_agent = document_choice
+    document_choice_agent = [x for x in document_choice_agent if x.endswith('.csv')]
+    if LangChainAgent.CSV.value in langchain_agents and len(document_choice_agent) == 1 and document_choice_agent[
+        0].endswith(
+            '.csv'):
+        data_file = document_choice[0]
+        if inference_server.startswith('openai_chat'):
+            chain = create_csv_agent(
+                llm,
+                data_file,
+                verbose=True,
+                agent_type=AgentType.OPENAI_FUNCTIONS,
+            )
+        else:
+            chain = create_csv_agent(
+                llm,
+                data_file,
+                verbose=True,
+                agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
+            )
+        chain_kwargs = dict(input=query)
+        target = wrapped_partial(chain, chain_kwargs)
+
+        docs = []
+        scores = []
+        use_docs_planned = False
+        num_docs_before_cut = 0
+        use_llm_if_no_docs = True
+        return docs, target, scores, use_docs_planned, num_docs_before_cut, use_llm_if_no_docs, llm_mode, top_k_docs_max_show
+
+    # determine whether use of context out of docs is planned
+    if not use_openai_model and prompt_type not in ['plain'] or langchain_only_model:
+        if llm_mode:
+            use_docs_planned = False
+        else:
+            use_docs_planned = True
+    else:
+        use_docs_planned = True
+
+    # https://github.com/hwchase17/langchain/issues/1946
+    # FIXME: Seems no way to get size of chroma db to limit top_k_docs to avoid
+    # Chroma collection MyData contains fewer than 4 elements.
+    # type logger error
+    if top_k_docs == -1:
+        k_db = 1000 if db_type in ['chroma', 'chroma_old'] else 100
+    else:
+        # top_k_docs=100 works ok too
+        k_db = 1000 if db_type in ['chroma', 'chroma_old'] else top_k_docs
+
+    # FIXME: For All just go over all dbs instead of a separate db for All
+    if not detect_user_path_changes_every_query and db is not None:
+        # avoid looking at user_path during similarity search db handling,
+        # if already have db and not updating from user_path every query
+        # but if db is None, no db yet loaded (e.g. from prep), so allow user_path to be whatever it was
+        if langchain_mode_paths is None:
+            langchain_mode_paths = {}
+        langchain_mode_paths = langchain_mode_paths.copy()
+        langchain_mode_paths[langchain_mode] = None
+    # once use_openai_embedding, hf_embedding_model passed in, possibly changed,
+    # but that's ok as not used below or in calling functions
+    db, num_new_sources, new_sources_metadata = make_db(use_openai_embedding=use_openai_embedding,
+                                                        hf_embedding_model=hf_embedding_model,
+                                                        migrate_embedding_model=migrate_embedding_model,
+                                                        auto_migrate_db=auto_migrate_db,
+                                                        first_para=first_para, text_limit=text_limit,
+                                                        chunk=chunk, chunk_size=chunk_size,
+
+                                                        # urls
+                                                        use_unstructured=use_unstructured,
+                                                        use_playwright=use_playwright,
+                                                        use_selenium=use_selenium,
+
+                                                        # pdfs
+                                                        use_pymupdf=use_pymupdf,
+                                                        use_unstructured_pdf=use_unstructured_pdf,
+                                                        use_pypdf=use_pypdf,
+                                                        enable_pdf_ocr=enable_pdf_ocr,
+                                                        enable_pdf_doctr=enable_pdf_doctr,
+                                                        try_pdf_as_html=try_pdf_as_html,
+
+                                                        # images
+                                                        enable_ocr=enable_ocr,
+                                                        enable_doctr=enable_doctr,
+                                                        enable_pix2struct=enable_pix2struct,
+                                                        enable_captions=enable_captions,
+                                                        captions_model=captions_model,
+                                                        caption_loader=caption_loader,
+                                                        doctr_loader=doctr_loader,
+                                                        pix2struct_loader=pix2struct_loader,
+
+                                                        # json
+                                                        jq_schema=jq_schema,
+
+                                                        langchain_mode=langchain_mode,
+                                                        langchain_mode_paths=langchain_mode_paths,
+                                                        langchain_mode_types=langchain_mode_types,
+                                                        db_type=db_type,
+                                                        load_db_if_exists=load_db_if_exists,
+                                                        db=db,
+                                                        n_jobs=n_jobs,
+                                                        verbose=verbose)
+    num_docs_before_cut = 0
+    use_template = not use_openai_model and prompt_type not in ['plain'] or langchain_only_model
+    got_db_docs = False  # not yet at least
+    template, template_if_no_docs, auto_reduce_chunks, query = \
+        get_template(query, iinput,
+                     pre_prompt_query, prompt_query,
+                     pre_prompt_summary, prompt_summary,
+                     langchain_action,
+                     llm_mode,
+                     use_docs_planned,
+                     auto_reduce_chunks,
+                     got_db_docs,
+                     add_search_to_context)
+
+    max_input_tokens = get_max_input_tokens(llm=llm, tokenizer=tokenizer, inference_server=inference_server,
+                                            model_name=model_name, max_new_tokens=max_new_tokens)
+
+    if (db or text_context_list) and use_docs_planned:
+        if hasattr(db, '_persist_directory'):
+            lock_file = get_db_lock_file(db, lock_type='sim')
+        else:
+            base_path = 'locks'
+            base_path = makedirs(base_path, exist_ok=True, tmp_ok=True, use_base=True)
+            name_path = "sim.lock"
+            lock_file = os.path.join(base_path, name_path)
+
+        if not (isinstance(db, Chroma) or isinstance(db, ChromaMig) or ChromaMig.__name__ in str(db)):
+            # only chroma supports filtering
+            filter_kwargs = {}
+            filter_kwargs_backup = {}
+        else:
+            import logging
+            logging.getLogger("chromadb").setLevel(logging.ERROR)
+            assert document_choice is not None, "Document choice was None"
+            if isinstance(db, Chroma):
+                filter_kwargs_backup = {}  # shouldn't ever need backup
+                # chroma >= 0.4
+                if len(document_choice) == 0 or len(document_choice) >= 1 and document_choice[
+                    0] == DocumentChoice.ALL.value:
+                    filter_kwargs = {"filter": {"chunk_id": {"$gte": 0}}} if query_action else \
+                        {"filter": {"chunk_id": {"$eq": -1}}}
+                else:
+                    if document_choice[0] == DocumentChoice.ALL.value:
+                        document_choice = document_choice[1:]
+                    if len(document_choice) == 0:
+                        filter_kwargs = {}
+                    elif len(document_choice) > 1:
+                        or_filter = [
+                            {"$and": [dict(source={"$eq": x}), dict(chunk_id={"$gte": 0})]} if query_action else {
+                                "$and": [dict(source={"$eq": x}), dict(chunk_id={"$eq": -1})]}
+                            for x in document_choice]
+                        filter_kwargs = dict(filter={"$or": or_filter})
+                    else:
+                        # still chromadb UX bug, have to do different thing for 1 vs. 2+ docs when doing filter
+                        one_filter = \
+                            [{"source": {"$eq": x}, "chunk_id": {"$gte": 0}} if query_action else {
+                                "source": {"$eq": x},
+                                "chunk_id": {
+                                    "$eq": -1}}
+                             for x in document_choice][0]
+
+                        filter_kwargs = dict(filter={"$and": [dict(source=one_filter['source']),
+                                                              dict(chunk_id=one_filter['chunk_id'])]})
+            else:
+                # migration for chroma < 0.4
+                if len(document_choice) == 0 or len(document_choice) >= 1 and document_choice[
+                    0] == DocumentChoice.ALL.value:
+                    filter_kwargs = {"filter": {"chunk_id": {"$gte": 0}}} if query_action else \
+                        {"filter": {"chunk_id": {"$eq": -1}}}
+                    filter_kwargs_backup = {"filter": {"chunk_id": {"$gte": 0}}}
+                elif len(document_choice) >= 2:
+                    if document_choice[0] == DocumentChoice.ALL.value:
+                        document_choice = document_choice[1:]
+                    or_filter = [
+                        {"source": {"$eq": x}, "chunk_id": {"$gte": 0}} if query_action else {"source": {"$eq": x},
+                                                                                              "chunk_id": {
+                                                                                                  "$eq": -1}}
+                        for x in document_choice]
+                    filter_kwargs = dict(filter={"$or": or_filter})
+                    or_filter_backup = [
+                        {"source": {"$eq": x}} if query_action else {"source": {"$eq": x}}
+                        for x in document_choice]
+                    filter_kwargs_backup = dict(filter={"$or": or_filter_backup})
+                elif len(document_choice) == 1:
+                    # degenerate UX bug in chroma
+                    one_filter = \
+                        [{"source": {"$eq": x}, "chunk_id": {"$gte": 0}} if query_action else {"source": {"$eq": x},
+                                                                                               "chunk_id": {
+                                                                                                   "$eq": -1}}
+                         for x in document_choice][0]
+                    filter_kwargs = dict(filter=one_filter)
+                    one_filter_backup = \
+                        [{"source": {"$eq": x}} if query_action else {"source": {"$eq": x}}
+                         for x in document_choice][0]
+                    filter_kwargs_backup = dict(filter=one_filter_backup)
+                else:
+                    # shouldn't reach
+                    filter_kwargs = {}
+                    filter_kwargs_backup = {}
+
+        if llm_mode:
+            docs = []
+            scores = []
+        elif document_subset == DocumentSubset.TopKSources.name or query in [None, '', '\n']:
+            db_documents, db_metadatas = get_docs_and_meta(db, top_k_docs, filter_kwargs=filter_kwargs,
+                                                           text_context_list=text_context_list)
+            if len(db_documents) == 0 and filter_kwargs_backup:
+                db_documents, db_metadatas = get_docs_and_meta(db, top_k_docs, filter_kwargs=filter_kwargs_backup,
+                                                               text_context_list=text_context_list)
+
+            if top_k_docs == -1:
+                top_k_docs = len(db_documents)
+            # similar to langchain's chroma's _results_to_docs_and_scores
+            docs_with_score = [(Document(page_content=result[0], metadata=result[1] or {}), 0)
+                               for result in zip(db_documents, db_metadatas)]
+            # set in metadata original order of docs
+            [x[0].metadata.update(orig_index=ii) for ii, x in enumerate(docs_with_score)]
+
+            # order documents
+            doc_hashes = [x.get('doc_hash', 'None') for x in db_metadatas]
+            if query_action:
+                doc_chunk_ids = [x.get('chunk_id', 0) for x in db_metadatas]
+                docs_with_score2 = [x for hx, cx, x in
+                                    sorted(zip(doc_hashes, doc_chunk_ids, docs_with_score), key=lambda x: (x[0], x[1]))
+                                    if cx >= 0]
+            else:
+                assert summarize_action
+                doc_chunk_ids = [x.get('chunk_id', -1) for x in db_metadatas]
+                docs_with_score2 = [x for hx, cx, x in
+                                    sorted(zip(doc_hashes, doc_chunk_ids, docs_with_score), key=lambda x: (x[0], x[1]))
+                                    if cx == -1
+                                    ]
+                if len(docs_with_score2) == 0 and len(docs_with_score) > 0:
+                    # old database without chunk_id, migration added 0 but didn't make -1 as that would be expensive
+                    # just do again and relax filter, let summarize operate on actual chunks if nothing else
+                    docs_with_score2 = [x for hx, cx, x in
+                                        sorted(zip(doc_hashes, doc_chunk_ids, docs_with_score),
+                                               key=lambda x: (x[0], x[1]))
+                                        ]
+            docs_with_score = docs_with_score2
+
+            docs_with_score = docs_with_score[:top_k_docs]
+            docs = [x[0] for x in docs_with_score]
+            scores = [x[1] for x in docs_with_score]
+            num_docs_before_cut = len(docs)
+        else:
+            with filelock.FileLock(lock_file):
+                docs_with_score, got_db_docs = get_docs_with_score(query, k_db, filter_kwargs, db, db_type,
+                                                                   text_context_list=text_context_list,
+                                                                   verbose=verbose)
+                if len(docs_with_score) == 0 and filter_kwargs_backup:
+                    docs_with_score, got_db_docs = get_docs_with_score(query, k_db, filter_kwargs_backup, db,
+                                                                       db_type,
+                                                                       text_context_list=text_context_list,
+                                                                       verbose=verbose)
+
+            tokenizer = get_tokenizer(db=db, llm=llm, tokenizer=tokenizer, inference_server=inference_server,
+                                      use_openai_model=use_openai_model,
+                                      db_type=db_type)
+            # NOTE: if map_reduce, then no need to auto reduce chunks
+            if query_action and (top_k_docs == -1 or auto_reduce_chunks):
+                top_k_docs_tokenize = 100
+                docs_with_score = docs_with_score[:top_k_docs_tokenize]
+
+                prompt_no_docs = template.format(context='', question=query)
+
+                model_max_length = tokenizer.model_max_length
+                chat = True  # FIXME?
+
+                # first docs_with_score are most important with highest score
+                full_prompt, \
+                    instruction, iinput, context, \
+                    num_prompt_tokens, max_new_tokens, \
+                    num_prompt_tokens0, num_prompt_tokens_actual, \
+                    chat_index, top_k_docs_trial, one_doc_size = \
+                    get_limited_prompt(prompt_no_docs,
+                                       iinput,
+                                       tokenizer,
+                                       prompter=prompter,
+                                       inference_server=inference_server,
+                                       prompt_type=prompt_type,
+                                       prompt_dict=prompt_dict,
+                                       chat=chat,
+                                       max_new_tokens=max_new_tokens,
+                                       system_prompt=system_prompt,
+                                       context=context,
+                                       chat_conversation=chat_conversation,
+                                       text_context_list=[x[0].page_content for x in docs_with_score],
+                                       keep_sources_in_context=keep_sources_in_context,
+                                       model_max_length=model_max_length,
+                                       memory_restriction_level=memory_restriction_level,
+                                       langchain_mode=langchain_mode,
+                                       add_chat_history_to_context=add_chat_history_to_context,
+                                       min_max_new_tokens=min_max_new_tokens,
+                                       )
+                # avoid craziness
+                if 0 < top_k_docs_trial < max_chunks:
+                    # avoid craziness
+                    if top_k_docs == -1:
+                        top_k_docs = top_k_docs_trial
+                    else:
+                        top_k_docs = min(top_k_docs, top_k_docs_trial)
+                elif top_k_docs_trial >= max_chunks:
+                    top_k_docs = max_chunks
+                if top_k_docs > 0:
+                    docs_with_score = docs_with_score[:top_k_docs]
+                elif one_doc_size is not None:
+                    docs_with_score = [docs_with_score[0][:one_doc_size]]
+                else:
+                    docs_with_score = []
+            else:
+                if total_tokens_for_docs is not None:
+                    # used to limit tokens for summarization, e.g. public instance
+                    top_k_docs, one_doc_size, num_doc_tokens = \
+                        get_docs_tokens(tokenizer,
+                                        text_context_list=[x[0].page_content for x in docs_with_score],
+                                        max_input_tokens=total_tokens_for_docs)
+
+                docs_with_score = docs_with_score[:top_k_docs]
+
+            # put most relevant chunks closest to question,
+            # esp. if truncation occurs will be "oldest" or "farthest from response" text that is truncated
+            # BUT: for small models, e.g. 6_9 pythia, if sees some stuff related to h2oGPT first, it can connect that and not listen to rest
+            if docs_ordering_type in ['best_first']:
+                pass
+            elif docs_ordering_type in ['best_near_prompt', 'reverse_sort']:
+                docs_with_score.reverse()
+            elif docs_ordering_type in ['', None, 'reverse_ucurve_sort']:
+                docs_with_score = reverse_ucurve_list(docs_with_score)
+            else:
+                raise ValueError("No such docs_ordering_type=%s" % docs_ordering_type)
+
+            # cut off so no high distance docs/sources considered
+            num_docs_before_cut = len(docs_with_score)
+            docs = [x[0] for x in docs_with_score if x[1] < cut_distance]
+            scores = [x[1] for x in docs_with_score if x[1] < cut_distance]
+            if len(scores) > 0 and verbose:
+                print("Distance: min: %s max: %s mean: %s median: %s" %
+                      (scores[0], scores[-1], np.mean(scores), np.median(scores)), flush=True)
+    else:
+        docs = []
+        scores = []
+
+    if not docs and use_docs_planned and not langchain_only_model:
+        # if HF type and have no docs, can bail out
+        return docs, None, [], False, num_docs_before_cut, use_llm_if_no_docs, llm_mode, top_k_docs_max_show
+
+    if document_subset in non_query_commands:
+        # no LLM use
+        return docs, None, [], False, num_docs_before_cut, use_llm_if_no_docs, llm_mode, top_k_docs_max_show
+
+    # FIXME: WIP
+    common_words_file = "data/NGSL_1.2_stats.csv.zip"
+    if False and os.path.isfile(common_words_file) and langchain_action == LangChainAction.QUERY.value:
+        df = pd.read_csv("data/NGSL_1.2_stats.csv.zip")
+        import string
+        reduced_query = query.translate(str.maketrans(string.punctuation, ' ' * len(string.punctuation))).strip()
+        reduced_query_words = reduced_query.split(' ')
+        set_common = set(df['Lemma'].values.tolist())
+        num_common = len([x.lower() in set_common for x in reduced_query_words])
+        frac_common = num_common / len(reduced_query) if reduced_query else 0
+        # FIXME: report to user bad query that uses too many common words
+        if verbose:
+            print("frac_common: %s" % frac_common, flush=True)
+
+    if len(docs) == 0:
+        # avoid context == in prompt then
+        use_docs_planned = False
+        template = template_if_no_docs
+
+    got_db_docs = got_db_docs and len(text_context_list) < len(docs)
+    # update template in case situation changed or did get docs
+    # then no new documents from database or not used, redo template
+    # got template earlier as estimate of template token size, here is final used version
+    template, template_if_no_docs, auto_reduce_chunks, query = \
+        get_template(query, iinput,
+                     pre_prompt_query, prompt_query,
+                     pre_prompt_summary, prompt_summary,
+                     langchain_action,
+                     llm_mode,
+                     use_docs_planned,
+                     auto_reduce_chunks,
+                     got_db_docs,
+                     add_search_to_context)
+
+    if langchain_action == LangChainAction.QUERY.value:
+        if use_template:
+            # instruct-like, rather than few-shot prompt_type='plain' as default
+            # but then sources confuse the model with how inserted among rest of text, so avoid
+            prompt = PromptTemplate(
+                # input_variables=["summaries", "question"],
+                input_variables=["context", "question"],
+                template=template,
+            )
+            chain = load_qa_chain(llm, prompt=prompt, verbose=verbose)
+        else:
+            # only if use_openai_model = True, unused normally except in testing
+            chain = load_qa_with_sources_chain(llm)
+        if not use_docs_planned:
+            chain_kwargs = dict(input_documents=[], question=query)
+        else:
+            chain_kwargs = dict(input_documents=docs, question=query)
+        target = wrapped_partial(chain, chain_kwargs)
+    elif langchain_action in [LangChainAction.SUMMARIZE_MAP.value,
+                              LangChainAction.SUMMARIZE_REFINE,
+                              LangChainAction.SUMMARIZE_ALL.value]:
+        if async_output:
+            return_intermediate_steps = False
+        else:
+            return_intermediate_steps = True
+        from langchain.chains.summarize import load_summarize_chain
+        if langchain_action == LangChainAction.SUMMARIZE_MAP.value:
+            prompt = PromptTemplate(input_variables=["text"], template=template)
+            chain = load_summarize_chain(llm, chain_type="map_reduce",
+                                         map_prompt=prompt, combine_prompt=prompt,
+                                         return_intermediate_steps=return_intermediate_steps,
+                                         token_max=max_input_tokens, verbose=verbose)
+            if async_output:
+                chain_func = chain.arun
+            else:
+                chain_func = chain
+            target = wrapped_partial(chain_func, {"input_documents": docs})  # , return_only_outputs=True)
+        elif langchain_action == LangChainAction.SUMMARIZE_ALL.value:
+            assert use_template
+            prompt = PromptTemplate(input_variables=["text"], template=template)
+            chain = load_summarize_chain(llm, chain_type="stuff", prompt=prompt,
+                                         return_intermediate_steps=return_intermediate_steps, verbose=verbose)
+            if async_output:
+                chain_func = chain.arun
+            else:
+                chain_func = chain
+            target = wrapped_partial(chain_func)
+        elif langchain_action == LangChainAction.SUMMARIZE_REFINE.value:
+            chain = load_summarize_chain(llm, chain_type="refine",
+                                         return_intermediate_steps=return_intermediate_steps, verbose=verbose)
+            if async_output:
+                chain_func = chain.arun
+            else:
+                chain_func = chain
+            target = wrapped_partial(chain_func)
+        else:
+            raise RuntimeError("No such langchain_action=%s" % langchain_action)
+    else:
+        raise RuntimeError("No such langchain_action=%s" % langchain_action)
+
+    return docs, target, scores, use_docs_planned, num_docs_before_cut, use_llm_if_no_docs, llm_mode, top_k_docs_max_show
+
+
+def get_max_model_length(llm=None, tokenizer=None, inference_server=None, model_name=None):
+    if hasattr(tokenizer, 'model_max_length'):
+        return tokenizer.model_max_length
+    elif inference_server in ['openai', 'openai_azure']:
+        return llm.modelname_to_contextsize(model_name)
+    elif inference_server in ['openai_chat', 'openai_azure_chat']:
+        return model_token_mapping[model_name]
+    elif isinstance(tokenizer, FakeTokenizer):
+        # GGML
+        return tokenizer.model_max_length
+    else:
+        return 2048
+
+
+def get_max_input_tokens(llm=None, tokenizer=None, inference_server=None, model_name=None, max_new_tokens=None):
+    model_max_length = get_max_model_length(llm=llm, tokenizer=tokenizer, inference_server=inference_server,
+                                            model_name=model_name)
+
+    if any([inference_server.startswith(x) for x in
+            ['openai', 'openai_azure', 'openai_chat', 'openai_azure_chat', 'vllm']]):
+        # openai can't handle tokens + max_new_tokens > max_tokens even if never generate those tokens
+        # and vllm uses OpenAI API with same limits
+        max_input_tokens = model_max_length - max_new_tokens
+    elif isinstance(tokenizer, FakeTokenizer):
+        # don't trust that fake tokenizer (e.g. GGML) will make lots of tokens normally, allow more input
+        max_input_tokens = model_max_length - min(256, max_new_tokens)
+    else:
+        if 'falcon' in model_name or inference_server.startswith('http'):
+            # allow for more input for falcon, assume won't make as long outputs as default max_new_tokens
+            # Also allow if TGI or Gradio, because we tell it input may be same as output, even if model can't actually handle
+            max_input_tokens = model_max_length - min(256, max_new_tokens)
+        else:
+            # trust that maybe model will make so many tokens, so limit input
+            max_input_tokens = model_max_length - max_new_tokens
+
+    return max_input_tokens
+
+
+def get_tokenizer(db=None, llm=None, tokenizer=None, inference_server=None, use_openai_model=False,
+                  db_type='chroma'):
+    if hasattr(llm, 'pipeline') and hasattr(llm.pipeline, 'tokenizer'):
+        # more accurate
+        return llm.pipeline.tokenizer
+    elif hasattr(llm, 'tokenizer'):
+        # e.g. TGI client mode etc.
+        return llm.tokenizer
+    elif inference_server in ['openai', 'openai_chat', 'openai_azure',
+                              'openai_azure_chat']:
+        return tokenizer
+    elif isinstance(tokenizer, FakeTokenizer):
+        return tokenizer
+    elif use_openai_model:
+        return FakeTokenizer()
+    elif (hasattr(db, '_embedding_function') and
+          hasattr(db._embedding_function, 'client') and
+          hasattr(db._embedding_function.client, 'tokenize')):
+        # in case model is not our pipeline with HF tokenizer
+        return db._embedding_function.client.tokenize
+    else:
+        # backup method
+        if os.getenv('HARD_ASSERTS'):
+            assert db_type in ['faiss', 'weaviate']
+        # use tiktoken for faiss since embedding called differently
+        return FakeTokenizer()
+
+
+def get_template(query, iinput,
+                 pre_prompt_query, prompt_query,
+                 pre_prompt_summary, prompt_summary,
+                 langchain_action,
+                 llm_mode,
+                 use_docs_planned,
+                 auto_reduce_chunks,
+                 got_db_docs,
+                 add_search_to_context):
+    if got_db_docs and add_search_to_context:
+        # modify prompts, assumes patterns like in predefined prompts.  If user customizes, then they'd need to account for that.
+        prompt_query = prompt_query.replace('information in the document sources',
+                                            'information in the document and web search sources (and their source dates and website source)')
+        prompt_summary = prompt_summary.replace('information in the document sources',
+                                                'information in the document and web search sources (and their source dates and website source)')
+    elif got_db_docs and not add_search_to_context:
+        pass
+    elif not got_db_docs and add_search_to_context:
+        # modify prompts, assumes patterns like in predefined prompts.  If user customizes, then they'd need to account for that.
+        prompt_query = prompt_query.replace('information in the document sources',
+                                            'information in the web search sources (and their source dates and website source)')
+        prompt_summary = prompt_summary.replace('information in the document sources',
+                                                'information in the web search sources (and their source dates and website source)')
+
+    if langchain_action == LangChainAction.QUERY.value:
+        if iinput:
+            query = "%s\n%s" % (query, iinput)
+        if llm_mode or not use_docs_planned:
+            template_if_no_docs = template = """{context}{question}"""
+        else:
+            template = """%s
+\"\"\"
+{context}
+\"\"\"
+%s{question}""" % (pre_prompt_query, prompt_query)
+            template_if_no_docs = """{context}{question}"""
+    elif langchain_action in [LangChainAction.SUMMARIZE_ALL.value, LangChainAction.SUMMARIZE_MAP.value]:
+        none = ['', '\n', None]
+
+        # modify prompt_summary if user passes query or iinput
+        if query not in none and iinput not in none:
+            prompt_summary = "Focusing on %s, %s, %s" % (query, iinput, prompt_summary)
+        elif query not in none:
+            prompt_summary = "Focusing on %s, %s" % (query, prompt_summary)
+        # don't auto reduce
+        auto_reduce_chunks = False
+        if langchain_action == LangChainAction.SUMMARIZE_MAP.value:
+            fstring = '{text}'
+        else:
+            fstring = '{input_documents}'
+        template = """%s:
+\"\"\"
+%s
+\"\"\"\n%s""" % (pre_prompt_summary, fstring, prompt_summary)
+        template_if_no_docs = "Exactly only say: There are no documents to summarize."
+    elif langchain_action in [LangChainAction.SUMMARIZE_REFINE]:
+        template = ''  # unused
+        template_if_no_docs = ''  # unused
+    else:
+        raise RuntimeError("No such langchain_action=%s" % langchain_action)
+
+    return template, template_if_no_docs, auto_reduce_chunks, query
+
+
+def get_sources_answer(query, docs, answer, scores, show_rank,
+                       answer_with_sources, append_sources_to_answer,
+                       show_accordions=True,
+                       show_link_in_sources=True,
+                       top_k_docs_max_show=10,
+                       docs_ordering_type='reverse_ucurve_sort',
+                       num_docs_before_cut=0,
+                       verbose=False,
+                       t_run=None,
+                       count_input_tokens=None, count_output_tokens=None):
+    if verbose:
+        print("query: %s" % query, flush=True)
+        print("answer: %s" % answer, flush=True)
+
+    if len(docs) == 0:
+        extra = ''
+        ret = answer + extra
+        return ret, extra
+
+    if answer_with_sources == -1:
+        extra = [dict(score=score, content=get_doc(x), source=get_source(x), orig_index=x.metadata.get('orig_index', 0))
+                 for score, x in zip(scores, docs)][
+                :top_k_docs_max_show]
+        if append_sources_to_answer:
+            extra_str = [str(x) for x in extra]
+            ret = answer + '\n\n' + '\n'.join(extra_str)
+        else:
+            ret = answer
+        return ret, extra
+
+    # link
+    answer_sources = [(max(0.0, 1.5 - score) / 1.5,
+                       get_url(doc, font_size=font_size),
+                       get_accordion(doc, font_size=font_size, head_acc=head_acc)) for score, doc in
+                      zip(scores, docs)]
+    if not show_accordions:
+        answer_sources_dict = defaultdict(list)
+        [answer_sources_dict[url].append(score) for score, url in answer_sources]
+        answers_dict = {}
+        for url, scores_url in answer_sources_dict.items():
+            answers_dict[url] = np.max(scores_url)
+        answer_sources = [(score, url) for url, score in answers_dict.items()]
+    answer_sources.sort(key=lambda x: x[0], reverse=True)
+    if show_rank:
+        # answer_sources = ['%d | %s' % (1 + rank, url) for rank, (score, url) in enumerate(answer_sources)]
+        # sorted_sources_urls = "Sources [Rank | Link]:<br>" + "<br>".join(answer_sources)
+        answer_sources = ['%s' % url for rank, (score, url) in enumerate(answer_sources)]
+        answer_sources = answer_sources[:top_k_docs_max_show]
+        sorted_sources_urls = "Ranked Sources:<br>" + "<br>".join(answer_sources)
+    else:
+        if show_accordions:
+            if show_link_in_sources:
+                answer_sources = ['<font size="%s"><li>%.2g | %s</li>%s</font>' % (font_size, score, url, accordion)
+                                  for score, url, accordion in answer_sources]
+            else:
+                answer_sources = ['<font size="%s"><li>%.2g</li>%s</font>' % (font_size, score, accordion)
+                                  for score, url, accordion in answer_sources]
+        else:
+            if show_link_in_sources:
+                answer_sources = ['<font size="%s"><li>%.2g | %s</li></font>' % (font_size, score, url)
+                                  for score, url in answer_sources]
+            else:
+                answer_sources = ['<font size="%s"><li>%.2g</li></font>' % (font_size, score)
+                                  for score, url in answer_sources]
+        answer_sources = answer_sources[:top_k_docs_max_show]
+        if show_accordions:
+            sorted_sources_urls = f"<font size=\"{font_size}\">{source_prefix}<ul></font>" + "".join(answer_sources)
+        else:
+            sorted_sources_urls = f"<font size=\"{font_size}\">{source_prefix}<p><ul></font>" + "<p>".join(
+                answer_sources)
+        if verbose:
+            if int(t_run):
+                sorted_sources_urls += 'Total Time: %d [s]<p>' % t_run
+            if count_input_tokens and count_output_tokens:
+                sorted_sources_urls += 'Input Tokens: %s | Output Tokens: %d<p>' % (
+                    count_input_tokens, count_output_tokens)
+        sorted_sources_urls += f"<font size=\"{font_size}\"></ul></p>{source_postfix}</font>"
+        title_overall = "Sources"
+        sorted_sources_urls = f"""<details><summary><font size="{font_size}">{title_overall}</font></summary><font size="{font_size}">{sorted_sources_urls}</font></details>"""
+        if os.getenv("HARD_ASSERTS"):
+            assert sorted_sources_urls.startswith(super_source_prefix)
+            assert sorted_sources_urls.endswith(super_source_postfix)
+
+    if not answer.endswith('\n'):
+        answer += '\n'
+
+    if answer_with_sources:
+        extra = '\n' + sorted_sources_urls
+    else:
+        extra = ''
+    if append_sources_to_answer:
+        ret = answer + extra
+    else:
+        ret = answer
+    return ret, extra
+
+
+def set_userid(db1s, requests_state1, get_userid_auth):
+    db1 = db1s[LangChainMode.MY_DATA.value]
+    assert db1 is not None and len(db1) == length_db1()
+    if not db1[1]:
+        db1[1] = get_userid_auth(requests_state1)
+    if not db1[2]:
+        username1 = None
+        if 'username' in requests_state1:
+            username1 = requests_state1['username']
+        db1[2] = username1
+
+
+def set_userid_direct(db1s, userid, username):
+    db1 = db1s[LangChainMode.MY_DATA.value]
+    db1[1] = userid
+    db1[2] = username
+
+
+def get_userid_direct(db1s):
+    return db1s[LangChainMode.MY_DATA.value][1] if db1s is not None else ''
+
+
+def get_username_direct(db1s):
+    return db1s[LangChainMode.MY_DATA.value][2] if db1s is not None else ''
+
+
+def get_dbid(db1):
+    return db1[1]
+
+
+def set_dbid(db1):
+    # can only call this after function called so for specific user, not in gr.State() that occurs during app init
+    assert db1 is not None and len(db1) == length_db1()
+    if db1[1] is None:
+        #  uuid in db is used as user ID
+        db1[1] = str(uuid.uuid4())
+
+
+def length_db1():
+    # For MyData:
+    # 0: db
+    # 1: userid and dbid
+    # 2: username
+
+    # For others:
+    # 0: db
+    # 1: dbid
+    # 2: None
+    return 3
+
+
+def get_any_db(db1s, langchain_mode, langchain_mode_paths, langchain_mode_types,
+               dbs=None,
+               load_db_if_exists=None, db_type=None,
+               use_openai_embedding=None,
+               hf_embedding_model=None, migrate_embedding_model=None, auto_migrate_db=None,
+               for_sources_list=False,
+               verbose=False,
+               n_jobs=-1,
+               ):
+    if langchain_mode in [LangChainMode.DISABLED.value, LangChainMode.LLM.value]:
+        return None
+    elif for_sources_list and langchain_mode in [LangChainMode.WIKI_FULL.value]:
+        # NOTE: avoid showing full wiki.  Takes about 30 seconds over about 90k entries, but not useful for now
+        return None
+    elif langchain_mode in db1s and len(db1s[langchain_mode]) > 1 and db1s[langchain_mode][0]:
+        return db1s[langchain_mode][0]
+    elif dbs is not None and langchain_mode in dbs and dbs[langchain_mode] is not None:
+        return dbs[langchain_mode]
+    else:
+        db = None
+
+    if db is None:
+        langchain_type = langchain_mode_types.get(langchain_mode, LangChainTypes.EITHER.value)
+        persist_directory, langchain_type = get_persist_directory(langchain_mode, db1s=db1s, dbs=dbs,
+                                                                  langchain_type=langchain_type)
+        langchain_mode_types[langchain_mode] = langchain_type
+        # see if actually have on disk, don't try to switch embedding yet, since can't use return here
+        migrate_embedding_model = False
+        db, _, _ = \
+            get_existing_db(db, persist_directory, load_db_if_exists, db_type,
+                            use_openai_embedding,
+                            langchain_mode, langchain_mode_paths, langchain_mode_types,
+                            hf_embedding_model, migrate_embedding_model, auto_migrate_db,
+                            verbose=verbose, n_jobs=n_jobs)
+        if db is not None:
+            # if found db, then stuff into state, so don't have to reload again that takes time
+            if langchain_type == LangChainTypes.PERSONAL.value:
+                assert isinstance(db1s, dict), "db1s wrong type: %s" % type(db1s)
+                db1 = db1s[langchain_mode] = [db, None, None]
+                assert len(db1) == length_db1(), "Bad setup: %s" % len(db1)
+                set_dbid(db1)
+            else:
+                assert isinstance(dbs, dict), "dbs wrong type: %s" % type(dbs)
+                dbs[langchain_mode] = db
+
+    return db
+
+
+def get_sources(db1s, selection_docs_state1, requests_state1, langchain_mode,
+                dbs=None, docs_state0=None,
+                load_db_if_exists=None,
+                db_type=None,
+                use_openai_embedding=None,
+                hf_embedding_model=None,
+                migrate_embedding_model=None,
+                auto_migrate_db=None,
+                verbose=False,
+                get_userid_auth=None,
+                n_jobs=-1,
+                ):
+    """
+    List the document sources stored for ``langchain_mode`` and write them to a text file.
+
+    :param db1s: mapping of langchain mode -> per-user db state, passed to set_dbid/set_userid/get_any_db
+    :param selection_docs_state1: dict providing 'langchain_mode_paths' and 'langchain_mode_types'
+    :param requests_state1: request state forwarded to set_userid
+    :param langchain_mode: mode (collection name) whose sources are listed
+    :param dbs: optional mapping of langchain mode -> db, forwarded to get_any_db
+    :param docs_state0: entries placed in front of the returned source list; None is treated as empty
+    :return: tuple (sources_file, source_list, num_chunks, db) where sources_file is the path of the
+             written text file, source_list is docs_state0 followed by the sorted unique sources
+             (without the DocumentChoice.ALL entry) and num_chunks is the number of metadata entries
+    """
+    for k in db1s:
+        set_dbid(db1s[k])
+    langchain_mode_paths = selection_docs_state1['langchain_mode_paths']
+    langchain_mode_types = selection_docs_state1['langchain_mode_types']
+    set_userid(db1s, requests_state1, get_userid_auth)
+    db = get_any_db(db1s, langchain_mode, langchain_mode_paths, langchain_mode_types,
+                    dbs=dbs,
+                    load_db_if_exists=load_db_if_exists,
+                    db_type=db_type,
+                    use_openai_embedding=use_openai_embedding,
+                    hf_embedding_model=hf_embedding_model,
+                    migrate_embedding_model=migrate_embedding_model,
+                    auto_migrate_db=auto_migrate_db,
+                    for_sources_list=True,
+                    verbose=verbose,
+                    n_jobs=n_jobs,
+                    )
+
+    if langchain_mode in ['LLM'] or db is None:
+        source_files_added = "NA"
+        source_list = []
+        num_chunks = 0
+    elif langchain_mode in ['wiki_full']:
+        source_files_added = "Not showing wiki_full, takes about 20 seconds and makes 4MB file." \
+                             "  Ask jon.mckinney@h2o.ai for file if required."
+        source_list = []
+        num_chunks = 0
+    else:
+        # db is known to be not None here, since the first branch handles the None case
+        metadatas = get_metadatas(db)
+        source_list = sorted(set([x['source'] for x in metadatas]))
+        source_files_added = '\n'.join(source_list)
+        num_chunks = len(metadatas)
+    sources_dir = "sources_dir"
+    sources_dir = makedirs(sources_dir, exist_ok=True, tmp_ok=True, use_base=True)
+    # unique file name per call so separate requests never overwrite each other's listing
+    sources_file = os.path.join(sources_dir, 'sources_%s_%s' % (langchain_mode, str(uuid.uuid4())))
+    with open(sources_file, "wt") as f:
+        f.write(source_files_added)
+    # docs_state0 defaults to None, so guard before concatenating; list() also avoids aliasing the input
+    source_list = list(docs_state0 or []) + source_list
+    if DocumentChoice.ALL.value in source_list:
+        source_list.remove(DocumentChoice.ALL.value)
+    return sources_file, source_list, num_chunks, db
+
+
+def update_user_db(file, db1s, selection_docs_state1, requests_state1,
+                   langchain_mode=None,
+                   get_userid_auth=None,
+                   **kwargs):
+    """
+    Wrapper around _update_user_db that never lets an ingestion failure propagate.
+
+    The entries of ``selection_docs_state1`` are merged into ``kwargs`` and forwarded to
+    _update_user_db.  Any exception raised there is printed and turned into the same
+    5-tuple shape as the success path, with the error rendered as HTML.
+
+    :param file: what to ingest; must not be None
+    :param db1s: per-user db state, passed to set_userid and _update_user_db
+    :param selection_docs_state1: dict of extra keyword arguments for _update_user_db
+    :param requests_state1: request state forwarded to set_userid
+    :param langchain_mode: target langchain mode
+    :param get_userid_auth: forwarded to set_userid
+    :return: result of _update_user_db, or on failure
+             (None, langchain_mode, error_html, error_text, None)
+    :raises RuntimeError: if ``file`` is None
+    """
+    # local import keeps this block self-contained; html is part of the standard library
+    import html
+
+    kwargs.update(selection_docs_state1)
+    set_userid(db1s, requests_state1, get_userid_auth)
+
+    if file is None:
+        raise RuntimeError("Don't use change, use input")
+
+    try:
+        return _update_user_db(file, db1s=db1s,
+                               langchain_mode=langchain_mode,
+                               **kwargs)
+    except BaseException as e:
+        print(traceback.format_exc(), flush=True)
+        # gradio has issues if except, so fail semi-gracefully, else would hang forever in processing textbox
+        # The message may contain file names or document text, so escape it before embedding in HTML
+        ex_str = "Exception: %s" % html.escape(str(e))
+        source_files_added = """\
+        <html>
+          <body>
+            <p>
+               Sources: <br>
+            </p>
+               <div style="overflow-y: auto;height:400px">
+               {0}
+               </div>
+          </body>
+        </html>
+        """.format(ex_str)
+        # plain-text variant of the error is returned unescaped
+        doc_exception_text = str(e)
+        return None, langchain_mode, source_files_added, doc_exception_text, None
+    finally:
+        clear_torch_cache()
+
+
+def get_lock_file(db1, langchain_mode):
+    """
+    Return the path of the lock file for the given db state and langchain mode.
+
+    The file lives under the 'locks' directory (as resolved by makedirs) and is named
+    ``db_<mode>_<db id>.lock``, with spaces and slashes in the mode replaced by underscores.
+
+    :param db1: db state whose id is obtained via get_dbid
+    :param langchain_mode: langchain mode name
+    :return: path to the lock file (the file itself is not created here)
+    :raises ValueError: if the db id is empty and the HARD_ASSERTS environment variable is set
+    """
+    user_db_id = get_dbid(db1)
+    lock_dir = makedirs('locks', exist_ok=True, tmp_ok=True, use_base=True)
+    if not user_db_id:
+        # don't allow db id to be '' or None, would be bug and lock up everything
+        if os.getenv('HARD_ASSERTS'):
+            raise ValueError("Invalid access for langchain_mode=%s" % langchain_mode)
+        # fall back to a random id so this call gets its own lock file
+        user_db_id = str(uuid.uuid4())
+    safe_mode = langchain_mode.replace(' ', '_').replace('/', '_')
+    lock_path = os.path.join(lock_dir, "db_%s_%s.lock" % (safe_mode, user_db_id))
+    makedirs(os.path.dirname(lock_path))  # ensure really made
+    return lock_path
+
+
+def _update_user_db(file,
+                    db1s=None,
+                    langchain_mode='UserData',
+                    chunk=None, chunk_size=None,
+
+                    # urls
+                    use_unstructured=True,
+                    use_playwright=False,
+                    use_selenium=False,
+
+                    # pdfs
+                    use_pymupdf='auto',
+                    use_unstructured_pdf='auto',
+                    use_pypdf='auto',
+                    enable_pdf_ocr='auto',
+                    enable_pdf_doctr='auto',
+                    try_pdf_as_html='auto',
+
+                    # images
+                    enable_ocr=False,
+                    enable_doctr=False,
+                    enable_pix2struct=False,
+                    enable_captions=True,
+                    captions_model=None,
+                    caption_loader=None,
+                    doctr_loader=None,
+                    pix2struct_loader=None,
+
+                    # json
+                    jq_schema='.[]',
+
+                    dbs=None, db_type=None,
+                    langchain_modes=None,
+                    langchain_mode_paths=None,
+                    langchain_mode_types=None,
+                    use_openai_embedding=None,
+                    hf_embedding_model=None,
+                    migrate_embedding_model=None,
+                    auto_migrate_db=None,
+                    verbose=None,
+                    n_jobs=-1,
+                    is_url=None, is_txt=None,
+                    ):
+    """
+    Parse ``file`` into documents and add them to the database for ``langchain_mode``.
+
+    ``file`` may be a path, a list of paths, an object (or list of objects) with a ``name``
+    attribute, or a string holding a Python list literal.  It is passed to path_to_docs as
+    URLs when ``is_url`` is set, as raw text when ``is_txt`` is set, and as paths otherwise.
+
+    If ``langchain_mode`` is a key of ``db1s`` the documents go into ``db1s[langchain_mode][0]``,
+    otherwise into ``dbs[langchain_mode]``; in both cases an existing db is extended with
+    add_to_db and a missing one is created with get_db.
+
+    :return: tuple (None, langchain_mode, source_files_html, exceptions_text, sources_last) where
+             langchain_mode is the mode actually used (it may differ from the argument when the
+             argument was the LLM mode), exceptions_text joins the per-document exception strings
+             with newlines, and sources_last is the basename of the last parsed source or None
+    """
+    # these arguments default to None in the signature but are required by the code below
+    assert db1s is not None
+    assert chunk is not None
+    assert chunk_size is not None
+    assert use_openai_embedding is not None
+    assert hf_embedding_model is not None
+    assert migrate_embedding_model is not None
+    assert auto_migrate_db is not None
+    assert caption_loader is not None
+    assert doctr_loader is not None
+    assert enable_captions is not None
+    assert captions_model is not None
+    assert enable_ocr is not None
+    assert enable_doctr is not None
+    assert enable_pdf_ocr is not None
+    assert enable_pdf_doctr is not None
+    assert enable_pix2struct is not None
+    assert verbose is not None
+
+    if dbs is None:
+        dbs = {}
+    assert isinstance(dbs, dict), "Wrong type for dbs: %s" % str(type(dbs))
+    # handle case of list of temp buffer
+    if isinstance(file, str) and file.strip().startswith('['):
+        try:
+            file = ast.literal_eval(file.strip())
+        except Exception as e:
+            # parsing is best-effort: on failure the original string is kept and used as-is
+            print("Tried to parse %s as list but failed: %s" % (file, str(e)), flush=True)
+    if isinstance(file, list) and len(file) > 0 and hasattr(file[0], 'name'):
+        file = [x.name for x in file]
+    # handle single file of temp buffer
+    if hasattr(file, 'name'):
+        file = file.name
+    # wrap a lone string so the rest of the function can always iterate over file
+    if not isinstance(file, (list, tuple, typing.Generator)) and isinstance(file, str):
+        file = [file]
+
+    if langchain_mode == LangChainMode.DISABLED.value:
+        return None, langchain_mode, get_source_files(), "", None
+
+    if langchain_mode in [LangChainMode.LLM.value]:
+        # then switch to MyData, so langchain_mode also becomes way to select where upload goes
+        # but default to mydata if nothing chosen, since safest
+        if LangChainMode.MY_DATA.value in langchain_modes:
+            langchain_mode = LangChainMode.MY_DATA.value
+        elif len(langchain_modes) >= 1:
+            langchain_mode = langchain_modes[0]
+        else:
+            return None, langchain_mode, get_source_files(), "", None
+
+    if langchain_mode_paths is None:
+        langchain_mode_paths = {}
+    user_path = langchain_mode_paths.get(langchain_mode)
+    # UserData or custom, which has to be from user's disk
+    if user_path is not None:
+        # move temp files from gradio upload to stable location
+        for fili, fil in enumerate(file):
+            if isinstance(fil, str) and os.path.isfile(fil):  # not url, text
+                new_fil = os.path.normpath(os.path.join(user_path, os.path.basename(fil)))
+                # skip the move when the file already sits at its destination
+                if os.path.normpath(os.path.abspath(fil)) != os.path.normpath(os.path.abspath(new_fil)):
+                    # an older file with the same name at the destination is replaced
+                    if os.path.isfile(new_fil):
+                        remove(new_fil)
+                    try:
+                        if os.path.dirname(new_fil):
+                            makedirs(os.path.dirname(new_fil))
+                        shutil.move(fil, new_fil)
+                    except FileExistsError:
+                        pass
+                    # from here on the entry refers to the moved location
+                    file[fili] = new_fil
+
+    if verbose:
+        print("Adding %s" % file, flush=True)
+
+    # FIXME: could avoid even parsing, let alone embedding, same old files if upload same file again
+    # FIXME: but assume nominally user isn't uploading all files over again from UI
+
+    if is_txt and hf_embedding_model == 'fake':
+        # avoid parallel if fake embedding since assume trivial ingestion
+        n_jobs = 1
+
+    # exactly one of the positional path argument, url= and text= receives file; the others get None
+    sources = path_to_docs(file if not is_url and not is_txt else None,
+                           verbose=verbose,
+                           fail_any_exception=False,
+                           n_jobs=n_jobs,
+                           chunk=chunk, chunk_size=chunk_size,
+                           url=file if is_url else None,
+                           text=file if is_txt else None,
+
+                           # urls
+                           use_unstructured=use_unstructured,
+                           use_playwright=use_playwright,
+                           use_selenium=use_selenium,
+
+                           # pdfs
+                           use_pymupdf=use_pymupdf,
+                           use_unstructured_pdf=use_unstructured_pdf,
+                           use_pypdf=use_pypdf,
+                           enable_pdf_ocr=enable_pdf_ocr,
+                           enable_pdf_doctr=enable_pdf_doctr,
+                           try_pdf_as_html=try_pdf_as_html,
+
+                           # images
+                           enable_ocr=enable_ocr,
+                           enable_doctr=enable_doctr,
+                           enable_pix2struct=enable_pix2struct,
+                           enable_captions=enable_captions,
+                           captions_model=captions_model,
+                           caption_loader=caption_loader,
+                           doctr_loader=doctr_loader,
+                           pix2struct_loader=pix2struct_loader,
+
+                           # json
+                           jq_schema=jq_schema,
+
+                           db_type=db_type,
+                           )
+    # documents carrying an 'exception' metadata entry are reported, not ingested
+    exceptions = [x for x in sources if x.metadata.get('exception')]
+    exceptions_strs = [x.metadata['exception'] for x in exceptions]
+    sources = [x for x in sources if 'exception' not in x.metadata]
+
+    # below must at least come after langchain_mode is modified in case was LLM -> MyData,
+    # so original langchain mode changed
+    for k in db1s:
+        set_dbid(db1s[k])
+    db1 = get_db1(db1s, langchain_mode)
+
+    # NOTE(review): this indexes db1s with the MyData key unconditionally, so db1s is expected to contain it
+    lock_file = get_lock_file(db1s[LangChainMode.MY_DATA.value], langchain_mode)  # user-level lock, not db-level lock
+    with filelock.FileLock(lock_file):
+        if langchain_mode in db1s:
+            # db for this mode is kept in db1s; slot 0 of the entry holds the db object
+            if db1[0] is not None:
+                # then add
+                db, num_new_sources, new_sources_metadata = add_to_db(db1[0], sources, db_type=db_type,
+                                                                      use_openai_embedding=use_openai_embedding,
+                                                                      hf_embedding_model=hf_embedding_model)
+            else:
+                # in testing expect:
+                # assert len(db1) == length_db1() and db1[1] is None, "Bad MyData db: %s" % db1
+                # for production hit, when user gets clicky:
+                assert len(db1) == length_db1(), "Bad %s db: %s" % (langchain_mode, db1)
+                assert get_dbid(db1) is not None, "db hash was None, not allowed"
+                # then create
+                # if added has to original state and didn't change, then would be shared db for all users
+                langchain_type = langchain_mode_types.get(langchain_mode, LangChainTypes.EITHER.value)
+                persist_directory, langchain_type = get_persist_directory(langchain_mode, db1s=db1s, dbs=dbs,
+                                                                          langchain_type=langchain_type)
+                # remember the type that get_persist_directory settled on
+                langchain_mode_types[langchain_mode] = langchain_type
+                db = get_db(sources, use_openai_embedding=use_openai_embedding,
+                            db_type=db_type,
+                            persist_directory=persist_directory,
+                            langchain_mode=langchain_mode,
+                            langchain_mode_paths=langchain_mode_paths,
+                            langchain_mode_types=langchain_mode_types,
+                            hf_embedding_model=hf_embedding_model,
+                            migrate_embedding_model=migrate_embedding_model,
+                            auto_migrate_db=auto_migrate_db,
+                            n_jobs=n_jobs)
+            # only overwrite the stored db when one was actually produced
+            if db is not None:
+                db1[0] = db
+            source_files_added = get_source_files(db=db1[0], exceptions=exceptions)
+            if len(sources) > 0:
+                sources_last = os.path.basename(sources[-1].metadata.get('source', 'Unknown Source'))
+            else:
+                sources_last = None
+            return None, langchain_mode, source_files_added, '\n'.join(exceptions_strs), sources_last
+        else:
+            # db for this mode is kept in dbs
+            langchain_type = langchain_mode_types.get(langchain_mode, LangChainTypes.EITHER.value)
+            persist_directory, langchain_type = get_persist_directory(langchain_mode, db1s=db1s, dbs=dbs,
+                                                                      langchain_type=langchain_type)
+            langchain_mode_types[langchain_mode] = langchain_type
+            if langchain_mode in dbs and dbs[langchain_mode] is not None:
+                # then add
+                db, num_new_sources, new_sources_metadata = add_to_db(dbs[langchain_mode], sources, db_type=db_type,
+                                                                      use_openai_embedding=use_openai_embedding,
+                                                                      hf_embedding_model=hf_embedding_model)
+            else:
+                # then create.  Or might just be that dbs is unfilled, then it will fill, then add
+                db = get_db(sources, use_openai_embedding=use_openai_embedding,
+                            db_type=db_type,
+                            persist_directory=persist_directory,
+                            langchain_mode=langchain_mode,
+                            langchain_mode_paths=langchain_mode_paths,
+                            langchain_mode_types=langchain_mode_types,
+                            hf_embedding_model=hf_embedding_model,
+                            migrate_embedding_model=migrate_embedding_model,
+                            auto_migrate_db=auto_migrate_db,
+                            n_jobs=n_jobs)
+            dbs[langchain_mode] = db
+            # NOTE we do not return db, because function call always same code path
+            # return dbs[langchain_mode]
+            # db in this code path is updated in place
+            source_files_added = get_source_files(db=dbs[langchain_mode], exceptions=exceptions)
+            if len(sources) > 0:
+                sources_last = os.path.basename(sources[-1].metadata.get('source', 'Unknown Source'))
+            else:
+                sources_last = None
+            return None, langchain_mode, source_files_added, '\n'.join(exceptions_strs), sources_last
+
+
+def get_source_files_given_langchain_mode(db1s, selection_docs_state1, requests_state1, document_choice1,
+                                          langchain_mode,
+                                          dbs=None,
+                                          load_db_if_exists=None,
+                                          db_type=None,
+                                          use_openai_embedding=None,
+                                          hf_embedding_model=None,
+                                          migrate_embedding_model=None,
+                                          auto_migrate_db=None,
+                                          verbose=False,
+                                          get_userid_auth=None,
+                                          delete_sources=False,
+                                          n_jobs=-1):
+    """
+    Render the sources stored for ``langchain_mode``, optionally deleting some of them first.
+
+    :param db1s: per-user db state, passed to set_userid and get_any_db
+    :param selection_docs_state1: dict providing 'langchain_mode_paths' and 'langchain_mode_types'
+    :param requests_state1: request state forwarded to set_userid
+    :param document_choice1: passed to del_from_db when ``delete_sources`` is set
+    :param langchain_mode: mode whose db is looked up
+    :param delete_sources: when true, call del_from_db on the looked-up db before rendering
+    :return: "Sources: N/A" for the 'LLM' mode or when no db was found, else get_source_files output
+    """
+    mode_paths = selection_docs_state1['langchain_mode_paths']
+    mode_types = selection_docs_state1['langchain_mode_types']
+    set_userid(db1s, requests_state1, get_userid_auth)
+
+    lookup_kwargs = dict(dbs=dbs,
+                         load_db_if_exists=load_db_if_exists,
+                         db_type=db_type,
+                         use_openai_embedding=use_openai_embedding,
+                         hf_embedding_model=hf_embedding_model,
+                         migrate_embedding_model=migrate_embedding_model,
+                         auto_migrate_db=auto_migrate_db,
+                         for_sources_list=True,
+                         verbose=verbose,
+                         n_jobs=n_jobs)
+    db = get_any_db(db1s, langchain_mode, mode_paths, mode_types, **lookup_kwargs)
+
+    if delete_sources:
+        del_from_db(db, document_choice1, db_type=db_type)
+
+    nothing_to_show = langchain_mode in ['LLM'] or db is None
+    return "Sources: N/A" if nothing_to_show else get_source_files(db=db, exceptions=None)
+
+
+def _source_files_table(small_dict, value_column):
+    # Render a {source: value} mapping as a 1-indexed HTML table with columns 'source' and value_column.
+    # if small_dict is empty dict, that's ok
+    df = pd.DataFrame(small_dict.items(), columns=['source', value_column])
+    df.index = df.index + 1
+    df.index.name = 'index'
+    return tabulate.tabulate(df, headers='keys', tablefmt='unsafehtml')
+
+
+def _source_files_page(label, body):
+    # Wrap a heading label and already-rendered HTML in the scrollable page shell.
+    return """\
+        <html>
+          <body>
+            <p>
+               {0} <br>
+            </p>
+               <div style="overflow-y: auto;height:400px">
+               {1}
+               </div>
+          </body>
+        </html>
+        """.format(label, body)
+
+
+def get_source_files(db=None, exceptions=None, metadatas=None):
+    """
+    Render an HTML summary of document sources and, if given, ingestion exceptions.
+
+    :param db: database whose metadatas (via get_metadatas) are listed when ``metadatas`` is None
+    :param exceptions: optional list of documents whose ``metadata`` holds 'source' and 'exception'
+    :param metadatas: optional list of metadata dicts; when given they are shown as "New Sources:"
+                      and ``db`` is not consulted
+    :return: HTML string, or one of the plain strings "No Sources at all", "No New Sources",
+             "No Sources" when there is nothing to show
+    """
+    if exceptions is None:
+        exceptions = []
+
+    # only should be one source, not confused
+    # assert db is not None or metadatas is not None
+    # clicky user
+    # Only bail out when there is truly nothing to report: exceptions without any db must still be shown
+    if db is None and metadatas is None and not exceptions:
+        return "No Sources at all"
+
+    adding_new = metadatas is not None
+    if adding_new:
+        source_label = "New Sources:"
+    else:
+        source_label = "Sources:"
+        metadatas = get_metadatas(db) if db is not None else []
+
+    # below automatically de-dups; only entries whose page is 0 (or absent) are listed
+    small_dict = {get_url(x['source'], from_str=True, short_name=True): get_short_name(x.get('head')) for x in
+                  metadatas if x.get('page', 0) == 0}
+    source_files_added = _source_files_table(small_dict, 'head')
+
+    if exceptions:
+        exception_metadatas = [x.metadata for x in exceptions]
+        small_dict = {get_url(x['source'], from_str=True, short_name=True): get_short_name(x.get('exception')) for x in
+                      exception_metadatas}
+        exceptions_html = _source_files_table(small_dict, 'exception')
+    else:
+        exceptions_html = ''
+
+    if metadatas and exceptions:
+        # both tables share one scrollable area, each on its own line of the template
+        return _source_files_page(source_label, source_files_added + "\n" + " " * 15 + exceptions_html)
+    if metadatas:
+        return _source_files_page(source_label, source_files_added)
+    if exceptions_html:
+        return _source_files_page("Exceptions:", exceptions_html)
+    return "No New Sources" if adding_new else "No Sources"
+
+
+def update_and_get_source_files_given_langchain_mode(db1s,
+                                                     selection_docs_state,
+                                                     requests_state,
+                                                     langchain_mode, chunk, chunk_size,
+
+                                                     # urls
+                                                     use_unstructured=True,
+                                                     use_playwright=False,
+                                                     use_selenium=False,
+
+                                                     # pdfs
+                                                     use_pymupdf='auto',
+                                                     use_unstructured_pdf='auto',
+                                                     use_pypdf='auto',
+                                                     enable_pdf_ocr='auto',
+                                                     enable_pdf_doctr='auto',
+                                                     try_pdf_as_html='auto',
+
+                                                     # images
+                                                     enable_ocr=False,
+                                                     enable_doctr=False,
+                                                     enable_pix2struct=False,
+                                                     enable_captions=True,
+                                                     captions_model=None,
+                                                     caption_loader=None,
+                                                     doctr_loader=None,
+                                                     pix2struct_loader=None,
+
+                                                     # json
+                                                     jq_schema='.[]',
+
+                                                     dbs=None, first_para=None,
+                                                     hf_embedding_model=None,
+                                                     use_openai_embedding=None,
+                                                     migrate_embedding_model=None,
+                                                     auto_migrate_db=None,
+                                                     text_limit=None,
+                                                     db_type=None, load_db_if_exists=None,
+                                                     n_jobs=None, verbose=None, get_userid_auth=None):
+    """
+    Refresh the database for ``langchain_mode`` via make_db and report the newly added sources.
+
+    The current db is looked up with get_any_db, handed to make_db, and the db that make_db
+    returns is stored back into ``db1s`` (when the mode is a key there) or ``dbs``.
+
+    :param db1s: per-user db state
+    :param selection_docs_state: dict providing 'langchain_mode_paths' and 'langchain_mode_types'
+    :param requests_state: request state forwarded to set_userid
+    :param langchain_mode: requested mode; the LLM and MyData modes are redirected to UserData
+                           when UserData has a non-empty path
+    :param dbs: optional mapping of langchain mode -> db; when None the refreshed db is not stored there
+    :return: get_source_files output for the new sources' metadata only
+    """
+    set_userid(db1s, requests_state, get_userid_auth)
+    assert hf_embedding_model is not None
+    assert migrate_embedding_model is not None
+    assert auto_migrate_db is not None
+    langchain_mode_paths = selection_docs_state['langchain_mode_paths']
+    langchain_mode_types = selection_docs_state['langchain_mode_types']
+    if langchain_mode in [LangChainMode.LLM.value, LangChainMode.MY_DATA.value]:
+        # then assume user really meant UserData, to avoid extra clicks in UI,
+        # since others can't be on disk, except custom user modes, which they should then select to query it
+        if langchain_mode_paths.get(LangChainMode.USER_DATA.value):
+            langchain_mode = LangChainMode.USER_DATA.value
+
+    db = get_any_db(db1s, langchain_mode, langchain_mode_paths, langchain_mode_types,
+                    dbs=dbs,
+                    load_db_if_exists=load_db_if_exists,
+                    db_type=db_type,
+                    use_openai_embedding=use_openai_embedding,
+                    hf_embedding_model=hf_embedding_model,
+                    migrate_embedding_model=migrate_embedding_model,
+                    auto_migrate_db=auto_migrate_db,
+                    for_sources_list=True,
+                    verbose=verbose,
+                    n_jobs=n_jobs,
+                    )
+
+    # not designed for older way of using openai embeddings, why use_openai_embedding=False
+    # use_openai_embedding, hf_embedding_model passed in and possible different values used,
+    # but no longer used here or in calling functions so ok
+    db, num_new_sources, new_sources_metadata = make_db(use_openai_embedding=False,
+                                                        hf_embedding_model=hf_embedding_model,
+                                                        migrate_embedding_model=migrate_embedding_model,
+                                                        auto_migrate_db=auto_migrate_db,
+                                                        first_para=first_para, text_limit=text_limit,
+                                                        chunk=chunk,
+                                                        chunk_size=chunk_size,
+
+                                                        # urls
+                                                        use_unstructured=use_unstructured,
+                                                        use_playwright=use_playwright,
+                                                        use_selenium=use_selenium,
+
+                                                        # pdfs
+                                                        use_pymupdf=use_pymupdf,
+                                                        use_unstructured_pdf=use_unstructured_pdf,
+                                                        use_pypdf=use_pypdf,
+                                                        enable_pdf_ocr=enable_pdf_ocr,
+                                                        enable_pdf_doctr=enable_pdf_doctr,
+                                                        try_pdf_as_html=try_pdf_as_html,
+
+                                                        # images
+                                                        enable_ocr=enable_ocr,
+                                                        enable_doctr=enable_doctr,
+                                                        enable_pix2struct=enable_pix2struct,
+                                                        enable_captions=enable_captions,
+                                                        captions_model=captions_model,
+                                                        caption_loader=caption_loader,
+                                                        doctr_loader=doctr_loader,
+                                                        pix2struct_loader=pix2struct_loader,
+
+                                                        # json
+                                                        jq_schema=jq_schema,
+
+                                                        langchain_mode=langchain_mode,
+                                                        langchain_mode_paths=langchain_mode_paths,
+                                                        langchain_mode_types=langchain_mode_types,
+                                                        db_type=db_type,
+                                                        load_db_if_exists=load_db_if_exists,
+                                                        db=db,
+                                                        n_jobs=n_jobs,
+                                                        verbose=verbose)
+    # during refreshing, might have "created" new db since not in dbs[] yet, so insert back just in case
+    # so even if persisted, not kept up-to-date with dbs memory
+    if langchain_mode in db1s:
+        db1s[langchain_mode][0] = db
+    elif dbs is not None:
+        # dbs defaults to None, in which case there is no shared mapping to update
+        dbs[langchain_mode] = db
+
+    # return only new sources with text saying such
+    return get_source_files(db=None, exceptions=None, metadatas=new_sources_metadata)
+
+
+def get_db1(db1s, langchain_mode1):
+    """
+    Return the db state stored in ``db1s`` for ``langchain_mode1``.
+
+    :param db1s: mapping of langchain mode -> db state
+    :param langchain_mode1: mode to look up
+    :return: the stored entry itself (not a copy), or a fresh list of ``length_db1()`` None values
+             when the mode is not a key of ``db1s``
+    """
+    if langchain_mode1 not in db1s:
+        # indicates to code that not personal database
+        return [None] * length_db1()
+    return db1s[langchain_mode1]
+
+
+def clean_doc(docs1):
+    """
+    Strip every line of each document's ``page_content`` and drop lines that end up empty.
+
+    Documents are modified in place.
+
+    :param docs1: a single document, or a list, tuple or generator of documents; each must
+                  have a string ``page_content`` attribute
+    :return: the same list or tuple that was passed in, or a new list when a single document
+             or a generator was given
+    """
+    if isinstance(docs1, types.GeneratorType):
+        # a generator cannot be indexed or handed back after iteration, so materialize it first
+        docs1 = list(docs1)
+    elif not isinstance(docs1, (list, tuple)):
+        docs1 = [docs1]
+    for doc in docs1:
+        stripped_lines = (line.strip() for line in doc.page_content.split("\n"))
+        doc.page_content = '\n'.join(line for line in stripped_lines if line)
+    return docs1
+
+
+def clone_documents(documents: Iterable[Document]) -> List[Document]:
+    """
+    Build new Document objects from ``documents``.
+
+    Each copy reuses the original ``page_content`` and gets a deep copy of the original
+    ``metadata``, so changing a copy's metadata does not touch the original's.
+
+    :param documents: documents to copy
+    :return: list of new documents, in the same order
+    """
+    return [Document(page_content=original.page_content, metadata=copy.deepcopy(original.metadata))
+            for original in documents]
+
+
+def get_db_from_hf(dest=".", db_dir='db_dir_DriverlessAI_docs.zip'):
+    """
+    Download a database zip from the 'h2oai/db_dirs' Hugging Face dataset repo and extract it.
+
+    Before extraction, the directory named by the first archive entry is removed from ``dest``
+    so that stale contents do not mix with the extracted files.
+
+    :param dest: directory to extract into
+    :param db_dir: name of the zip file inside the dataset repo
+    :return: local path of the downloaded zip file
+    """
+    from huggingface_hub import hf_hub_download
+    # True for case when locally already logged in with correct token, so don't have to set key
+    token = os.getenv('HUGGING_FACE_HUB_TOKEN', True)
+    path_to_zip_file = hf_hub_download('h2oai/db_dirs', db_dir, token=token, repo_type='dataset')
+    with zipfile.ZipFile(path_to_zip_file, 'r') as zip_ref:
+        names = zip_ref.namelist()
+        # an empty archive has no first entry to derive a directory from
+        persist_directory = os.path.dirname(names[0]) if names else ''
+        if persist_directory:
+            # remove relative to dest, which is where the archive is extracted;
+            # skipped for an empty name, since joining it with dest would point at dest itself
+            remove(os.path.join(dest, persist_directory))
+        zip_ref.extractall(dest)
+    return path_to_zip_file
+
+
+# Each entry is [zip file name, directory expected after extraction, license note].
+# Note dir has space in some cases, while zip does not
+some_db_zips = [
+    ['db_dir_DriverlessAI_docs.zip', 'db_dir_DriverlessAI docs', 'CC-BY-NC license'],
+    ['db_dir_UserData.zip', 'db_dir_UserData', 'CC-BY license for ArXiv'],
+    ['db_dir_github_h2oGPT.zip', 'db_dir_github h2oGPT', 'ApacheV2 license'],
+    ['db_dir_wiki.zip', 'db_dir_wiki', 'CC-BY-SA Wikipedia license'],
+    # ['db_dir_wiki_full.zip', 'db_dir_wiki_full.zip', '23GB, 05/04/2023 CC-BY-SA Wiki license'],
+]
+
+# Same entries plus the full wiki archive.
+# NOTE(review): the expected directory of the last entry carries a '.zip' suffix - confirm this is intended
+all_db_zips = [
+    *some_db_zips,
+    ['db_dir_wiki_full.zip', 'db_dir_wiki_full.zip', '23GB, 05/04/2023 CC-BY-SA Wiki license'],
+]
+
+
+def get_some_dbs_from_hf(dest='.', db_zips=None):
+    """
+    Fetch and extract several database zips with get_db_from_hf and check what was extracted.
+
+    :param dest: directory to extract into
+    :param db_zips: list of [zip name, expected directory, license note] entries;
+                    defaults to ``some_db_zips``
+    :raises AssertionError: if a downloaded zip is missing, or an expected directory or its
+                            'index' sub-directory does not exist under ``dest``
+    """
+    entries = some_db_zips if db_zips is None else db_zips
+    for zip_name, dir_expected, _license_note in entries:
+        path_to_zip_file = get_db_from_hf(dest=dest, db_dir=zip_name)
+        assert os.path.isfile(path_to_zip_file), "Missing zip in %s" % path_to_zip_file
+        if not dir_expected:
+            # nothing to verify on disk for this entry
+            continue
+        extracted_dir = os.path.join(dest, dir_expected)
+        assert os.path.isdir(extracted_dir), "Missing path for %s" % dir_expected
+        assert os.path.isdir(os.path.join(extracted_dir, 'index')), "Missing index in %s" % dir_expected
+
+
+def _create_local_weaviate_client():
+    """
+    Create a Weaviate client configured from environment variables.
+
+    Reads WEAVIATE_URL (default "http://localhost:8080"), WEAVIATE_USERNAME, WEAVIATE_PASSWORD
+    and WEAVIATE_SCOPE (default "offline_access").  Password authentication is used only when
+    both username and password are set.
+
+    :return: the client, or None if anything fails (including weaviate not being importable);
+             the failure is printed
+    """
+    url = os.getenv('WEAVIATE_URL', "http://localhost:8080")
+    username = os.getenv('WEAVIATE_USERNAME')
+    password = os.getenv('WEAVIATE_PASSWORD')
+    scope = os.getenv('WEAVIATE_SCOPE', "offline_access")
+
+    try:
+        import weaviate
+        # not used below, but a failing import is reported like any other failure
+        from weaviate.embedded import EmbeddedOptions
+
+        auth_config = None
+        have_credentials = username is not None and password is not None
+        if have_credentials:
+            auth_config = weaviate.AuthClientPassword(username=username, password=password, scope=scope)
+
+        # if using remote server, don't choose persistent directory
+        return weaviate.Client(url, auth_client_secret=auth_config)
+    except Exception as e:
+        print(f"Failed to create Weaviate client: {e}")
+        return None
+
+
+# Running this file as a script does nothing.
+if __name__ == '__main__':
+    pass