Daniel Marques committed
Commit 0fa1208 • 1 Parent(s): 3a5d8c7
fix: memory error
Files changed:
- constants.py +2 -2
- load_models.py +1 -1
constants.py
CHANGED
@@ -32,13 +32,13 @@ CHROMA_SETTINGS = Settings(
 )
 
 # Context Window and Max New Tokens
-CONTEXT_WINDOW_SIZE =
+CONTEXT_WINDOW_SIZE = 2048
 MAX_NEW_TOKENS = CONTEXT_WINDOW_SIZE  # int(CONTEXT_WINDOW_SIZE/4)
 
 #### If you get a "not enough space in the buffer" error, you should reduce the values below, start with half of the original values and keep halving the value until the error stops appearing
 
 N_GPU_LAYERS = 100  # Llama-2-70B has 83 layers
-N_BATCH =
+N_BATCH = 1024
 
 ### From experimenting with the Llama-2-7B-Chat-GGML model on 8GB VRAM, these values work:
 # N_GPU_LAYERS = 20
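For context, these constants are typically fed into a LlamaCpp loader. The following is a minimal sketch of that pattern, not code from this commit: the function name and model path are illustrative assumptions, while the parameter names (n_ctx, n_batch, n_gpu_layers, max_tokens) are the standard LangChain LlamaCpp arguments these constants map to.

# Hedged sketch: how the constants above are usually consumed.
# Only the constant values come from this commit; everything else is illustrative.
from langchain.llms import LlamaCpp

CONTEXT_WINDOW_SIZE = 2048
MAX_NEW_TOKENS = CONTEXT_WINDOW_SIZE  # int(CONTEXT_WINDOW_SIZE/4)
N_GPU_LAYERS = 100
N_BATCH = 1024

def load_llamacpp_model(model_path: str) -> LlamaCpp:
    # n_ctx caps the prompt + generation window; n_batch sets how many tokens
    # are processed per forward pass. Both sizes drive the buffer allocations,
    # which is why the comment above suggests halving them when a
    # "not enough space in the buffer" error appears.
    return LlamaCpp(
        model_path=model_path,          # illustrative; path not part of this commit
        n_ctx=CONTEXT_WINDOW_SIZE,
        max_tokens=MAX_NEW_TOKENS,
        n_gpu_layers=N_GPU_LAYERS,
        n_batch=N_BATCH,
        verbose=True,
    )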
load_models.py
CHANGED
@@ -6,7 +6,7 @@ from auto_gptq import AutoGPTQForCausalLM
 from huggingface_hub import hf_hub_download
 from langchain.llms import LlamaCpp, HuggingFacePipeline
 
-from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
+# from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
 
 from transformers import (
     AutoModelForCausalLM,
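Commenting out this import removes the streaming-to-stdout callback from the model setup. For reference, the handler is normally wired in as shown in the sketch below; this is common LangChain usage under stated assumptions, not code from this repository, and the model path is a placeholder.

# Hedged sketch of typical StreamingStdOutCallbackHandler usage that the
# commented-out import would have supported. Without it, tokens are not
# echoed to stdout as they are generated.
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.llms import LlamaCpp

callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

llm = LlamaCpp(
    model_path="models/model.bin",  # placeholder path, not from this commit
    n_ctx=2048,
    callback_manager=callback_manager,
)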