Spaces:

tmzh
/

chatbot-using-rag-on-faq

Paused

App Files Community

tmzh committed on Jul 1

Commit

371d8a8

•

1 Parent(s): 5a96289

enable gpu

Browse files

Files changed (2) hide show

app.py +32 -21
requirements.txt +1 -1

app.py CHANGED Viewed

@@ -14,15 +14,19 @@ from chromadb.utils import embedding_functions
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
 models = {
-    "wizardLM-7B-HF" : "TheBloke/wizardLM-7B-HF",
-    "wizard-vicuna-13B-GPTQ" : "TheBloke/wizard-vicuna-13B-GPTQ",
-    "Wizard-Vicuna-13B-Uncensored" : "ehartford/Wizard-Vicuna-13B-Uncensored",
-    "WizardLM-13B" : "TheBloke/WizardLM-13B-V1.0-Uncensored-GPTQ",
-    "Llama-2-7B" : "TheBloke/Llama-2-7b-Chat-GPTQ",
-    "Vicuna-13B" : "TheBloke/vicuna-13B-v1.5-GPTQ",
-    "WizardLM-13B-V1.2" : "TheBloke/WizardLM-13B-V1.2-GPTQ", # Trained from Llama-2 13b
-    "Mistral-7B" : "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ"
 }
@@ -32,27 +36,29 @@ tokenizer = AutoTokenizer.from_pretrained(models[model_name])
 # tokenizer.use_default_system_prompt = True
 tokenizer.chat_template = tokenizer.default_chat_template
-model = AutoModelForCausalLM.from_pretrained(models[model_name],
-                                             torch_dtype=torch.float16,
                                              device_map="auto")
-file_path='./data/faq_dataset.json'
 data = json.loads(Path(file_path).read_text())
 client = chromadb.Client()
-emb_fn = embedding_functions.SentenceTransformerEmbeddingFunction(model_name="BAAI/bge-small-en-v1.5")
 collection = client.create_collection(
     name="retrieval_qa",
     embedding_function=emb_fn,
-    metadata={"hnsw:space": "cosine"} # l2 is the default
 )
-documents = [json.dumps(q) for q in data['questions']] # encode QnA as json strings for generating embeddings
-metadatas = data['questions'] # retain QnA as dict in metadatas
 ids = [str(uuid.uuid1()) for _ in documents]
@@ -99,8 +105,10 @@ def respond(query):
     model.to(model.device)
-    generated_ids = model.generate(model_inputs, streamer=streamer, temperature=0.01, max_new_tokens=100, do_sample=True)
-    answer = tokenizer.batch_decode(generated_ids[:, model_inputs.shape[1]:])[0]
     answer = answer.replace('</s>', '')
     samples = related_questions
@@ -119,13 +127,16 @@ with gr.Blocks() as chatbot:
         with gr.Column():
             answer_block = gr.Textbox(label="Answers", lines=2)
             question = gr.Textbox(label="Question")
-            examples = gr.Dataset(samples=samples, components=[question], label="Similar questions", type="index")
             generate = gr.Button(value="Ask")
         with gr.Column():
-            references_block = gr.Markdown("## References\n", label="global variable")
         examples.click(load_example, inputs=[examples], outputs=[question])
-        generate.click(respond, inputs=question, outputs=[answer_block, references_block, examples])
 chatbot.queue()
-chatbot.launch()

 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
+print(f"Is CUDA available: {torch.cuda.is_available()}")
+print(
+    f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
 models = {
+    "wizardLM-7B-HF": "TheBloke/wizardLM-7B-HF",
+    "wizard-vicuna-13B-GPTQ": "TheBloke/wizard-vicuna-13B-GPTQ",
+    "Wizard-Vicuna-13B-Uncensored": "ehartford/Wizard-Vicuna-13B-Uncensored",
+    "WizardLM-13B": "TheBloke/WizardLM-13B-V1.0-Uncensored-GPTQ",
+    "Llama-2-7B": "TheBloke/Llama-2-7b-Chat-GPTQ",
+    "Vicuna-13B": "TheBloke/vicuna-13B-v1.5-GPTQ",
+    "WizardLM-13B-V1.2": "TheBloke/WizardLM-13B-V1.2-GPTQ",  # Trained from Llama-2 13b
+    "Mistral-7B": "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ"
 }
 # tokenizer.use_default_system_prompt = True
 tokenizer.chat_template = tokenizer.default_chat_template
+model = AutoModelForCausalLM.from_pretrained(models[model_name],
+                                             torch_dtype=torch.float16,
                                              device_map="auto")
+file_path = './data/faq_dataset.json'
 data = json.loads(Path(file_path).read_text())
 client = chromadb.Client()
+emb_fn = embedding_functions.SentenceTransformerEmbeddingFunction(
+    model_name="BAAI/bge-small-en-v1.5")
 collection = client.create_collection(
     name="retrieval_qa",
     embedding_function=emb_fn,
+    metadata={"hnsw:space": "cosine"}  # l2 is the default
 )
+# encode QnA as json strings for generating embeddings
+documents = [json.dumps(q) for q in data['questions']]
+metadatas = data['questions']  # retain QnA as dict in metadatas
 ids = [str(uuid.uuid1()) for _ in documents]
     model.to(model.device)
+    generated_ids = model.generate(
+        model_inputs, streamer=streamer, temperature=0.01, max_new_tokens=100, do_sample=True)
+    answer = tokenizer.batch_decode(
+        generated_ids[:, model_inputs.shape[1]:])[0]
     answer = answer.replace('</s>', '')
     samples = related_questions
         with gr.Column():
             answer_block = gr.Textbox(label="Answers", lines=2)
             question = gr.Textbox(label="Question")
+            examples = gr.Dataset(samples=samples, components=[
+                                  question], label="Similar questions", type="index")
             generate = gr.Button(value="Ask")
         with gr.Column():
+            references_block = gr.Markdown(
+                "## References\n", label="global variable")
         examples.click(load_example, inputs=[examples], outputs=[question])
+        generate.click(respond, inputs=question, outputs=[
+                       answer_block, references_block, examples])
 chatbot.queue()
+chatbot.launch()

requirements.txt CHANGED Viewed

@@ -6,5 +6,5 @@ huggingface_hub
 optimum
 sentence_transformers
 spaces
-torch==2.3.0
 transformers==4.43.0.dev0

 optimum
 sentence_transformers
 spaces
+torch==2.3.0 --index-url https://download.pytorch.org/whl/cu121
 transformers==4.43.0.dev0