# ChatGLM4CS313 / app.py
import streamlit as st
import torch
from transformers import (
  AutoModelForCausalLM,
  AutoTokenizer,
  BitsAndBytesConfig,
  logging
)
logging.set_verbosity_error()
model_name = 'THUDM/chatglm3-6b'
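# ChatGLM3-6B ships its modeling code with the checkpoint on the Hub,
# so both the model and the tokenizer must be loaded with trust_remote_code=True.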
#############################################
# bitsandbytes parameters
#############################################
# Activate 4-bit precision for base model loading
use_4bit = True
# Compute dtype of 4-bit base models
bnb_4bit_compute_dtype = 'float16'
# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = 'nf4'
# Activate nested quantization for 4-bit base models
use_nested_quant = False
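# Note: nested (double) quantization also quantizes the quantization constants
# themselves, saving roughly 0.4 extra bits per parameter at a small speed
# cost; it is left disabled here.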
# Device mapping: bitsandbytes 4-bit quantization requires a CUDA GPU,
# so map everything to GPU 0 when one is available and fall back to CPU otherwise
device_map = {"": 0} if torch.cuda.is_available() else {"": "cpu"}
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)
bnb_config = BitsAndBytesConfig(
  load_in_4bit=use_4bit,
  bnb_4bit_quant_type=bnb_4bit_quant_type,
  bnb_4bit_compute_dtype=compute_dtype,
  bnb_4bit_use_double_quant=use_nested_quant,
)
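# Back-of-the-envelope memory math: ~6.2B parameters at 4 bits is about
# 3.1 GB of weights, versus ~12.4 GB in float16, so NF4 quantization roughly
# quarters the base model's footprint (activations and quantization
# constants come on top of that).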
if compute_dtype == torch.float16 and use_4bit and torch.cuda.is_available():
  major, _ = torch.cuda.get_device_capability()
  if major >= 8:
    print('=' * 80)
    print('Your GPU supports bfloat16: you can accelerate by setting bnb_4bit_compute_dtype = "bfloat16"')
    print('=' * 80)
@st.cache_resource  # load the model once and reuse it across Streamlit reruns
def load_model():
  model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    quantization_config=bnb_config,
    device_map=device_map,
  )
  tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
  tokenizer.padding_side = 'left'  # decoder-only models are padded on the left for generation
  return model, tokenizer

model, tokenizer = load_model()
# Set the title of the Streamlit app
st.title("ChatGLM3 Chatbot with Hugging Face Transformers")
# Keep the conversation in session state so it survives Streamlit reruns
if "history" not in st.session_state:
  st.session_state.history = []   # (user, bot) turns passed back to model.chat
if "messages" not in st.session_state:
  st.session_state.messages = []  # formatted lines for display
# Placeholder for the conversation history
conversation_text = st.empty()
# Get the user input
user_input = st.text_input("You: ")
# If the user has submitted input
if st.button("Send") and user_input:
  # Generate the chatbot's response, threading the turn history through
  response, st.session_state.history = model.chat(
    tokenizer, user_input, history=st.session_state.history
  )
  # Add both sides of the exchange to the conversation history
  st.session_state.messages.append(f"You: {user_input}")
  st.session_state.messages.append(f"Bot: {response}")
  # Render the whole conversation in a single markdown call,
  # since each write to an st.empty() placeholder replaces its contents
  lines = "\n".join(f"- {message}" for message in st.session_state.messages)
  conversation_text.markdown("**Conversation:**\n\n" + lines)
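# Usage sketch (assuming Streamlit and a CUDA-enabled bitsandbytes stack are
# installed): launch the app with
#   streamlit run app.py
# ChatGLM3's chat API returns the reply plus the updated turn history, e.g.
#   response, history = model.chat(tokenizer, "Hi there", history=[])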