WinterGYC committed
Commit fd61579 • 1 Parent(s): bea4533
Init

- Dockerfile +16 -0
- app.py +72 -0
- requirements.txt +6 -0
Dockerfile
ADDED
@@ -0,0 +1,16 @@
+# read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
+# you will also find guides on how best to write your Dockerfile
+
+FROM python:3.9
+
+WORKDIR /code
+
+COPY ./requirements.txt /code/requirements.txt
+
+COPY ./app.py /code/app.py
+
+RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+
+COPY . .
+
+CMD ["streamlit", "run", "app.py", "--server.port", "7860", "--server.address", "0.0.0.0"]
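Note the CMD: app.py below is a Streamlit app, not a FastAPI app, so the container must launch it with streamlit run rather than the uvicorn entrypoint from the Docker Spaces template. To sanity-check the image outside of Spaces, a local build and run along these lines should work (the image tag is arbitrary, and --gpus all assumes the NVIDIA container toolkit is installed):

docker build -t baichuan-13b-chat .
docker run --gpus all -p 7860:7860 baichuan-13b-chat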
app.py
ADDED
@@ -0,0 +1,72 @@
+import json
+import torch
+import streamlit as st
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers.generation.utils import GenerationConfig
+
+
+st.set_page_config(page_title="Baichuan-13B-Chat")
+st.title("Baichuan-13B-Chat")
+
+@st.cache_resource
+def init_model():
+    model = AutoModelForCausalLM.from_pretrained(
+        "baichuan-inc/Baichuan-13B-Chat",
+        torch_dtype=torch.float16,
+        device_map="auto",
+        trust_remote_code=True
+    )
+    model.generation_config = GenerationConfig.from_pretrained(
+        "baichuan-inc/Baichuan-13B-Chat"
+    )
+    tokenizer = AutoTokenizer.from_pretrained(
+        "baichuan-inc/Baichuan-13B-Chat",
+        use_fast=False,
+        trust_remote_code=True
+    )
+    model = model.quantize(8).cuda()
+    return model, tokenizer
+
+
+def clear_chat_history():
+    del st.session_state.messages
+
+
+def init_chat_history():
+    with st.chat_message("assistant", avatar='🤖'):
+        st.markdown("Greetings! I am the BaiChuan large language model, delighted to assist you.🥰")
+
+    if "messages" in st.session_state:
+        for message in st.session_state.messages:
+            avatar = '🧑‍💻' if message["role"] == "user" else '🤖'
+            with st.chat_message(message["role"], avatar=avatar):
+                st.markdown(message["content"])
+    else:
+        st.session_state.messages = []
+
+    return st.session_state.messages
+
+
+def main():
+    model, tokenizer = init_model()
+    messages = init_chat_history()
+
+    if prompt := st.chat_input("Shift + Enter for a new line, Enter to send"):
+        with st.chat_message("user", avatar='🧑‍💻'):
+            st.markdown(prompt)
+        messages.append({"role": "user", "content": prompt})
+        print(f"[user] {prompt}", flush=True)
+        with st.chat_message("assistant", avatar='🤖'):
+            placeholder = st.empty()
+            for response in model.chat(tokenizer, messages, stream=True):
+                placeholder.markdown(response)
+                if torch.backends.mps.is_available():
+                    torch.mps.empty_cache()
+        messages.append({"role": "assistant", "content": response})
+        print(json.dumps(messages, ensure_ascii=False), flush=True)
+
+    st.button("Reset Chat", on_click=clear_chat_history)
+
+
+if __name__ == "__main__":
+    main()
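The chat method used above is not part of the core transformers API; it comes from Baichuan's remote code (hence trust_remote_code=True). With stream=True it yields the full response-so-far on every iteration, which is why the loop can simply re-render the placeholder each time and keep only the final value after the loop ends. A minimal console sketch of the same API with the Streamlit layer stripped away, assuming a CUDA machine with enough memory for the int8-quantized 13B model:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.generation.utils import GenerationConfig

NAME = "baichuan-inc/Baichuan-13B-Chat"

tokenizer = AutoTokenizer.from_pretrained(NAME, use_fast=False, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    NAME, torch_dtype=torch.float16, device_map="auto", trust_remote_code=True
)
model.generation_config = GenerationConfig.from_pretrained(NAME)
model = model.quantize(8).cuda()  # same int8 path as app.py

messages = []
while True:
    messages.append({"role": "user", "content": input("user> ")})
    response = ""
    for response in model.chat(tokenizer, messages, stream=True):
        pass  # each yield is the whole response generated so far
    print(f"assistant> {response}")
    messages.append({"role": "assistant", "content": response})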
requirements.txt
ADDED
@@ -0,0 +1,6 @@
+accelerate
+colorama
+cpm_kernels
+sentencepiece
+streamlit
+transformers_stream_generator
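torch and transformers are not pinned here directly; they presumably arrive as transitive dependencies (accelerate depends on torch, and transformers_stream_generator depends on transformers). Listing them explicitly, with versions, would make the image build more reproducible.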