import torch
import gradio as gr
from transformers import TextIteratorStreamer, AutoProcessor, LlavaForConditionalGeneration
from PIL import Image
from threading import Thread
import spaces
import accelerate  # not used directly, but required for low_cpu_mem_usage=True below
import time
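# Krypton: a Gradio chat demo that streams vision-language answers from
# llava-llama-3-8b-v1_1 on Hugging Face Spaces.
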
DESCRIPTION = '''
<div>
<h1 style="text-align: center;">Krypton π</h1>
<p>This demo uses the open-source <a href="https://huggingface.co/xtuner/llava-llama-3-8b-v1_1-transformers"><b>xtuner/llava-llama-3-8b-v1_1-transformers</b></a> model</p>
</div>
'''
model_id = "xtuner/llava-llama-3-8b-v1_1-transformers"
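# Load the checkpoint in fp16; low_cpu_mem_usage streams the weights in via
# accelerate instead of first materialising a full-precision copy in CPU RAM.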
model = LlavaForConditionalGeneration.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
)
model.to('cuda')
processor = AutoProcessor.from_pretrained(model_id)
# Stop generation at <|eot_id|>, which is token id 128009 in the Llama-3 tokenizer
model.generation_config.eos_token_id = 128009
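# On Hugging Face Spaces, spaces.GPU requests a ZeroGPU slot for up to 120 s per call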
@spaces.GPU(duration=120)
def krypton(message, history):
    print(message)
    image = None
    if message["files"]:
        # message["files"][-1] is either a dict with a "path" key or a plain path string
        if isinstance(message["files"][-1], dict):
            image = message["files"][-1]["path"]
        else:
            image = message["files"][-1]
    else:
        # No image uploaded this turn: reuse the most recent image from past
        # turns, which Gradio keeps in the history as (filepath,) tuples.
        for hist in history:
            if isinstance(hist[0], tuple):
                image = hist[0][0]
    if image is None:
        # gr.Error must be raised, not just constructed, for Gradio to show it
        raise gr.Error("Please upload an image so Krypton can understand.")
prompt = f"<|start_header_id|>user<|end_header_id|>\n\n<image>\n{message['text']}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
    image = Image.open(image)
    inputs = processor(text=prompt, images=image, return_tensors='pt').to('cuda', torch.float16)
    # Run generate() on a background thread; the streamer yields decoded text
    # chunks on this thread as tokens arrive.
    streamer = TextIteratorStreamer(processor.tokenizer, skip_special_tokens=False, skip_prompt=True)
    generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024, do_sample=False)
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()
buffer = ""
time.sleep(0.5)
for new_text in streamer:
# find <|eot_id|> and remove it from the new_text
if "<|eot_id|>" in new_text:
new_text = new_text.split("<|eot_id|>")[0]
buffer += new_text
generated_text_without_prompt = buffer
time.sleep(0.06)
yield generated_text_without_prompt
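
# The dict Gradio's MultimodalTextbox passes to krypton looks roughly like
# {"text": "What is in this image?", "files": ["/tmp/gradio/cat.png"]}
# (the file path here is illustrative).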
chatbot = gr.Chatbot(height=600, label="Krypt AI")
chat_input = gr.MultimodalTextbox(interactive=True, file_types=["image"], placeholder="Enter your question or upload an image.", show_label=False)
with gr.Blocks(fill_height=True) as demo:
    gr.Markdown(DESCRIPTION)
    gr.ChatInterface(
        fn=krypton,
        chatbot=chatbot,
        fill_height=True,
        multimodal=True,
        textbox=chat_input,
    )
if __name__ == "__main__":
    demo.launch()
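
# To run locally (dependency list is an assumption; this file pins nothing):
#   pip install torch transformers accelerate gradio spaces pillow
#   python app.py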