Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -22,8 +22,6 @@ model = LlavaQwen2ForCausalLM.from_pretrained(
     torch_dtype=torch.float16,
     trust_remote_code=True)
 
-model.to("cuda:0")
-
 class KeywordsStoppingCriteria(StoppingCriteria):
     def __init__(self, keywords, tokenizer, input_ids):
         self.keywords = keywords
@@ -97,13 +95,13 @@ def bot_streaming(message, history):
         tokenize=False,
         add_generation_prompt=True)
     text_chunks = [tokenizer(chunk).input_ids for chunk in text.split('<image>')]
-    input_ids = torch.tensor(text_chunks[0] + [-200] + text_chunks[1], dtype=torch.long).unsqueeze(0)
+    input_ids = torch.tensor(text_chunks[0] + [-200] + text_chunks[1], dtype=torch.long).unsqueeze(0)
     stop_str = '<|im_end|>'
     keywords = [stop_str]
     stopping_criteria = KeywordsStoppingCriteria(keywords, tokenizer, input_ids)
     streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
 
-    image_tensor = model.process_images([image], model.config).to(
+    image_tensor = model.process_images([image], model.config).to(dtype=model.dtype)
     generation_kwargs = dict(input_ids=input_ids, images=image_tensor, streamer=streamer, max_new_tokens=100, stopping_criteria=[stopping_criteria])
     generated_text = ""
     thread = Thread(target=model.generate, kwargs=generation_kwargs)