zohadev committed
Commit 474af6f
1 Parent(s): fafb45a

Update app.py

Files changed (1):
  1. app.py +3 -5
app.py CHANGED
@@ -1,6 +1,4 @@
 import subprocess
-# Installing flash_attn
-subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
 
 import gradio as gr
 from PIL import Image
@@ -13,9 +11,9 @@ import torch
 import spaces
 
 model_id = "microsoft/Phi-3-vision-128k-instruct"
-model = AutoModelForCausalLM.from_pretrained(model_id, device_map="cuda", trust_remote_code=True, torch_dtype="auto")
+model = AutoModelForCausalLM.from_pretrained(model_id, device_map="cpu", trust_remote_code=True, torch_dtype="auto")
 processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
-model.to("cuda:0")
+model.to("cpu")
 
 PLACEHOLDER = """
 <div style="padding: 30px; text-align: center; display: flex; flex-direction: column; align-items: center;">
@@ -71,7 +69,7 @@ def bot_streaming(message, history):
     print(f"prompt is -\n{conversation}")
     prompt = processor.tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
     image = Image.open(image)
-    inputs = processor(prompt, image, return_tensors="pt").to("cuda:0")
+    inputs = processor(prompt, image, return_tensors="pt").to("cpu")
 
     streamer = TextIteratorStreamer(processor, **{"skip_special_tokens": True, "skip_prompt": True, 'clean_up_tokenization_spaces':False,})
     generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024, do_sample=False, temperature=0.0, eos_token_id=processor.tokenizer.eos_token_id,)
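The last hunk ends at `generation_kwargs`; the `generate` call itself sits outside the hunks shown. In Gradio apps built around `TextIteratorStreamer`, generation typically runs on a background thread while the handler yields partial text as tokens arrive. A minimal sketch of that pattern, assuming it runs inside the `bot_streaming` generator above (the `thread` and `buffer` names are illustrative, not taken from this commit):

    from threading import Thread

    # Run generate() on a worker thread; TextIteratorStreamer blocks the
    # consuming loop until new tokens arrive, so the generator below can
    # yield partial output to Gradio while the model decodes.
    # (Assumed pattern; this code is outside the hunks shown above.)
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    buffer = ""
    for new_text in streamer:
        buffer += new_text
        yield buffer

On CPU, as this commit configures the Space, decoding will be substantially slower than on CUDA, but the streaming loop is unchanged: only the device placement of the model and inputs differs.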
 