sandz7 committed on
Commit
168da77
•
1 Parent(s): 62140f8

added processor and conditional generation

Files changed (1)
  1. app.py +17 -65
app.py CHANGED
@@ -1,9 +1,9 @@
-# import torch
-# import gradio as gr
-# from transformers import pipeline, TextIteratorStreamer, AutoTokenizer, AutoModelForCausalLM
-# from PIL import Image
-# import requests
-# import threading
+import torch
+import gradio as gr
+from transformers import TextIteratorStreamer, AutoProcessor, LlavaForConditionalGeneration
+from PIL import Image
+import requests
+import threading
 
 DESCRIPTION = '''
 <div>
@@ -12,63 +12,14 @@ DESCRIPTION = '''
 </div>
 '''
 
-# model_id = "xtuner/llava-llama-3-8b-v1_1-transformers"
-# pipe = pipeline("image-to-text", model=model_id, device_map="auto")
-# # Place transformers in hardware to prepare for process and generation
-# llama_tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
-# llama_model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct", torch_dtype=torch.float16).to('cuda')
-# terminators = [
-#     llama_tokenizer.eos_token_id,
-#     llama_tokenizer.convert_tokens_to_ids("<|eot_id|>")
-# ]
-
-# def krypton(prompt,
-#             history,
-#             input_image,
-#             max_new_tokens,
-#             temperature,
-#             num_beams,
-#             do_sample: bool=True):
-#     """
-#     Passes an image as input, places it for generation
-#     on pipeline and output is passed. This is multimodal
-#     """
-#     conversation = []
-#     for user, assistant in history:
-#         conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
-#     conversation.append({"role": "user", "content": prompt})
-
-#     input_ids = llama_tokenizer.apply_chat_template(conversation, return_tensors='pt').to(llama_model.device)
-
-#     streamer = TextIteratorStreamer(llama_tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
-
-#     llava_generation_kwargs = dict(
-#         input_ids=input_ids,
-#         streamer=streamer,
-#         max_new_tokens=max_new_tokens,
-#         num_beams=num_beams,
-#         do_sample=do_sample
-#     )
-
-#     if temperature == 0.0:
-#         do_sample = False
-
-#     pil_image = Image.fromarray(input_image.astype('uint8'), 'RGB')
-
-#     # Pipeline generation
-#     outputs = pipeline()
-
-
-from transformers import pipeline
-from PIL import Image
-import requests
-import torch
-import subprocess
-import gradio as gr
-import spaces
-
 model_id = "xtuner/llava-llama-3-8b-v1_1-transformers"
-pipe = pipeline("image-to-text", model=model_id, torch_dtype=torch.float16, device=0)
+model = LlavaForConditionalGeneration.from_pretrained(
+    model_id,
+    torch_dtype=torch.float16,
+    low_cpu_mem_usage=True
+).to('cuda')
+
+processor = AutoProcessor.from_pretrained(model_id)
 
 @spaces.GPU(duration=120)
 def krypton(input_image):
@@ -77,9 +28,10 @@ def krypton(input_image):
     # image = Image.open(requests.get(url, stream=True).raw)
     prompt = ("<|start_header_id|>user<|end_header_id|>\n\n<image>\nWhat are these?<|eot_id|>"
               "<|start_header_id|>assistant<|end_header_id|>\n\n")
-    outputs = pipe(pil_image, prompt=prompt, generate_kwargs={"max_new_tokens": 200})
-    nvidia_result = subprocess.run(['nvidia-smi'], stdout=subprocess.PIPE)
-    return outputs[0]
+    inputs = processor(prompt, pil_image, return_tensors='pt').to('cuda', torch.float16)
+    outputs = model.generate(**inputs, max_new_tokens=200, do_sample=False)
+    output_text = processor.decode(outputs[0][:2], skip_special_tokens=True)
+    return output_text
 
 with gr.Blocks(fill_height=True) as demo:
     gr.Markdown(DESCRIPTION)
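For reference, a minimal standalone sketch of the processor + conditional-generation flow this commit switches to, outside the Gradio/Spaces wrapper. The example image URL and the `outputs[0][2:]` decode slice (which drops the leading special tokens instead of keeping only the first two, as the commit's `outputs[0][:2]` does) are illustrative assumptions, not part of the commit.

```python
# Sketch: load the xtuner LLaVA-Llama-3 checkpoint with AutoProcessor and
# LlavaForConditionalGeneration, then caption one image. Requires a CUDA GPU.
import torch
import requests
from PIL import Image
from transformers import AutoProcessor, LlavaForConditionalGeneration

model_id = "xtuner/llava-llama-3-8b-v1_1-transformers"

model = LlavaForConditionalGeneration.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
).to("cuda")
processor = AutoProcessor.from_pretrained(model_id)

# Llama-3 chat-style prompt with an <image> placeholder, as in the diff above.
prompt = ("<|start_header_id|>user<|end_header_id|>\n\n<image>\nWhat are these?<|eot_id|>"
          "<|start_header_id|>assistant<|end_header_id|>\n\n")

# Any RGB image works; this COCO URL is only an example (not from the commit).
url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)

# The processor packs text and pixels into model inputs; generate greedily.
inputs = processor(text=prompt, images=image, return_tensors="pt").to("cuda", torch.float16)
outputs = model.generate(**inputs, max_new_tokens=200, do_sample=False)

# Assumption: skip the first two special tokens and decode the generated reply;
# the commit itself decodes outputs[0][:2], which keeps only two tokens.
print(processor.decode(outputs[0][2:], skip_special_tokens=True))
```

In the app, `krypton` would return this decoded string to the Gradio interface instead of printing it.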