Spaces:

howard-hou
/

VisualRWKV-Gradio-1

Sleeping

App Files Community

howard-hou committed on Dec 30, 2023

Commit

c25fbe0

•

1 Parent(s): a9b31ad

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -5

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import gradio as gr
 import os, gc
 import torch
 from transformers import CLIPImageProcessor
 from huggingface_hub import hf_hub_download
@@ -33,7 +34,7 @@ image_processor = CLIPImageProcessor.from_pretrained(vision_tower_name)
 ##########################################################################
 def generate_prompt(instruction):
     instruction = instruction.strip().replace('\r\n','\n').replace('\n\n','\n')
-    return f"{instruction}\n\nAssistant:"
 def generate(
     ctx,
@@ -57,10 +58,8 @@ def generate(
     for i in range(int(token_count)):
         if i == 0:
             input_ids = pipeline.encode(ctx)
-            print(input_ids)
             text_embs = model.w['emb.weight'][input_ids]
-            input_embs = torch.cat((image_features, text_embs), dim=0)
-            print(input_embs.shape)
             out, state = model.forward(embs=input_embs, state=None)
         else:
             input_ids = [token]
@@ -103,12 +102,18 @@ examples = [
         "What are the things I should be cautious about when I visit here?",
     ]
 ]
 def chatbot(image, question):
     if image is None:
         yield "Please upload an image."
         return
     image = image_processor(images=image.convert('RGB'), return_tensors='pt')['pixel_values']
     image_features = visual_encoder.encode_images(image.unsqueeze(0)).squeeze(0) # [L, D]
     input_text = generate_prompt(question)
     for output in generate(input_text, image_features):
         yield output
@@ -119,7 +124,7 @@ with gr.Blocks(title=title) as demo:
             image = gr.Image(type='pil', label="Image")
         with gr.Column():
             prompt = gr.Textbox(lines=5, label="Prompt",
-                value="Please upload an image and ask a question.")
             with gr.Row():
                 submit = gr.Button("Submit", variant="primary")
                 clear = gr.Button("Clear", variant="secondary")

 import gradio as gr
 import os, gc
 import torch
+import torch.nn.functional as F
 from transformers import CLIPImageProcessor
 from huggingface_hub import hf_hub_download
 ##########################################################################
 def generate_prompt(instruction):
     instruction = instruction.strip().replace('\r\n','\n').replace('\n\n','\n')
+    return f"\n{instruction}\n\nAssistant:"
 def generate(
     ctx,
     for i in range(int(token_count)):
         if i == 0:
             input_ids = pipeline.encode(ctx)
             text_embs = model.w['emb.weight'][input_ids]
+            input_embs = torch.cat((image_features, text_embs), dim=0)[-ctx_limit:]
             out, state = model.forward(embs=input_embs, state=None)
         else:
             input_ids = [token]
         "What are the things I should be cautious about when I visit here?",
     ]
 ]
 def chatbot(image, question):
     if image is None:
         yield "Please upload an image."
         return
     image = image_processor(images=image.convert('RGB'), return_tensors='pt')['pixel_values']
     image_features = visual_encoder.encode_images(image.unsqueeze(0)).squeeze(0) # [L, D]
+    # apply layer norm to image feature, very important
+    image_features = F.layer_norm(image_features,
+                                  (image_features.shape[-1],),
+                                  weight=model.w['blocks.0.ln0.weight'],
+                                  bias=model.w['blocks.0.ln0.bias'])
     input_text = generate_prompt(question)
     for output in generate(input_text, image_features):
         yield output
             image = gr.Image(type='pil', label="Image")
         with gr.Column():
             prompt = gr.Textbox(lines=5, label="Prompt",
+                value="Render a clear and concise summary of the photo.")
             with gr.Row():
                 submit = gr.Button("Submit", variant="primary")
                 clear = gr.Button("Clear", variant="secondary")