# Text to Video
# import torch
# from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
# from diffusers.utils import export_to_video
# import streamlit as st
# import numpy as np 

# # Title and User Input
# st.title("Text-to-Video with Streamlit")
# prompt = st.text_input("Enter your text prompt:", "Spiderman is surfing")

# # Button to trigger generation
# if st.button("Generate Video"):  
#     # Ensure you have 'accelerate' version 0.17.0 or higher
#     import accelerate
#     from packaging import version  # plain string comparison of version numbers is unreliable
#     if version.parse(accelerate.__version__) < version.parse("0.17.0"):
#         st.warning("Please upgrade 'accelerate' to version 0.17.0 or higher for CPU offloading.")
#     else:
#         with st.spinner("Generating video..."):
#             # Define the text-to-video diffusion pipeline (fp16 weights; device
#             # placement is handled by enable_model_cpu_offload below)
#             pipe = DiffusionPipeline.from_pretrained(
#                 "damo-vilab/text-to-video-ms-1.7b",
#                 torch_dtype=torch.float16,
#                 variant="fp16",
#             )
#             pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
#             pipe.enable_model_cpu_offload()  

#             # Generate video frames
#             video_frames = pipe(prompt, num_inference_steps=25).frames  

#             # Create dummy frames for testing (replace with actual manipulation later)
#             dummy_frames = [np.ones((256, 256, 3), dtype=np.uint8) for _ in range(20)]  

#             # Export to video
#             video_path = export_to_video(dummy_frames)

#             # Display the video in the Streamlit app  
#             st.video(video_path) 





# Text to 3D

# import streamlit as st
# import torch
# from diffusers import ShapEPipeline
# from diffusers.utils import export_to_gif
# from PIL import Image
# import numpy as np
# # import PyTorch

# # Model loading (Ideally done once at the start for efficiency)
# ckpt_id = "openai/shap-e"  


# def process_image_for_pil(image):
#     """Convert a torch.Tensor or numpy array to a PIL Image; pass PIL images through."""
#     if isinstance(image, Image.Image):
#         return image
#     if isinstance(image, torch.Tensor):
#         image = image.detach().cpu().numpy()  # move to host memory before conversion
#     if isinstance(image, np.ndarray):
#         if image.dtype != np.uint8:
#             # Assumes float pixel values in [0, 1]; rescale to 8-bit
#             image = (image * 255).clip(0, 255).astype(np.uint8)
#         return Image.fromarray(image)
#     raise TypeError("Unsupported image format. Please provide conversion logic.")

# test_image = np.random.randint(0, 256, size=(256, 256, 3), dtype=np.uint8)  # Placeholder image  
# result = process_image_for_pil(test_image)





# def should_resize(image):
#     """Determines whether an image should be downscaled (replace with your own logic)."""
#     return image.width > 512 or image.height > 512
# @st.cache_resource  # Caches the model for faster subsequent runs
# def load_model():
#     return ShapEPipeline.from_pretrained(ckpt_id).to("cuda")

# pipe = load_model()

# # App Title
# st.title("Shark 3D Image Generator")

# # User Inputs
# prompt = st.text_input("Enter your prompt:", "a shark")
# guidance_scale = st.slider("Guidance Scale", 0.0, 20.0, 15.0, step=0.5)

# # Generate and Display Images 
# if st.button("Generate"):
#     with st.spinner("Generating images..."):
#         images = pipe(prompt, guidance_scale=guidance_scale, num_inference_steps=64).images

#         # ... (Process images for PIL conversion)

#         # Resize Images (Optional)
#         pil_images = []  # Modified to store resized images if needed
#         for image in images:
#             processed_image = process_image_for_pil(image)
#             if should_resize(processed_image):  # Pass image to should_resize
#                 resized_image = processed_image.resize((256, 256))
#                 pil_images.append(resized_image) 
#             else:
#                 pil_images.append(processed_image)  # Append without resizing

#         gif_path = export_to_gif(pil_images, "shark_3d.gif")
#         st.image(pil_images[0]) 
#         st.success("GIF saved as shark_3d.gif")


# visual QA
import requests
from PIL import Image
from transformers import Pix2StructForConditionalGeneration, Pix2StructProcessor
import streamlit as st


# Demo inference at import time: download a sample AI2D diagram and run a fixed
# question through the model (the decoded answer below is left commented out).
image_url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/ai2d-demo.jpg"
image = Image.open(requests.get(image_url, stream=True).raw)

model = Pix2StructForConditionalGeneration.from_pretrained("google/pix2struct-ai2d-base")
processor = Pix2StructProcessor.from_pretrained("google/pix2struct-ai2d-base")
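
# Note (assumption, not in the original file): loading the model and processor at module
# scope repeats on every Streamlit rerun. A cached loader, mirroring the @st.cache_resource
# pattern used in the Shap-E section above, could look like this:
#
# @st.cache_resource
# def load_vqa_model():
#     vqa_model = Pix2StructForConditionalGeneration.from_pretrained("google/pix2struct-ai2d-base")
#     vqa_processor = Pix2StructProcessor.from_pretrained("google/pix2struct-ai2d-base")
#     return vqa_model, vqa_processor
#
# model, processor = load_vqa_model()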

question = "What does the label 15 represent? (1) lava (2) core (3) tunnel (4) ash cloud"

inputs = processor(images=image, text=question, return_tensors="pt")

predictions = model.generate(**inputs, max_new_tokens=1000)
# print(processor.decode(predictions[0], skip_special_tokens=True))
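

# Helper added here (not in the original file): it wraps the same
# processor -> generate -> decode calls demonstrated above so the chat handler
# below can reuse them for any image/question pair.
def answer_question(pil_image, question_text):
    """Run Pix2Struct VQA on a PIL image and return the decoded answer string."""
    vqa_inputs = processor(images=pil_image, text=question_text, return_tensors="pt")
    output_ids = model.generate(**vqa_inputs, max_new_tokens=1000)
    return processor.decode(output_ids[0], skip_special_tokens=True)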



def load_image():
    """Sidebar widget: returns the chosen image URL (a typed URL takes precedence over the samples)."""
    with st.sidebar:
        url = st.text_input("Enter Image URL")
        sample = st.selectbox(
            "Select Image",
            (
                "https://images.unsplash.com/photo-1593466144596-8abd50ad2c52?ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D&auto=format&fit=crop&w=3434&q=80",
                "https://images.unsplash.com/photo-1566438480900-0609be27a4be?ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D&auto=format&fit=crop&w=3394&q=80",
            ),
        )
        if img := url or sample:
            if st.button("Load Image"):
                st.write("Image Uploaded!")
                st.image(img)
        else:
            st.warning("Please enter an image URL and click 'Load Image' before asking a question.")
    return img



def visual_qna():
    st.title("Visual Q&A")
    img_url = load_image()
    if img_url:
        if query := st.chat_input("Enter your message"):
            # The raw model is not callable with question/image keyword arguments, and
            # load_image returns a URL, so download the image and run the
            # processor/generate/decode steps via answer_question.
            pil_image = Image.open(requests.get(img_url, stream=True).raw)
            response = answer_question(pil_image, query)
            with st.chat_message("assistant"):
                st.write(response)
    else:
        st.warning("Please enter an image URL and click 'Load Image' before asking a question.")
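

# Entry point (added here as an assumption: the original file defines visual_qna but
# never calls it, so the Streamlit UI would not render without invoking it).
visual_qna()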