make it work with OSX Mac m1/m2/m3
- README.md (+14 -10)
- app-img2img.py (+38 -15)
- app-txt2img.py (+30 -7)
README.md
CHANGED
@@ -17,37 +17,42 @@ You need a webcam to run this demo. 🤗
 
 You need CUDA and Python
 `TIMEOUT`: limit user session timeout
-`SAFETY_CHECKER`:
+`SAFETY_CHECKER`: disabled if you want NSFW filter off
 `MAX_QUEUE_SIZE`: limit number of users on current app instance
 
 ### image to image
+
 ```bash
-python -m venv venv
-source venv/bin/activate
-
+python -m venv venv
+source venv/bin/activate
+pip3 install -r requirements.txt
 uvicorn "app-img2img:app" --host 0.0.0.0 --port 7860 --reload
 ```
 
 ### text to image
 
 ```bash
-python -m venv venv
-source venv/bin/activate
-
+python -m venv venv
+source venv/bin/activate
+pip3 install -r requirements.txt
 uvicorn "app-txt2img:app" --host 0.0.0.0 --port 7860 --reload
 ```
+
 or with environment variables
+
 ```bash
 TIMEOUT=120 SAFETY_CHECKER=True MAX_QUEUE_SIZE=4 uvicorn "app-img2img:app" --host 0.0.0.0 --port 7860 --reload
 ```
 
-If you're running locally and want to test it on Mobile Safari, the webserver needs to be served over HTTPS.
+If you're running locally and want to test it on Mobile Safari, the webserver needs to be served over HTTPS.
 
 ```bash
 openssl req -newkey rsa:4096 -nodes -keyout key.pem -x509 -days 365 -out certificate.pem
 uvicorn "app-img2img:app" --host 0.0.0.0 --port 7860 --reload --log-level info --ssl-certfile=certificate.pem --ssl-keyfile=key.pem
 ```
+
 ## Docker
+
 You need NVIDIA Container Toolkit for Docker
 
 ```bash
@@ -62,8 +67,7 @@ docker run -ti -e TIMEOUT=0 -e SAFETY_CHECKER=False -p 7860:7860 --gpus all lcm-
 ```
 
 # Demo on Hugging Face
-https://huggingface.co/spaces/radames/Real-Time-Latent-Consistency-Model
 
+https://huggingface.co/spaces/radames/Real-Time-Latent-Consistency-Model
 
 https://github.com/radames/Real-Time-Latent-Consistency-Model/assets/102277/c4003ac5-e7ff-44c0-97d3-464bb659de70
-
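Note: the README still lists CUDA as a requirement, but with this commit the apps also fall back to Apple's MPS backend on M1/M2/M3 Macs. A quick, purely illustrative way to check which backend a local PyTorch install exposes (not part of the repo):

```python
# Illustrative check only (assumes PyTorch is installed); mirrors the detection
# logic this commit adds to app-img2img.py and app-txt2img.py.
import torch

print("CUDA available:", torch.cuda.is_available())
print("MPS available :", hasattr(torch.backends, "mps") and torch.backends.mps.is_available())
```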
app-img2img.py
CHANGED
@@ -19,6 +19,8 @@ import io
 import uuid
 import os
 import time
+import psutil
+
 
 MAX_QUEUE_SIZE = int(os.environ.get("MAX_QUEUE_SIZE", 0))
 TIMEOUT = float(os.environ.get("TIMEOUT", 0))
@@ -28,6 +30,17 @@ print(f"TIMEOUT: {TIMEOUT}")
 print(f"SAFETY_CHECKER: {SAFETY_CHECKER}")
 print(f"MAX_QUEUE_SIZE: {MAX_QUEUE_SIZE}")
 
+# check if MPS is available OSX only M1/M2/M3 chips
+mps_available = hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+torch_device = device
+torch_dtype = torch.float16
+
+if mps_available:
+    device = torch.device("mps")
+    torch_device = "cpu"
+    torch_dtype = torch.float32
+
 if SAFETY_CHECKER == "True":
     pipe = DiffusionPipeline.from_pretrained(
         "SimianLuo/LCM_Dreamshaper_v7",
@@ -41,19 +54,28 @@ else:
         custom_pipeline="latent_consistency_img2img.py",
         custom_revision="main",
     )
-#TODO try to use tiny VAE
+# TODO try to use tiny VAE
 # pipe.vae = AutoencoderTiny.from_pretrained(
 #     "madebyollin/taesd", torch_dtype=torch.float16, use_safetensors=True
 # )
 pipe.set_progress_bar_config(disable=True)
-pipe.to(torch_device=
+pipe.to(torch_device=torch_device, torch_dtype=torch_dtype).to(device)
 pipe.unet.to(memory_format=torch.channels_last)
-
-
+
+if psutil.virtual_memory().total < 64 * 1024**3:
+    pipe.enable_attention_slicing()
+
+if not mps_available:
+    pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
+    pipe(prompt="warmup", image=[Image.new("RGB", (512, 512))])
+
+compel_proc = Compel(
+    tokenizer=pipe.tokenizer,
+    text_encoder=pipe.text_encoder,
+    truncate_long_prompts=False,
+)
 user_queue_map = {}
 
-# for torch.compile
-pipe(prompt="warmup", image=[Image.new("RGB", (512, 512))])
 
 def predict(input_image, prompt, guidance_scale=8.0, strength=0.5, seed=2159232):
     generator = torch.manual_seed(seed)
@@ -112,9 +134,7 @@ async def websocket_endpoint(websocket: WebSocket):
         await websocket.send_json(
             {"status": "success", "message": "Connected", "userId": uid}
         )
-        user_queue_map[uid] = {
-            "queue": asyncio.Queue()
-        }
+        user_queue_map[uid] = {"queue": asyncio.Queue()}
         await websocket.send_json(
             {"status": "start", "message": "Start Streaming", "userId": uid}
         )
@@ -155,7 +175,13 @@ async def stream(user_id: uuid.UUID):
                 if input_image is None:
                     continue
 
-                image = predict(
+                image = predict(
+                    input_image,
+                    params.prompt,
+                    params.guidance_scale,
+                    params.strength,
+                    params.seed,
+                )
                 if image is None:
                     continue
                 frame_data = io.BytesIO()
@@ -194,10 +220,7 @@ async def handle_websocket_data(websocket: WebSocket, user_id: uuid.UUID):
                    queue.get_nowait()
                except asyncio.QueueEmpty:
                    continue
-            await queue.put({
-                "image": pil_image,
-                "params": params
-            })
+            await queue.put({"image": pil_image, "params": params})
            if TIMEOUT > 0 and time.time() - last_time > TIMEOUT:
                await websocket.send_json(
                    {
@@ -214,4 +237,4 @@ async def handle_websocket_data(websocket: WebSocket, user_id: uuid.UUID):
        traceback.print_exc()


-app.mount("/", StaticFiles(directory="img2img", html=True), name="public")
+app.mount("/", StaticFiles(directory="img2img", html=True), name="public")
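The hunks above construct `compel_proc` but never show where it is used; the `predict()` body sits outside the displayed diff. For orientation only, the usual Compel pattern with a diffusers pipeline looks like the sketch below. This is an assumption about intent, not code from this commit, and the custom LCM pipeline's exact call signature may differ.

```python
# Hypothetical usage sketch (not from this diff): build weighted prompt
# embeddings with Compel and pass them to the pipeline in place of raw text.
prompt_embeds = compel_proc("portrait photo, (sharp focus)1.2")
result = pipe(
    prompt_embeds=prompt_embeds,   # precomputed embeddings instead of a prompt string
    guidance_scale=8.0,
    num_inference_steps=4,         # LCM-style low step count, illustrative value
)
```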
app-txt2img.py
CHANGED
@@ -19,6 +19,8 @@ import io
 import uuid
 import os
 import time
+import psutil
+
 
 MAX_QUEUE_SIZE = int(os.environ.get("MAX_QUEUE_SIZE", 0))
 TIMEOUT = float(os.environ.get("TIMEOUT", 0))
@@ -28,6 +30,17 @@ print(f"TIMEOUT: {TIMEOUT}")
 print(f"SAFETY_CHECKER: {SAFETY_CHECKER}")
 print(f"MAX_QUEUE_SIZE: {MAX_QUEUE_SIZE}")
 
+# check if MPS is available OSX only M1/M2/M3 chips
+mps_available = hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+torch_device = device
+torch_dtype = torch.float16
+
+if mps_available:
+    device = torch.device("mps")
+    torch_device = "cpu"
+    torch_dtype = torch.float32
+
 if SAFETY_CHECKER == "True":
     pipe = DiffusionPipeline.from_pretrained(
         "SimianLuo/LCM_Dreamshaper_v7",
@@ -42,17 +55,27 @@ else:
         custom_revision="main",
     )
 pipe.vae = AutoencoderTiny.from_pretrained(
-    "madebyollin/taesd", torch_dtype=
+    "madebyollin/taesd", torch_dtype=torch_dtype, use_safetensors=True
 )
 pipe.set_progress_bar_config(disable=True)
-pipe.to(torch_device=
+pipe.to(torch_device=torch_device, torch_dtype=torch_dtype).to(device)
 pipe.unet.to(memory_format=torch.channels_last)
-
-
+
+# check if computer has less than 64GB of RAM using sys or os
+if psutil.virtual_memory().total < 64 * 1024**3:
+    pipe.enable_attention_slicing()
+
+if not mps_available:
+    pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
+    pipe(prompt="warmup", num_inference_steps=1, guidance_scale=8.0)
+
+compel_proc = Compel(
+    tokenizer=pipe.tokenizer,
+    text_encoder=pipe.text_encoder,
+    truncate_long_prompts=False,
+)
 user_queue_map = {}
 
-# warmup trigger compilation
-pipe(prompt="warmup", num_inference_steps=1, guidance_scale=8.0)
 
 def predict(prompt, guidance_scale=8.0, seed=2159232):
     generator = torch.manual_seed(seed)
@@ -148,7 +171,7 @@ async def stream(user_id: uuid.UUID):
                 params = await queue.get()
                 if params is None:
                     continue
-
+
                 image = predict(params.prompt, params.guidance_scale, params.seed)
                 if image is None:
                     continue
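Both apps now gate `pipe.enable_attention_slicing()` on total system memory via psutil. As a standalone reference (assumes only that the `psutil` package is installed, same threshold as the diff), this is what the check evaluates on a given machine:

```python
# Standalone sketch of the 64 GB threshold used above; prints total RAM and
# whether attention slicing would be enabled on this machine.
import psutil

total = psutil.virtual_memory().total          # total physical memory in bytes
print(f"Total RAM: {total / 1024**3:.1f} GB")
print("Enable attention slicing:", total < 64 * 1024**3)
```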