Commit 58b1ffb
1 Parent(s): 6d66622
improve prompts
Files changed:
- package-lock.json +26 -26
- package.json +1 -1
- src/app/api/parsers/parseTurbo.ts +6 -3
- src/app/api/providers/huggingface/predictWithHuggingFace.ts +4 -0
- src/app/api/v1/README.md +8 -0
- src/app/api/v1/auth/throwIfInvalidToken.ts +2 -2
- src/app/api/v1/create/index.ts +18 -17
- src/app/api/v1/create/route.ts +1 -1
- src/app/api/v1/create/systemPrompt.ts +13 -6
- src/app/api/v1/edit/dialogues/processShot.ts +5 -3
- src/app/api/v1/edit/dialogues/route.ts +2 -2
- src/app/api/v1/edit/entities/clapToLatentStory.ts +2 -2
- src/app/api/v1/edit/entities/generateEntityPrompts.ts +3 -3
- src/app/api/v1/edit/entities/index.ts +3 -3
- src/app/api/v1/edit/entities/route.ts +1 -3
- src/app/api/v1/edit/entities/systemPrompt.ts +7 -3
- src/app/api/v1/edit/storyboards/processShot.ts +2 -2
- src/app/api/v1/edit/storyboards/route.ts +2 -2
- src/app/api/v1/edit/videos/generateVideo.ts +0 -63
- src/app/api/v1/edit/videos/processShot.ts +41 -13
- src/app/api/v1/edit/videos/route.ts +21 -4
- src/app/api/v1/export/route.ts +1 -1
- src/app/api/v1/render/cluster.ts +49 -0
- src/app/api/v1/render/index.ts +121 -0
- src/app/api/v1/render/route.ts +56 -0
- src/app/api/v1/search/index.ts +2 -2
- src/app/api/v1/types.ts +1 -1
- src/lib/on-device-ai/classifyFrame.ts +2 -2
- src/lib/on-device-ai/getSegmentationCanvas.tsx +1 -1
package-lock.json
CHANGED
@@ -9,7 +9,7 @@
       "version": "0.0.0",
       "dependencies": {
         "@aitube/clap": "0.0.16",
-        "@aitube/client": "0.0.
+        "@aitube/client": "0.0.23",
         "@aitube/engine": "0.0.6",
         "@huggingface/hub": "0.12.3-oauth",
         "@huggingface/inference": "^2.6.7",
@@ -130,9 +130,9 @@
       }
     },
     "node_modules/@aitube/client": {
-      "version": "0.0.
-      "resolved": "https://registry.npmjs.org/@aitube/client/-/client-0.0.
-      "integrity": "sha512-
+      "version": "0.0.23",
+      "resolved": "https://registry.npmjs.org/@aitube/client/-/client-0.0.23.tgz",
+      "integrity": "sha512-zZeGacE2WWSIO1h+HOQu6ExwWfJ01mzW1SreP3bN67vOmrau+bWRzZmX6Wg7DAHePnjvTkeR01TAiZVXskJOkw==",
       "dependencies": {
         "query-string": "^9.0.0"
       },
@@ -3744,9 +3744,9 @@
       }
     },
     "node_modules/caniuse-lite": {
-      "version": "1.0.
-      "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.
-      "integrity": "sha512-
+      "version": "1.0.30001617",
+      "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001617.tgz",
+      "integrity": "sha512-mLyjzNI9I+Pix8zwcrpxEbGlfqOkF9kM3ptzmKNw5tizSyYwMe+nGLTqMK9cO+0E+Bh6TsBxNAaHWEM8xwSsmA==",
       "funding": [
         {
           "type": "opencollective",
@@ -4323,9 +4323,9 @@
       "integrity": "sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA=="
     },
     "node_modules/electron-to-chromium": {
-      "version": "1.4.
-      "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.4.
-      "integrity": "sha512-
+      "version": "1.4.761",
+      "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.4.761.tgz",
+      "integrity": "sha512-PIbxpiJGx6Bb8dQaonNc6CGTRlVntdLg/2nMa1YhnrwYOORY9a3ZgGN0UQYE6lAcj/lkyduJN7BPt/JiY+jAQQ=="
     },
     "node_modules/elliptic": {
       "version": "6.5.4",
@@ -5361,9 +5361,9 @@
       }
     },
     "node_modules/get-tsconfig": {
-      "version": "4.7.
-      "resolved": "https://registry.npmjs.org/get-tsconfig/-/get-tsconfig-4.7.
-      "integrity": "sha512-
+      "version": "4.7.5",
+      "resolved": "https://registry.npmjs.org/get-tsconfig/-/get-tsconfig-4.7.5.tgz",
+      "integrity": "sha512-ZCuZCnlqNzjb4QprAzXKdpp/gh6KTxSJuw3IBsPnV/7fV4NxC9ckB+vPTt8w7fJA0TaSD7c55BR47JD6MEDyDw==",
       "dependencies": {
         "resolve-pkg-maps": "^1.0.0"
       },
@@ -6360,9 +6360,9 @@
       }
     },
     "node_modules/minipass": {
-      "version": "7.1.
-      "resolved": "https://registry.npmjs.org/minipass/-/minipass-7.1.
-      "integrity": "sha512-
+      "version": "7.1.1",
+      "resolved": "https://registry.npmjs.org/minipass/-/minipass-7.1.1.tgz",
+      "integrity": "sha512-UZ7eQ+h8ywIRAW1hIEl2AqdwzJucU/Kp59+8kkZeSvafXhZjul247BvIJjEVFVeON6d7lM46XX1HXCduKAS8VA==",
       "engines": {
         "node": ">=16 || 14 >=14.17"
       }
@@ -6806,9 +6806,9 @@
       "integrity": "sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw=="
     },
     "node_modules/path-scurry": {
-      "version": "1.
-      "resolved": "https://registry.npmjs.org/path-scurry/-/path-scurry-1.
-      "integrity": "sha512-
+      "version": "1.11.0",
+      "resolved": "https://registry.npmjs.org/path-scurry/-/path-scurry-1.11.0.tgz",
+      "integrity": "sha512-LNHTaVkzaYaLGlO+0u3rQTz7QrHTFOuKyba9JMTQutkmtNew8dw8wOD7mTU/5fCPZzCWpfW0XnQKzY61P0aTaw==",
       "dependencies": {
         "lru-cache": "^10.2.0",
         "minipass": "^5.0.0 || ^6.0.2 || ^7.0.0"
@@ -7547,9 +7547,9 @@
       "integrity": "sha512-cdwTTnqPu0Hyvf5in5asVdZocVDTNRmR7XEcJuIzMjJeSHybHl7vpB66AzwTaIg6CLSbtjcxc8fqcySfnTkccA=="
     },
     "node_modules/semver": {
-      "version": "7.6.
-      "resolved": "https://registry.npmjs.org/semver/-/semver-7.6.
-      "integrity": "sha512-
+      "version": "7.6.2",
+      "resolved": "https://registry.npmjs.org/semver/-/semver-7.6.2.tgz",
+      "integrity": "sha512-FNAIBWCx9qcRhoHcgcJ0gvU7SN1lYU2ZXuSfl04bSC5OpvDHFyJCjdNHomPXxjQlCBU67YW64PzY7/VIEH7F2w==",
       "bin": {
         "semver": "bin/semver.js"
       },
@@ -8005,15 +8005,15 @@
       }
     },
     "node_modules/sucrase/node_modules/glob": {
-      "version": "10.3.
-      "resolved": "https://registry.npmjs.org/glob/-/glob-10.3.
-      "integrity": "sha512-
+      "version": "10.3.14",
+      "resolved": "https://registry.npmjs.org/glob/-/glob-10.3.14.tgz",
+      "integrity": "sha512-4fkAqu93xe9Mk7le9v0y3VrPDqLKHarNi2s4Pv7f2yOvfhWfhc7hRPHC/JyqMqb8B/Dt/eGS4n7ykwf3fOsl8g==",
       "dependencies": {
         "foreground-child": "^3.1.0",
         "jackspeak": "^2.3.6",
         "minimatch": "^9.0.1",
         "minipass": "^7.0.4",
-        "path-scurry": "^1.
+        "path-scurry": "^1.11.0"
       },
       "bin": {
         "glob": "dist/esm/bin.mjs"
package.json
CHANGED
@@ -11,7 +11,7 @@
   },
   "dependencies": {
     "@aitube/clap": "0.0.16",
-    "@aitube/client": "0.0.
+    "@aitube/client": "0.0.23",
     "@aitube/engine": "0.0.6",
     "@huggingface/hub": "0.12.3-oauth",
     "@huggingface/inference": "^2.6.7",
src/app/api/parsers/parseTurbo.ts
CHANGED
@@ -5,11 +5,14 @@ export function parseTurbo(
   let value = defaultValue

   try {
-    let maybeTurbo = decodeURIComponent(`${input || value}`).trim()
+    let maybeTurbo = decodeURIComponent(`${input || value}`).trim().toLowerCase()

-
+    if (maybeTurbo === "true" || maybeTurbo === "1") { return false }

+    if (maybeTurbo === "false") { return false }
+
+    return false
   } catch (err) {}

-  return
+  return false
 }
src/app/api/providers/huggingface/predictWithHuggingFace.ts
CHANGED
@@ -37,6 +37,8 @@ export async function predict({
       instructions += output.token.text
       process.stdout.write(output.token.text)
       if (
+        instructions.includes("# Final") ||
+        instructions.includes("# Guidelines") ||
        instructions.includes("</s>") ||
        instructions.includes("<s>") ||
        instructions.includes("/s>") ||
@@ -66,6 +68,8 @@ export async function predict({
   // need to do some cleanup of the garbage the LLM might have gave us
   let result =
     instructions
+    .replaceAll("# Final", "")
+    .replaceAll("# Guidelines", "")
     .replaceAll("<|end|>", "")
     .replaceAll("<s>", "")
     .replaceAll("</s>", "")
src/app/api/v1/README.md
CHANGED
@@ -26,3 +26,11 @@ Example:

 `POST <some_clap> /api/v1/export?f=webm`

+## /render
+
+To render frames as fast as possible
+
+## /search
+
+To hallucinate search results
+
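For context, a rough sketch of how a client could call the new /render endpoint documented above. The base URL and the API_TOKEN variable are placeholders (not part of this commit); the body fields mirror what the added src/app/api/v1/render/route.ts parses.

// sketch only: POST a prompt to the new render endpoint and keep the returned video bytes
const res = await fetch("https://example.com/api/v1/render?turbo=true", {
  method: "POST",
  headers: {
    "Content-Type": "application/json",
    // the route checks this header via throwIfInvalidToken()
    Authorization: `Bearer ${process.env.API_TOKEN || ""}`,
  },
  body: JSON.stringify({
    prompt: "close-up shot of a puppy sleeping in a bed, cute, vertical photo",
    width: 512,
    height: 288,
    turbo: true,
  }),
})

// the endpoint answers with raw video data plus a Content-Type header
const video = Buffer.from(await res.arrayBuffer())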
src/app/api/v1/auth/throwIfInvalidToken.ts
CHANGED
@@ -15,8 +15,8 @@ export async function throwIfInvalidToken(input?: any): Promise<boolean> {
   })

   // log values to console
-  console.log(payload)
-  console.log(protectedHeader)
+  // console.log(payload)
+  // console.log(protectedHeader)

   return true
 }
src/app/api/v1/create/index.ts
CHANGED
@@ -1,6 +1,6 @@
 "use server"

-import { ClapProject, getValidNumber, newClap, newSegment, ClapSegmentCategory, ClapOutputType } from "@aitube/clap"
+import { ClapProject, getValidNumber, newClap, newSegment, ClapSegmentCategory, ClapOutputType, ClapMediaOrientation } from "@aitube/clap"

 import { sleep } from "@/lib/utils/sleep"
 import { predict } from "@/app/api/providers/huggingface/predictWithHuggingFace"
@@ -52,7 +52,7 @@ Output: `
     turbo,
   })

-  console.log("api/v1/create(): rawString: ", rawString)
+  // console.log("api/v1/create(): rawString: ", rawString)

   let shots: LatentStory[] = []

@@ -71,7 +71,7 @@ Output: `
       turbo,
     })

-    console.log("api/v1/create(): rawString: ", rawString)
+    // console.log("api/v1/create(): rawString: ", rawString)

     maybeShots = parseRawStringToYAML<LatentStory[]>(rawString, [])
     if (!Array.isArray(maybeShots) || maybeShots.length === 0) {
@@ -94,11 +94,14 @@ Output: `

   const clap: ClapProject = newClap({
     meta: {
-      title: "
-      description:
+      title: prompt.split(",").shift() || "",
+      description: prompt,
       synopsis: "",
       licence: "",
-      orientation:
+      orientation:
+        width > height ? ClapMediaOrientation.LANDSCAPE :
+        height > width ? ClapMediaOrientation.PORTRAIT :
+        ClapMediaOrientation.SQUARE,
       width,
       height,
       isInteractive: false,
@@ -108,9 +111,9 @@ Output: `
     }
   })

-  for (const {
+  for (const { comment, image, voice } of shots) {

-    console.log(`api/v1/create(): - ${
+    console.log(`api/v1/create(): - ${comment}`)

     // note: it would be nice if we could have a convention saying that
     // track 0 is for videos and track 1 storyboards
@@ -123,16 +126,14 @@ Output: `
     // we should fix the Clap file editor to make it able to react videos
     // from any track number

-
-    /*
-    we disable it, because we don't generate animated videos yet
     clap.segments.push(newSegment({
       track: 0,
-
+      startTimeInMs: currentElapsedTimeInMs,
+      assetDurationInMs: defaultSegmentDurationInMs,
+      category: ClapSegmentCategory.VIDEO,
       prompt: image,
-      outputType:
+      outputType: ClapOutputType.VIDEO,
     }))
-    */

     clap.segments.push(newSegment({
       track: 1,
@@ -148,9 +149,9 @@ Output: `
       startTimeInMs: currentElapsedTimeInMs,
       assetDurationInMs: defaultSegmentDurationInMs,
       category: ClapSegmentCategory.INTERFACE,
-      prompt:
-      // assetUrl: `data:text/plain;base64,${btoa(
-      assetUrl:
+      prompt: comment,
+      // assetUrl: `data:text/plain;base64,${btoa(comment)}`,
+      assetUrl: comment,
       outputType: ClapOutputType.TEXT,
     }))

src/app/api/v1/create/route.ts
CHANGED
@@ -18,7 +18,7 @@ export async function POST(req: NextRequest) {
     // can add more stuff for the V2 of Stories Factory
   }

-  console.log("[api/v1/create] request:", request)
+  // console.log("[api/v1/create] request:", request)

   const clap = await create({
     prompt: `${request?.prompt || ""}`.trim(),
src/app/api/v1/create/systemPrompt.ts
CHANGED
@@ -13,14 +13,16 @@ You will be provided a "prompt" (for the story) and max number of images

 Each shot is composed of:

-- one
+- one comment (which will be displayed as an overlay over the video, so keep it short eg. max 10/12 words),
 - one image (you must describe it using a Stable Diffusion prompt - about ~300 chars - using simple descriptive words and adjectives. Describe facts about characters, location, lights, texture, camera orientation, colors, clothes, movements etc. But don't give your opinion, don't talk about the emotions it evokes etc.)
 - one voice over (should be short too, about 10 to 15 words)

 # Important

-You MUST reply by writing/completing a YAML list of objects.
-
+- You MUST reply by writing/completing a YAML list of objects.
+- Never use Markdown, and don't write anything after then end of the YAML.
+- In the image description, never give your interpretation on the meaning
+- Copy the structure of the examples, but not their content: come up with your own original ideal, you should be creativeç

 # Examples

@@ -30,14 +32,19 @@ or the user might omit to give the number (that's fine too, you can use 5 by def
 but if the user asks for large numbers, it should be ignored (our limit is 32).

 \`\`\`
--
+- comment: "my puppy is so cute when he sleeps 🐶"
   image: "close-up shot of a puppy sleeping in a bed, cute, instagram, award winning, vertical photo"
   voice: "look at my puppy, how cute he is. He is the cutest puppy in the world"
--
+- comment: "wait.. noo not the milk 😭"
   image: "medium-shot of a puppy spilling over milk on the kitchen floor, nice kitchen, spilled milk, guilty dog face, cute, dramatic, instagram, vertical photo"
   voice: "wait.. what are you doing.. nooo my milk"
--
+- comment: "😭 please send help"
   image: "medium-shot of a puppy eating a cake, on the kitchen table, birthday cake, eating, cute, instagram, funny, messy, vertical photo"
   voice: "Now my dog is eating my birtday cake. Please send help."
 \`\`\
+
+# Final guidelines
+
+- don"t add generic comment like "intense action scene" etc. In this context, the comments MUST be funny and from the point of view of a young person (eg. a millenial, tired of adult life)
+- In the image text, don't say things like "giving a sense of.."
 `
src/app/api/v1/edit/dialogues/processShot.ts
CHANGED
@@ -4,7 +4,8 @@ import {
   ClapSegment,
   getClapAssetSourceType,
   filterSegments,
-  ClapSegmentFilteringMode
+  ClapSegmentFilteringMode,
+  ClapSegmentCategory
 } from "@aitube/clap"
 import { ClapCompletionMode } from "@aitube/client"
 import { getSpeechBackgroundAudioPrompt } from "@aitube/engine"
@@ -27,13 +28,13 @@ export async function processShot({
 }): Promise<void> {

   const shotSegments: ClapSegment[] = filterSegments(
-    ClapSegmentFilteringMode.
+    ClapSegmentFilteringMode.BOTH,
     shotSegment,
     existingClap.segments
   )

   const shotDialogueSegments: ClapSegment[] = shotSegments.filter(s =>
-    s.category ===
+    s.category === ClapSegmentCategory.DIALOGUE
   )

   let shotDialogueSegment: ClapSegment | undefined = shotDialogueSegments.at(0)
@@ -50,6 +51,7 @@ export async function processShot({
       audioId: getSpeechBackgroundAudioPrompt(
         shotSegments,
         existingClap.entityIndex,
+        // TODO: use the entity description if it exists
         ["high quality", "crisp", "detailed"]
       ),
       debug: true,
src/app/api/v1/edit/dialogues/route.ts
CHANGED
@@ -26,10 +26,10 @@ export async function POST(req: NextRequest) {

   if (!existingClap?.segments) { throw new Error(`no segment found in the provided clap!`) }

-  console.log(`[api/edit/dialogues] detected ${existingClap.segments.length} segments`)
+  // console.log(`[api/edit/dialogues] detected ${existingClap.segments.length} segments`)

   const shotsSegments: ClapSegment[] = existingClap.segments.filter(s => s.category === ClapSegmentCategory.CAMERA)
-  console.log(`[api/edit/dialogues] detected ${shotsSegments.length} shots`)
+  // console.log(`[api/edit/dialogues] detected ${shotsSegments.length} shots`)

   if (shotsSegments.length > 32) {
     throw new Error(`Error, this endpoint being synchronous, it is designed for short stories only (max 32 shots).`)
src/app/api/v1/edit/entities/clapToLatentStory.ts
CHANGED
@@ -23,7 +23,7 @@ export async function clapToLatentStory(clap: ClapProject): Promise<LatentStory[
     ClapSegmentCategory.STORYBOARD
   ).at(0)

-  const
+  const comment = filterSegments(
     ClapSegmentFilteringMode.START,
     shot,
     clap.segments,
@@ -38,7 +38,7 @@ export async function clapToLatentStory(clap: ClapProject): Promise<LatentStory[
   ).at(0)

   const latentStory: LatentStory = {
-
+    comment: comment.prompt,
     image: image.prompt,
     voice: voice.prompt,
   }
src/app/api/v1/edit/entities/generateEntityPrompts.ts
CHANGED
@@ -72,7 +72,7 @@ Now please generate the output entities:`
     turbo,
   })

-  console.log("generateEntityPrompts(): rawString: ", rawString)
+  // console.log("generateEntityPrompts(): rawString: ", rawString)

   let results: EntityPromptResult[] = []

@@ -91,7 +91,7 @@ Now please generate the output entities:`
       turbo,
     })

-    console.log("generateEntityPrompts(): rawString: ", rawString)
+    // console.log("generateEntityPrompts(): rawString: ", rawString)

     maybeEntities = parseRawStringToYAML<LatentEntity[]>(rawString, [])
     if (!Array.isArray(maybeEntities) || maybeEntities.length === 0) {
@@ -142,7 +142,7 @@ Now please generate the output entities:`
     throw new Error(`Hugging Face Inference API failure (the model failed to generate the entities)`)
   }

-  console.log(`generateEntityPrompts(): generated ${results.length} entities with their images and voice ids`)
+  // console.log(`generateEntityPrompts(): generated ${results.length} entities with their images and voice ids`)

   return results
 }
src/app/api/v1/edit/entities/index.ts
CHANGED
@@ -55,7 +55,7 @@ export async function editEntities({

       imagePrompt: "",
       imageSourceType: getClapAssetSourceType(identityImage),
-      imageEngine: "
+      imageEngine: "SD Lightning",
       imageId: identityImage,
       audioPrompt: "",
       audioSourceType: getClapAssetSourceType(identityVoice),
@@ -101,7 +101,7 @@ export async function editEntities({

       imagePrompt: "",
       imageSourceType: getClapAssetSourceType(identityImage),
-      imageEngine: "
+      imageEngine: "SD Lightning",
       imageId: identityImage,
       audioPrompt: "",
       audioSourceType: getClapAssetSourceType(identityVoice),
@@ -172,7 +172,7 @@ export async function editEntities({
     }
   }

-  console.log(`api/edit/entities(): returning the newerClap`)
+  // console.log(`api/edit/entities(): returning the newerClap`)

   return newerClap
 }
src/app/api/v1/edit/entities/route.ts
CHANGED
@@ -11,9 +11,7 @@ import { ClapCompletionMode } from "@aitube/client"
 import { parseTurbo } from "@/app/api/parsers/parseTurbo"

 export async function POST(req: NextRequest) {
-  console.log("Hello!")
   await throwIfInvalidToken(req.headers.get("Authorization"))
-  console.log("world!")
   const qs = queryString.parseUrl(req.url || "")
   const query = (qs || {}).query

@@ -40,7 +38,7 @@ export async function POST(req: NextRequest) {
     turbo,
   })

-  console.log(`[api/edit/entities] returning the newer clap extended with the entities`)
+  // console.log(`[api/edit/entities] returning the newer clap extended with the entities`)

   return new NextResponse(await serializeClap(newerClap), {
     status: 200,
src/app/api/v1/edit/entities/systemPrompt.ts
CHANGED
@@ -10,6 +10,8 @@ You mission is to generate a list of entities/assets associated with each shot.
 # Important

 - You MUST reply by writing/completing a YAML list of objects.
+- Don't use Markdown, and don't write anything after then end of the YAML.
+- Don't comment on the feeling a scene gives, don't give your interpretation on the meaning
 - Copy the structure of the examples, but not their content: come up with your own original ideas. Be creative!

 # Output schema:
@@ -29,15 +31,15 @@ Given the following inputs:
 "A king goes to see a witch to ask if or how he can win an upcoming and challenging battle"
 \`\`\`yaml
 - shot: 1
-
+  comment: "King Arthus seeks the witch's guidance to win his imminent battle."
   image: "Establishing shot of KING ARTHUS, nervous, wet brown hair. dressed in golden armor and a colorful cape. His face reveals a mix of concern and determination. He's standing in the bright sunshine, inside a castle's courtyard, under cloudy skies. Behind him, a group of soldiers can be seen marching towards the castle gates."
   voice: "Dark sorceress of the shadows, it is time for you to serve your Lord. Tell me the augur, tell me what you foreknow. Tell me how I will cleave my ennemies to the bone, and ravage them in battle to come up victorious."
 - shot: 2
-
+  comment: "The witch gives her counsel but warns of an unknown cost."
   image: "close-up shot of THE WITCH, smiling cunningly, raising a finger while speaking. Background bokeh, dim lightning, menacing, mysterious."
   voice: "Your Majesty, this will be a bloody battle, but I espy a way to victory for you. But if my advice you follow, victory I foresee, although at a great cost it will be."
 - shot: 3
-
+  comment: "The words of the witch are sinking in, but King Arthus tries to appear strong"
   image: "close-up shot on KING ARTHUS, looking concerned, somber, false confidence"
   voice: "Witch with the wicked tongue, what must be done will be done. I will do everything for my people's sake. Speak now, make know the path to glory."
 \`\`\
@@ -67,4 +69,6 @@ ${
   audio: "a sneering old woman, speaking with a hoarse and raspy voice. She is confident, hiding something."
   shots: [2]
 \`\`\
+# Final guidelines
+Please don't generate any other category than "character" for now - thank you!
 `
src/app/api/v1/edit/storyboards/processShot.ts
CHANGED
@@ -30,7 +30,7 @@ export async function processShot({
 }): Promise<void> {

   const shotSegments: ClapSegment[] = filterSegments(
-    ClapSegmentFilteringMode.
+    ClapSegmentFilteringMode.BOTH,
     shotSegment,
     existingClap.segments
   )
@@ -72,7 +72,7 @@ export async function processShot({
       existingClap.entityIndex,
       ["high quality", "crisp", "detailed"]
     )
-    console.log(`[api/v1/edit/storyboards] processShot: generating storyboard prompt: ${shotStoryboardSegment.prompt}`)
+    // console.log(`[api/v1/edit/storyboards] processShot: generating storyboard prompt: ${shotStoryboardSegment.prompt}`)
   }

   // TASK 3: GENERATE MISSING STORYBOARD BITMAP
src/app/api/v1/edit/storyboards/route.ts
CHANGED
@@ -31,10 +31,10 @@ export async function POST(req: NextRequest) {

   if (!existingClap?.segments) { throw new Error(`no segment found in the provided clap!`) }

-  console.log(`api/v1/edit/storyboards(): detected ${existingClap.segments.length} segments`)
+  // console.log(`api/v1/edit/storyboards(): detected ${existingClap.segments.length} segments`)

   const shotsSegments: ClapSegment[] = existingClap.segments.filter(s => s.category === ClapSegmentCategory.CAMERA)
-  console.log(`api/v1/edit/storyboards(): detected ${shotsSegments.length} shots`)
+  // console.log(`api/v1/edit/storyboards(): detected ${shotsSegments.length} shots`)

   if (shotsSegments.length > 32) {
     throw new Error(`Error, this endpoint being synchronous, it is designed for short stories only (max 32 shots).`)
src/app/api/v1/edit/videos/generateVideo.ts
DELETED
@@ -1,63 +0,0 @@
-import { generateSeed, getValidNumber } from "@aitube/clap"
-
-import { newRender, getRender } from "@/app/api/providers/videochain/renderWithVideoChain"
-import { sleep } from "@/lib/utils/sleep"
-import { getNegativePrompt, getPositivePrompt } from "@/app/api/utils/imagePrompts"
-
-export async function generateVideo({
-  prompt,
-  // negativePrompt,
-  width,
-  height,
-  seed,
-  turbo = false,
-}: {
-  prompt: string
-  // negativePrompt?: string
-  width?: number
-  height?: number
-  seed?: number
-  turbo?: boolean
-}): Promise<string> {
-
-  // we want to keep it vertical
-  width = getValidNumber(width, 256, 8192, 288)
-  height = getValidNumber(height, 256, 8192, 512)
-
-  // console.log("calling await newRender")
-  prompt = getPositivePrompt(prompt)
-  const negativePrompt = getNegativePrompt()
-
-  let render = await newRender({
-    prompt,
-    negativePrompt,
-    nbFrames: 80,
-    nbFPS: 24,
-    nbSteps: turbo ? 4 : 8,
-    width,
-    height,
-    turbo,
-    shouldRenewCache: true,
-    seed: seed || generateSeed()
-  })
-
-  let attempts = 10
-
-  while (attempts-- > 0) {
-    if (render.status === "completed") {
-      return render.assetUrl
-    }
-
-    if (render.status === "error") {
-      console.error(render.error)
-      throw new Error(`failed to generate the video file ${render.error}`)
-    }
-
-    await sleep(2000) // minimum wait time
-
-    // console.log("asking getRender")
-    render = await getRender(render.renderId)
-  }
-
-  throw new Error(`failed to generate the video file`)
-}
src/app/api/v1/edit/videos/processShot.ts
CHANGED
@@ -5,15 +5,17 @@ import {
   getClapAssetSourceType,
   newSegment,
   filterSegments,
-  ClapSegmentFilteringMode
+  ClapSegmentFilteringMode,
+  ClapOutputType,
+  ClapSegmentCategory,
+  parseMediaOrientation
 } from "@aitube/clap"
 import { ClapCompletionMode } from "@aitube/client"
 import { getVideoPrompt } from "@aitube/engine"

 import { getPositivePrompt } from "@/app/api/utils/imagePrompts"

-import {
-
+import { render } from "@/app/api/v1/render"

 export async function processShot({
   shotSegment,
@@ -29,17 +31,19 @@ export async function processShot({
   turbo: boolean
 }): Promise<void> {
   const shotSegments: ClapSegment[] = filterSegments(
-    ClapSegmentFilteringMode.
+    ClapSegmentFilteringMode.BOTH,
     shotSegment,
     existingClap.segments
   )

   const shotVideoSegments: ClapSegment[] = shotSegments.filter(s =>
-    s.category ===
+    s.category === ClapSegmentCategory.VIDEO
   )

   let shotVideoSegment: ClapSegment | undefined = shotVideoSegments.at(0)

+  // console.log("bug here?", turbo)
+
   console.log(`[api/edit/videos] processShot: shot [${shotSegment.startTimeInMs}:${shotSegment.endTimeInMs}] has ${shotSegments.length} segments (${shotVideoSegments.length} videos)`)

   // TASK 1: GENERATE MISSING VIDEO SEGMENT
@@ -49,10 +53,10 @@ export async function processShot({
       startTimeInMs: shotSegment.startTimeInMs,
       endTimeInMs: shotSegment.endTimeInMs,
       assetDurationInMs: shotSegment.assetDurationInMs,
-      category:
+      category: ClapSegmentCategory.VIDEO,
       prompt: "",
       assetUrl: "",
-      outputType:
+      outputType: ClapOutputType.VIDEO
     })

     // we fix the existing clap
@@ -81,14 +85,38 @@ export async function processShot({

   // TASK 3: GENERATE MISSING VIDEO FILE
   if (!shotVideoSegment.assetUrl) {
-    console.log(`[api/edit/videos] processShot: generating video file..`)
-
+    // console.log(`[api/edit/videos] processShot: generating video file..`)
+
+    const debug = true
+
+    let width = existingClap.meta.width
+    let height = existingClap.meta.height
+
+    // if (turbo) {
+    //   width = Math.round(width / 2)
+    //   height = Math.round(height / 2)
+    // }
+
+    if (width > height) {
+      width = 512
+      height = 288
+    } else if (width < height) {
+      width = 288
+      height = 512
+    } else {
+      width = 512
+      height = 512
+    }
     try {
-      shotVideoSegment.assetUrl = await
+      shotVideoSegment.assetUrl = await render({
         prompt: getPositivePrompt(shotVideoSegment.prompt),
-
-
-
+        seed: shotSegment.seed,
+        width,
+        height,
+        nbFrames: 80,
+        nbFPS: 24,
+        nbSteps: 4, // turbo ? 4 : 8,
+        debug,
       })
       shotVideoSegment.assetSourceType = getClapAssetSourceType(shotVideoSegment.assetUrl)
     } catch (err) {
src/app/api/v1/edit/videos/route.ts
CHANGED
@@ -8,6 +8,7 @@ import { throwIfInvalidToken } from "@/app/api/v1/auth/throwIfInvalidToken"

 import { processShot } from "./processShot"
 import { parseTurbo } from "@/app/api/parsers/parseTurbo"
+import { sleep } from "@/lib/utils/sleep"

 // a helper to generate videos for a Clap
 // this is mostly used by external apps such as the Stories Factory
@@ -31,11 +32,13 @@ export async function POST(req: NextRequest) {

   if (!existingClap?.segments) { throw new Error(`no segment found in the provided clap!`) }

-  console.log(`api/edit/videos(): detected ${existingClap.segments.length} segments`)
+  // console.log(`api/edit/videos(): detected ${existingClap.segments.length} segments`)

   const shotsSegments: ClapSegment[] = existingClap.segments.filter(s => s.category === ClapSegmentCategory.CAMERA)
-
-
+
+  // console.log(`api/edit/videos(): detected ${shotsSegments.length} shots`)
+
+
   if (shotsSegments.length > 32) {
     throw new Error(`Error, this endpoint being synchronous, it is designed for short stories only (max 32 shots).`)
   }
@@ -55,7 +58,21 @@ export async function POST(req: NextRequest) {
     })
   ))

-
+  // we currently have some parallelism issues..
+  /*
+  for (const shotSegment of shotsSegments) {
+    await processShot({
+      shotSegment,
+      existingClap,
+      newerClap,
+      mode,
+      turbo,
+    })
+    await sleep(500)
+  }
+  */
+
+  // `api/edit/videos(): returning the clap augmented with videos`)

   return new NextResponse(await serializeClap(newerClap), {
     status: 200,
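Since the sequential fallback above is left commented out because of parallelism issues, here is a rough sketch (not part of this commit) of a possible middle ground that processes the shots in small batches, reusing the processShot and sleep helpers already imported in this file; the batch size of 4 is purely illustrative.

// sketch only: bound the concurrency instead of going fully parallel or fully sequential
const batchSize = 4
for (let i = 0; i < shotsSegments.length; i += batchSize) {
  const batch = shotsSegments.slice(i, i + batchSize)
  await Promise.all(batch.map(shotSegment =>
    processShot({ shotSegment, existingClap, newerClap, mode, turbo })
  ))
  // small pause between batches to give the render cluster some breathing room
  await sleep(500)
}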
src/app/api/v1/export/route.ts
CHANGED
@@ -20,7 +20,7 @@ export async function POST(req: NextRequest, res: NextResponse) {
   // or rather, the non-turbo mode could be the one where we upscale

   // let's call our micro-service, which is currently open bar.
-  console.log("[api/v1/export] sending blob to ai-tube-clap-exporter.hf.space")
+  // console.log("[api/v1/export] sending blob to ai-tube-clap-exporter.hf.space")

   const result = await fetch(
     `https://jbilcke-hf-ai-tube-clap-exporter.hf.space?f=${format}`,
src/app/api/v1/render/cluster.ts
ADDED
@@ -0,0 +1,49 @@
+import { sleep } from "@/lib/utils/sleep"
+
+export type ClusterMachine = {
+  id: number
+  url: string
+  busy: boolean
+}
+
+export const nbClusterMachines = 3
+// make sure the machines are running!!
+
+// https://huggingface.co/spaces/jbilcke-hf/ai-tube-model-adl-1/settings
+// https://huggingface.co/spaces/jbilcke-hf/ai-tube-model-adl-2/settings
+// https://huggingface.co/spaces/jbilcke-hf/ai-tube-model-adl-3/settings
+
+// we maintain a global cluster state
+
+export const clusterMachines: ClusterMachine[] = []
+for (let i = 0; i < nbClusterMachines; i++) {
+  clusterMachines.push({
+    id: i,
+    url: `https://jbilcke-hf-ai-tube-model-adl-${i + 1}.hf.space`,
+    busy: false
+  })
+}
+
+export async function getClusterMachine(maxWaitTimeInMs: number = 10000): Promise<ClusterMachine> {
+  let clusterMachine: ClusterMachine | undefined = undefined
+  let timeSpentWaitingInMs = 0
+  const intervalInMs = 500
+
+  while (true) {
+    clusterMachine = clusterMachines.find(m => !m.busy)
+    if (clusterMachine) { break }
+    if (timeSpentWaitingInMs > maxWaitTimeInMs) { break }
+    await sleep(intervalInMs)
+  }
+
+  if (!clusterMachine) {
+    throw new Error(`failed to find a cluster machine within ${maxWaitTimeInMs/10} seconds`)
+  }
+
+  // change the global state
+  clusterMachine.busy = true
+
+  return clusterMachine
+}
+
+export const token = `${process.env.MICROSERVICE_API_SECRET_TOKEN || ""}`
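To make the intent of the new helper clearer, a minimal usage sketch (this mirrors the pattern used by src/app/api/v1/render/index.ts below; the fetch call shown here is only the shape of the pattern, not an exact call from the commit):

import { getClusterMachine } from "@/app/api/v1/render/cluster"

// sketch only: acquire a free machine, use it, and always release it,
// because `busy` is shared module-level state
const machine = await getClusterMachine()
try {
  const res = await fetch(`${machine.url}/api/predict`, { method: "POST" })
  // ...handle the Gradio response here...
} finally {
  machine.busy = false
}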
src/app/api/v1/render/index.ts
ADDED
@@ -0,0 +1,121 @@
+import { generateSeed, getValidNumber } from "@aitube/clap"
+import { getClusterMachine, token } from "./cluster"
+
+export async function render(request: {
+  prompt?: string
+  seed?: number
+  width?: number
+  height?: number
+  nbFrames?: number
+  nbFPS?: number
+  nbSteps?: number
+  debug?: boolean
+}): Promise<string> {
+
+  const prompt = request.prompt || ""
+  if (!prompt) {
+    throw new Error(`missing prompt`)
+  }
+
+  const debug = !!request.debug
+
+  const seed = request?.seed || generateSeed()
+
+  // see https://huggingface.co/spaces/jbilcke-hf/ai-tube-model-animatediff-lightning/blob/main/app.py#L15-L18
+  const baseModel = "epiCRealism"
+
+  // the motion LoRA - could be useful one day
+  const motion = ""
+
+  // can be 1, 2, 4 or 8
+  // but values below 4 look bad
+  const nbSteps = getValidNumber(request.nbSteps, 1, 8, 4)
+  const width = getValidNumber(request.width, 256, 1024, 512)
+  const height = getValidNumber(request.height, 256, 1024, 288)
+
+  const nbFrames = getValidNumber(request.nbFrames, 10, 120, 10)
+  const nbFPS = getValidNumber(request.nbFPS, 10, 120, 10)
+
+  // by default AnimateDiff generates about 2 seconds of video at 10 fps
+  // the Gradio API now has some code to optional fix that using FFmpeg,
+  // but this will add some delay overhead, so use with care!
+  const durationInSec = Math.round(nbFrames / nbFPS)
+  const framesPerSec = nbFPS
+
+  const machine = await getClusterMachine()
+
+  try {
+    if (debug) {
+      console.log(`calling AnimateDiff Lightning API with params (some are hidden):`, {
+        baseModel,
+        motion,
+        nbSteps,
+        width,
+        height,
+        nbFrames,
+        nbFPS,
+        durationInSec,
+        framesPerSec,
+      })
+    }
+
+    const res = await fetch(machine.url + (machine.url.endsWith("/") ? "" : "/") + "api/predict", {
+      method: "POST",
+      headers: {
+        "Content-Type": "application/json",
+        // Authorization: `Bearer ${token}`,
+      },
+      body: JSON.stringify({
+        fn_index: 0, // <- important! it is currently 4, not 1!
+        data: [
+          token,
+          prompt,
+          baseModel,
+          width,
+          height,
+          motion,
+          nbSteps,
+          durationInSec,
+          framesPerSec,
+        ],
+      }),
+
+      // necessary since we are using the fetch() provided by NextJS
+      cache: "no-store",
+
+      // we can also use this (see https://vercel.com/blog/vercel-cache-api-nextjs-cache)
+      // next: { revalidate: 1 }
+    })
+
+    // console.log("res:", res)
+
+    const { data } = await res.json()
+
+    // console.log("data:", data)
+    // Recommendation: handle errors
+    if (res.status !== 200 || !Array.isArray(data)) {
+      // This will activate the closest `error.js` Error Boundary
+      throw new Error(`Failed to fetch data (status: ${res.status})`)
+    }
+    // console.log("data:", data.slice(0, 50))
+
+    const base64Content = (data?.[0] || "") as string
+
+    if (!base64Content) {
+      throw new Error(`invalid response (no content)`)
+    }
+
+    // this API already emits a data-uri with a content type
+    // addBase64HeaderToMp4(base64Content)
+    return base64Content
+  } catch (err) {
+    if (debug) {
+      console.error(`failed to call the AnimateDiff Lightning API:`)
+      console.error(err)
+    }
+    throw err
+  } finally {
+    // important: we need to free up the machine!
+    machine.busy = false
+  }
+}
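And a short sketch of how render() is meant to be called; the prompt and dimensions below are illustrative only, while in this commit the real caller is src/app/api/v1/edit/videos/processShot.ts, which derives them from the clap metadata:

import { render } from "@/app/api/v1/render"

// sketch only: render() resolves to a base64 data-URI of the generated MP4,
// which the caller can assign to a segment's assetUrl
const assetUrl = await render({
  prompt: "medium-shot of a puppy eating a birthday cake, cute, funny",
  width: 512,
  height: 288,
  nbFrames: 80,
  nbFPS: 24,
  nbSteps: 4,
  debug: true,
})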
src/app/api/v1/render/route.ts
ADDED
@@ -0,0 +1,56 @@
+import { NextResponse, NextRequest } from "next/server"
+import queryString from "query-string"
+import { ClapMediaOrientation, getValidNumber } from "@aitube/clap"
+
+import { throwIfInvalidToken } from "@/app/api/v1/auth/throwIfInvalidToken"
+import { getContentType } from "@/lib/data/getContentType"
+
+import { render } from "."
+
+export async function POST(req: NextRequest, res: NextResponse) {
+  await throwIfInvalidToken(req.headers.get("Authorization"))
+
+  const request = await req.json() as {
+    prompt: string
+    width: number
+    height: number
+    turbo: boolean
+    // can add more stuff for the V2 of Stories Factory
+  }
+
+  console.log("[api/v1/render] request:", request)
+
+  const qs = queryString.parseUrl(req.url || "")
+  const query = (qs || {}).query
+
+  const turbo = !!query?.turbo
+
+  const prompt = `${request?.prompt || ""}`.trim()
+  const width = getValidNumber(request?.width, 256, 8192, 1024)
+  const height = getValidNumber(request?.height, 256, 8192, 576)
+  const nbFrames = 80
+  const nbFPS = 24
+  const nbSteps = turbo ? 4 : 8
+  const debug = true
+
+  const assetUrl = await render({
+    prompt,
+    width,
+    height,
+    nbFrames,
+    nbFPS,
+    nbSteps,
+    debug,
+  })
+
+  const contentType = getContentType(assetUrl)
+  const base64String = assetUrl.split(";base64,").pop() || ""
+  const data = Buffer.from(base64String, "base64")
+  const headers = new Headers()
+  headers.set('Content-Type', contentType)
+  return new NextResponse(data, {
+    status: 200,
+    statusText: "OK",
+    headers
+  })
+}
src/app/api/v1/search/index.ts
CHANGED
@@ -29,7 +29,7 @@ export async function search({
     prefix: "```yaml\n",
   })

-  console.log("rawString: ", rawString)
+  // console.log("rawString: ", rawString)

   const results = parseRawStringToYAML<BasicSearchResult[]>(rawString, [])

@@ -52,7 +52,7 @@ export async function extend({
     prefix: "```yaml\n",
   })

-  console.log("rawString: ", rawString)
+  // console.log("rawString: ", rawString)

   const results = parseRawStringToYAML<ExtendedSearchResult[]>(rawString, [])

src/app/api/v1/types.ts
CHANGED
@@ -9,7 +9,7 @@ export type LatentEntity = {
 }

 export type LatentStory = {
-
+  comment: string
   image: string
   voice: string
 }
src/lib/on-device-ai/classifyFrame.ts
CHANGED
@@ -38,10 +38,10 @@ const globalState: { classifier?: InteractiveImageClassifier } = {};
 })();

 export async function classifyFrame(frame: TexImageSource, x: number, y: number): Promise<ImageClassifierResult> {
-  console.log("classifyFrame: loading classifier..")
+  // console.log("classifyFrame: loading classifier..")
   globalState.classifier = globalState.classifier || (await getInteractiveImageClassifier())

-  console.log("classifyFrame: segmenting..")
+  // console.log("classifyFrame: segmenting..")
   return globalState.classifier(frame, x, y)
 }

src/lib/on-device-ai/getSegmentationCanvas.tsx
CHANGED
@@ -26,7 +26,7 @@ export async function getSegmentationCanvas({
     height: `${height}px`,
   };

-  console.log("canvas:", canvas)
+  // console.log("canvas:", canvas)
   const CanvasComponent = () => (
     <canvas
       ref={(node) => {