jbilcke-hf (HF staff) committed
Commit: f24ad59
Parent: 6419aeb

upgraded to @aitube/client 0.0.12

Files changed (41):
  1. package-lock.json +13 -13
  2. package.json +1 -1
  3. src/app/api/actions/ai-tube-hf/downloadClapProject.ts +1 -1
  4. src/app/api/actions/ai-tube-hf/getVideoRequestsFromChannel.ts +2 -2
  5. src/app/api/actions/ai-tube-hf/parseChannel.ts +1 -1
  6. src/app/api/generators/clap/getLatentScenes.ts +1 -1
  7. src/app/api/generators/clap/unknownObjectToLatentScenes.ts +1 -1
  8. src/app/api/generators/search/getLatentSearchResults.ts +1 -1
  9. src/app/api/generators/search/unknownObjectToLatentSearchResults.ts +2 -2
  10. src/app/api/parsers/parseBasicSearchResult.ts +8 -0
  11. src/app/api/parsers/parseBasicSearchResults.ts +13 -0
  12. src/app/api/parsers/parseCompletionMode.ts +10 -0
  13. src/app/api/{utils → parsers}/parseDatasetPrompt.ts +0 -0
  14. src/app/api/{utils → parsers}/parseDatasetReadme.ts +0 -0
  15. src/app/api/parsers/parseLatentSearchMode.ts +10 -0
  16. src/app/api/{utils → parsers}/parseProjectionFromLoRA.ts +0 -0
  17. src/app/api/parsers/parsePrompt.ts +9 -0
  18. src/app/api/{utils → parsers}/parsePromptFileName.ts +0 -0
  19. src/app/api/{utils → parsers}/parseRawStringToYAML.ts +0 -0
  20. src/app/api/{utils → parsers}/parseString.ts +0 -0
  21. src/app/api/{utils → parsers}/parseStringArray.ts +0 -0
  22. src/app/api/{utils → parsers}/parseVideoModelName.ts +0 -0
  23. src/app/api/{utils → parsers}/parseVideoOrientation.ts +0 -0
  24. src/app/api/utils/computeOrientationProjectionWidthHeight.ts +2 -2
  25. src/app/api/v1/create/index.ts +143 -0
  26. src/app/api/v1/create/route.ts +6 -126
  27. src/app/api/v1/create/types.ts +6 -0
  28. src/app/api/v1/edit/dialogues/processShot.ts +20 -5
  29. src/app/api/v1/edit/dialogues/route.ts +18 -7
  30. src/app/api/v1/edit/entities/index.ts +69 -0
  31. src/app/api/v1/edit/entities/route.ts +15 -64
  32. src/app/api/v1/edit/storyboards/processShot.ts +26 -9
  33. src/app/api/v1/edit/storyboards/route.ts +19 -8
  34. src/app/api/v1/edit/videos/processShot.ts +25 -7
  35. src/app/api/v1/edit/videos/route.ts +19 -8
  36. src/app/api/v1/search/index.ts +1 -1
  37. src/app/api/v1/search/route.ts +7 -18
  38. src/app/latent/watch/page.tsx +6 -7
  39. src/app/main.tsx +1 -1
  40. src/app/views/user-channel-view/index.tsx +2 -2
  41. src/lib/utils/parseMediaProjectionType.ts +1 -1
package-lock.json CHANGED
@@ -9,7 +9,7 @@
       "version": "0.0.0",
       "dependencies": {
         "@aitube/clap": "0.0.10",
-        "@aitube/client": "0.0.11",
+        "@aitube/client": "0.0.12",
         "@aitube/engine": "0.0.2",
         "@huggingface/hub": "0.12.3-oauth",
         "@huggingface/inference": "^2.6.7",
@@ -129,9 +129,9 @@
       }
     },
     "node_modules/@aitube/client": {
-      "version": "0.0.11",
-      "resolved": "https://registry.npmjs.org/@aitube/client/-/client-0.0.11.tgz",
-      "integrity": "sha512-MqgLN/VxZ6BmHovCSIE8SqR+NpC8U2j1aGrxob6+yBUOsmjBm81cwRtv861jgz0LM3aVXOJsM03EJDFgUnigQQ==",
+      "version": "0.0.12",
+      "resolved": "https://registry.npmjs.org/@aitube/client/-/client-0.0.12.tgz",
+      "integrity": "sha512-b/QFTtAKwr7H5dMSco+iXhwJRpPw/sT487EGpNjDbuQamIJ3FqdlVMTC/c5jdX8meFp+m35n/dY58Iy39Lle5A==",
       "peerDependencies": {
         "@aitube/clap": "0.0.10"
       }
@@ -1520,9 +1520,9 @@
       }
     },
     "node_modules/@mediapipe/tasks-vision": {
-      "version": "0.10.13-rc.20240503",
-      "resolved": "https://registry.npmjs.org/@mediapipe/tasks-vision/-/tasks-vision-0.10.13-rc.20240503.tgz",
-      "integrity": "sha512-1ExneS7QNyKFCROGJ9A04B89ofFMga/m0ANLiD8yQeHJC7jZKSnWlaC+QxOwdIN4pV+uvMoQO4TPDpuqGlVEpw=="
+      "version": "0.10.13",
+      "resolved": "https://registry.npmjs.org/@mediapipe/tasks-vision/-/tasks-vision-0.10.13.tgz",
+      "integrity": "sha512-8uYOKbtASqZu4m1Tf0nBvOaT50pGTVt0siQ3AWJJ4OV+r+HsWDYquQvev/fo78i49mt2IM2eskV7UxX4+e4tLw=="
     },
     "node_modules/@next/env": {
       "version": "14.2.3",
@@ -4314,9 +4314,9 @@
       "integrity": "sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA=="
     },
     "node_modules/electron-to-chromium": {
-      "version": "1.4.755",
-      "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.4.755.tgz",
-      "integrity": "sha512-9nop+3jZxSHIxe1EzEUcjDXzK+3qOv3fY5w0sE88nIZUntbv1aXWmoxGWlklX5XSO4txCpLssWkUSh8RQPovBg=="
+      "version": "1.4.756",
+      "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.4.756.tgz",
+      "integrity": "sha512-RJKZ9+vEBMeiPAvKNWyZjuYyUqMndcP1f335oHqn3BEQbs2NFtVrnK5+6Xg5wSM9TknNNpWghGDUCKGYF+xWXw=="
     },
     "node_modules/elliptic": {
       "version": "6.5.4",
@@ -8423,9 +8423,9 @@
       "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA=="
     },
     "node_modules/update-browserslist-db": {
-      "version": "1.0.14",
-      "resolved": "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.0.14.tgz",
-      "integrity": "sha512-JixKH8GR2pWYshIPUg/NujK3JO7JiqEEUiNArE86NQyrgUuZeTlZQN3xuS/yiV5Kb48ev9K6RqNkaJjXsdg7Jw==",
+      "version": "1.0.15",
+      "resolved": "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.0.15.tgz",
+      "integrity": "sha512-K9HWH62x3/EalU1U6sjSZiylm9C8tgq2mSvshZpqc7QE69RaA2qjhkW2HlNA0tFpEbtyFz7HTqbSdN4MSwUodA==",
       "funding": [
         {
           "type": "opencollective",

package.json CHANGED
@@ -11,7 +11,7 @@
   },
   "dependencies": {
     "@aitube/clap": "0.0.10",
-    "@aitube/client": "0.0.11",
+    "@aitube/client": "0.0.12",
     "@aitube/engine": "0.0.2",
     "@huggingface/hub": "0.12.3-oauth",
     "@huggingface/inference": "^2.6.7",

src/app/api/actions/ai-tube-hf/downloadClapProject.ts CHANGED
@@ -6,7 +6,7 @@ import { ClapProject, parseClap } from "@aitube/clap"
 import { ChannelInfo, MediaInfo, VideoRequest } from "@/types/general"
 import { defaultVideoModel } from "@/app/config"
 
-import { parseVideoModelName } from "../../utils/parseVideoModelName"
+import { parseVideoModelName } from "../../parsers/parseVideoModelName"
 import { computeOrientationProjectionWidthHeight } from "../../utils/computeOrientationProjectionWidthHeight"
 
 import { downloadFileAsBlob } from "./downloadFileAsBlob"

src/app/api/actions/ai-tube-hf/getVideoRequestsFromChannel.ts CHANGED
@@ -3,9 +3,9 @@
 import { ChannelInfo, VideoRequest } from "@/types/general"
 import { getCredentials } from "./getCredentials"
 import { listFiles } from "@/lib/huggingface/hub/src"
-import { parsePromptFileName } from "../../utils/parsePromptFileName"
+import { parsePromptFileName } from "../../parsers/parsePromptFileName"
 import { downloadFileAsText } from "./downloadFileAsText"
-import { parseDatasetPrompt } from "../../utils/parseDatasetPrompt"
+import { parseDatasetPrompt } from "../../parsers/parseDatasetPrompt"
 import { computeOrientationProjectionWidthHeight } from "../../utils/computeOrientationProjectionWidthHeight"
 import { downloadClapProject } from "./downloadClapProject"
 

src/app/api/actions/ai-tube-hf/parseChannel.ts CHANGED
@@ -1,7 +1,7 @@
 "use server"
 
 import { Credentials, downloadFile, whoAmI } from "@/lib/huggingface/hub/src"
-import { parseDatasetReadme } from "@/app/api/utils/parseDatasetReadme"
+import { parseDatasetReadme } from "@/app/api/parsers/parseDatasetReadme"
 import { ChannelInfo, VideoGenerationModel, VideoOrientation } from "@/types/general"
 
 import { adminCredentials } from "../config"

src/app/api/generators/clap/getLatentScenes.ts CHANGED
@@ -8,7 +8,7 @@ import { predict as predictWithOpenAI } from "@/app/api/providers/openai/predict
 import { LatentScenes } from "./types"
 import { getSystemPrompt } from "./getSystemPrompt"
 import { unknownObjectToLatentScenes } from "./unknownObjectToLatentScenes"
-import { parseRawStringToYAML } from "../../utils/parseRawStringToYAML"
+import { parseRawStringToYAML } from "../../parsers/parseRawStringToYAML"
 
 export async function getLatentScenes({
   prompt = "",

src/app/api/generators/clap/unknownObjectToLatentScenes.ts CHANGED
@@ -1,4 +1,4 @@
-import { parseStringArray } from "../../utils/parseStringArray"
+import { parseStringArray } from "../../parsers/parseStringArray"
 import { LatentScene, LatentScenes } from "./types"
 
 /**

src/app/api/generators/search/getLatentSearchResults.ts CHANGED
@@ -6,7 +6,7 @@ import { predict as predictWithHuggingFace } from "@/app/api/providers/huggingfa
 import { predict as predictWithOpenAI } from "@/app/api/providers/openai/predictWithOpenAI"
 import { LatentSearchResults } from "./types"
 import { getSystemPrompt } from "./getSystemPrompt"
-import { parseRawStringToYAML } from "../../utils/parseRawStringToYAML"
+import { parseRawStringToYAML } from "../../parsers/parseRawStringToYAML"
 import { unknownObjectToLatentSearchResults } from "./unknownObjectToLatentSearchResults"
 
 export async function getLatentSearchResults({

src/app/api/generators/search/unknownObjectToLatentSearchResults.ts CHANGED
@@ -1,7 +1,7 @@
 import { generateSeed } from "@aitube/clap"
 
-import { parseString } from "../../utils/parseString"
-import { parseStringArray } from "../../utils/parseStringArray"
+import { parseString } from "../../parsers/parseString"
+import { parseStringArray } from "../../parsers/parseStringArray"
 import { LatentSearchResult, LatentSearchResults } from "./types"
 
 export function unknownObjectToLatentSearchResults(something: any): LatentSearchResults {

src/app/api/parsers/parseBasicSearchResult.ts ADDED
@@ -0,0 +1,8 @@
+import { decode } from "js-base64"
+
+import { BasicSearchResult } from "../v1/search/types"
+
+export function parseBasicSearchResult(input?: any): BasicSearchResult {
+  let basicResult = JSON.parse(decode(`${input || ""}`)) as BasicSearchResult
+  return basicResult
+}

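For context, this parser expects its input to be a base64-encoded JSON payload, so a caller would presumably produce the value with the matching `encode` from js-base64 (a hypothetical sketch; the fields of the result object are illustrative, since the `BasicSearchResult` type definition is not part of this diff):

```ts
import { encode } from "js-base64"
import { parseBasicSearchResult } from "@/app/api/parsers/parseBasicSearchResult"

// serialize a result to JSON, then base64-encode it so it can travel in a URL
const raw = encode(JSON.stringify({ title: "some video", tags: ["demo"] }))

// round-trips back to the original object
const result = parseBasicSearchResult(raw)
```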
src/app/api/parsers/parseBasicSearchResults.ts ADDED
@@ -0,0 +1,13 @@
+import { BasicSearchResult } from "../v1/search/types"
+
+export function parseBasicSearchResult(input?: any, defaultResults: BasicSearchResult[] = []): BasicSearchResult[] {
+  let basicResults: BasicSearchResult[] = defaultResults
+  try {
+    const rawString = decodeURIComponent(`${input || ""}` || "").trim() as string
+    const maybeExistingResults = JSON.parse(rawString)
+    if (Array.isArray(maybeExistingResults)) {
+      basicResults = maybeExistingResults
+    }
+  } catch (err) {}
+  return basicResults
+}

src/app/api/parsers/parseCompletionMode.ts ADDED
@@ -0,0 +1,10 @@
+import { ClapCompletionMode } from "../v1/edit/types"
+
+export function parseCompletionMode(input?: any, defaultMode: ClapCompletionMode = "partial"): ClapCompletionMode {
+  let mode = defaultMode
+  try {
+    let maybeMode = decodeURIComponent(`${input || ""}` || defaultMode).trim()
+    mode = ["partial", "full"].includes(maybeMode) ? (maybeMode as ClapCompletionMode) : defaultMode
+  } catch (err) {}
+  return mode
+}

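For reference, the behavior of this new parser (a minimal sketch; the outputs follow directly from the implementation above):

```ts
import { parseCompletionMode } from "@/app/api/parsers/parseCompletionMode"

parseCompletionMode("full")      // "full"
parseCompletionMode("partial")   // "partial"
parseCompletionMode("bogus")     // falls back to the default: "partial"
parseCompletionMode(undefined)   // "partial"
```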
src/app/api/{utils → parsers}/parseDatasetPrompt.ts RENAMED
File without changes
src/app/api/{utils → parsers}/parseDatasetReadme.ts RENAMED
File without changes
src/app/api/parsers/parseLatentSearchMode.ts ADDED
@@ -0,0 +1,10 @@
+import { LatentSearchMode } from "../v1/search/route"
+
+export function parseLatentSearchMode(input?: any, defaultMode: LatentSearchMode = "basic"): LatentSearchMode {
+  let mode = defaultMode
+  try {
+    let maybeMode = decodeURIComponent(`${input || ""}` || defaultMode).trim()
+    mode = ["basic", "extended"].includes(maybeMode) ? (maybeMode as LatentSearchMode) : defaultMode
+  } catch (err) {}
+  return mode
+}

src/app/api/{utils → parsers}/parseProjectionFromLoRA.ts RENAMED
File without changes
src/app/api/parsers/parsePrompt.ts ADDED
@@ -0,0 +1,9 @@
+export function parsePrompt(input?: any) {
+  let res = ""
+  try {
+    res = decodeURIComponent(`${input || ""}` || "").trim()
+  } catch (err) {}
+
+  if (!res.length) { throw new Error(`please provide a prompt`) }
+  return res
+}

src/app/api/{utils → parsers}/parsePromptFileName.ts RENAMED
File without changes
src/app/api/{utils → parsers}/parseRawStringToYAML.ts RENAMED
File without changes
src/app/api/{utils → parsers}/parseString.ts RENAMED
File without changes
src/app/api/{utils → parsers}/parseStringArray.ts RENAMED
File without changes
src/app/api/{utils → parsers}/parseVideoModelName.ts RENAMED
File without changes
src/app/api/{utils → parsers}/parseVideoOrientation.ts RENAMED
File without changes
src/app/api/utils/computeOrientationProjectionWidthHeight.ts CHANGED
@@ -1,7 +1,7 @@
 import { VideoOrientation, MediaProjection } from "@/types/general"
 
-import { parseVideoOrientation } from "./parseVideoOrientation"
-import { parseProjectionFromLoRA } from "./parseProjectionFromLoRA"
+import { parseVideoOrientation } from "../parsers/parseVideoOrientation"
+import { parseProjectionFromLoRA } from "../parsers/parseProjectionFromLoRA"
 
 export function computeOrientationProjectionWidthHeight({
   lora: maybeLora,

src/app/api/v1/create/index.ts ADDED
@@ -0,0 +1,143 @@
+"use server"
+
+import { ClapProject, getValidNumber, newClap, newSegment } from "@aitube/clap"
+
+import { predict } from "@/app/api/providers/huggingface/predictWithHuggingFace"
+import { parseRawStringToYAML } from "@/app/api/parsers/parseRawStringToYAML"
+
+import { systemPrompt } from "./systemPrompt"
+import { LatentStory } from "./types"
+
+// a helper to generate Clap stories from a few sentences
+// this is mostly used by external apps such as the Stories Factory
+export async function create(request: {
+  prompt?: string
+  width?: number
+  height?: number
+} = {
+  prompt: "",
+  width: 1024,
+  height: 576,
+}): Promise<ClapProject> {
+
+  const prompt = `${request?.prompt || ""}`.trim()
+
+  console.log("api/v1/create(): request:", request)
+
+  if (!prompt.length) { throw new Error(`please provide a prompt`) }
+
+  const width = getValidNumber(request?.width, 256, 8192, 1024)
+  const height = getValidNumber(request?.height, 256, 8192, 576)
+
+  const userPrompt = `Video story to generate: ${prompt}`
+
+  // TODO use streaming for the Hugging Face prediction
+  //
+  // note that a Clap file is actually a YAML stream of documents
+  // so technically we could stream everything from end-to-end
+  // (but I haven't coded the helpers to do this yet)
+  const rawString = await predict({
+    systemPrompt,
+    userPrompt,
+    nbMaxNewTokens: 1400,
+    prefix: "```yaml\n",
+  })
+
+  console.log("api/v1/create(): rawString: ", rawString)
+
+  const shots = parseRawStringToYAML<LatentStory[]>(rawString, [])
+
+  console.log(`api/v1/create(): generated ${shots.length} shots`)
+
+  // this is approximate - TTS generation will determine the final duration of each shot
+  const defaultSegmentDurationInMs = 7000
+
+  let currentElapsedTimeInMs = 0
+  let currentSegmentDurationInMs = defaultSegmentDurationInMs
+
+  const clap: ClapProject = newClap({
+    meta: {
+      title: "Not needed", // we don't need a title actually
+      description: "This video has been generated using AI",
+      synopsis: "",
+      licence: "Non Commercial",
+      orientation: "vertical",
+      width,
+      height,
+      isInteractive: false,
+      isLoop: false,
+      durationInMs: shots.length * defaultSegmentDurationInMs,
+      defaultVideoModel: "AnimateDiff-Lightning",
+    }
+  })
+
+  for (const { title, image, voice } of shots) {
+
+    console.log(`api/v1/create(): - ${title}`)
+
+    // note: it would be nice if we could have a convention saying that
+    // track 0 is for videos and track 1 for storyboards
+    //
+    // however, that's a bit constraining, as people will generate .clap
+    // files using all kinds of tools and levels of development experience,
+    // and they may not wish to learn the Clap protocol format completely
+    //
+    // TL;DR:
+    // we should fix the Clap file editor to make it able to read videos
+    // from any track number
+
+
+    /*
+    we disable it, because we don't generate animated videos yet
+    clap.segments.push(newSegment({
+      track: 0,
+      category: "video",
+      prompt: image,
+      outputType: "video"
+    }))
+    */
+
+    clap.segments.push(newSegment({
+      track: 1,
+      startTimeInMs: currentSegmentDurationInMs,
+      assetDurationInMs: defaultSegmentDurationInMs,
+      category: "storyboard",
+      prompt: image,
+      outputType: "image"
+    }))
+
+    clap.segments.push(newSegment({
+      track: 2,
+      startTimeInMs: currentSegmentDurationInMs,
+      assetDurationInMs: defaultSegmentDurationInMs,
+      category: "interface",
+      prompt: title,
+      // assetUrl: `data:text/plain;base64,${btoa(title)}`,
+      assetUrl: title,
+      outputType: "text"
+    }))
+
+    clap.segments.push(newSegment({
+      track: 3,
+      startTimeInMs: currentSegmentDurationInMs,
+      assetDurationInMs: defaultSegmentDurationInMs,
+      category: "dialogue",
+      prompt: voice,
+      outputType: "audio"
+    }))
+
+    // the presence of a camera is mandatory
+    clap.segments.push(newSegment({
+      track: 4,
+      startTimeInMs: currentSegmentDurationInMs,
+      assetDurationInMs: defaultSegmentDurationInMs,
+      category: "camera",
+      prompt: "vertical video",
+      outputType: "text"
+    }))
+
+    currentSegmentDurationInMs += defaultSegmentDurationInMs
+  }
+
+  return clap
+}

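For illustration, this is how the new helper could be called from other server code (a sketch only; `create` is the function above, the prompt text is made up):

```ts
import { create } from "@/app/api/v1/create"

// turn a one-line pitch into a vertical video story
const clap = await create({
  prompt: "a cat discovers a portal hidden behind the fridge",
  width: 1024,
  height: 576,
})

// every generated shot contributes storyboard, interface, dialogue and camera segments
console.log(`${clap.segments.length} segments over ${clap.meta.durationInMs} ms`)
```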
src/app/api/v1/create/route.ts CHANGED
@@ -1,16 +1,7 @@
 import { NextResponse, NextRequest } from "next/server"
-import { ClapProject, getValidNumber, newClap, newSegment, serializeClap } from "@aitube/clap"
+import { getValidNumber, serializeClap } from "@aitube/clap"
 
-import { predict } from "@/app/api/providers/huggingface/predictWithHuggingFace"
-import { parseRawStringToYAML } from "@/app/api/utils/parseRawStringToYAML"
-
-import { systemPrompt } from "./systemPrompt"
-
-export type LatentStory = {
-  title: string
-  image: string
-  voice: string
-}
+import { create } from "."
 
 // a helper to generate Clap stories from a few sentences
 // this is mostly used by external apps such as the Stories Factory
@@ -23,125 +14,14 @@ export async function POST(req: NextRequest) {
     // can add more stuff for the V2 of Stories Factory
   }
 
-  const prompt = `${request?.prompt || ""}`.trim()
-
   console.log("[api/v1/create] request:", request)
 
-  if (!prompt.length) { throw new Error(`please provide a prompt`) }
-
-  const width = getValidNumber(request?.width, 256, 8192, 1024)
-  const height = getValidNumber(request?.height, 256, 8192, 576)
-
-  const userPrompt = `Video story to generate: ${prompt}`
-
-  // TODO use streaming for the Hugging Face prediction
-  //
-  // note that a Clap file is actually a YAML stream of documents
-  // so technically we could stream everything from end-to-end
-  // (but I haven't coded the helpers to do this yet)
-  const rawString = await predict({
-    systemPrompt,
-    userPrompt,
-    nbMaxNewTokens: 1400,
-    prefix: "```yaml\n",
+  const clap = await create({
+    prompt: `${request?.prompt || ""}`.trim(),
+    width: getValidNumber(request?.width, 256, 8192, 1024),
+    height: getValidNumber(request?.height, 256, 8192, 576)
   })
 
-  console.log("[api/v1/create] rawString: ", rawString)
-
-  const shots = parseRawStringToYAML<LatentStory[]>(rawString, [])
-
-  console.log(`[api/v1/create] generated ${shots.length} shots`)
-
-  // this is approximate - TTS generation will determine the final duration of each shot
-  const defaultSegmentDurationInMs = 7000
-
-  let currentElapsedTimeInMs = 0
-  let currentSegmentDurationInMs = defaultSegmentDurationInMs
-
-  const clap: ClapProject = newClap({
-    meta: {
-      title: "Not needed", // we don't need a title actually
-      description: "This video has been generated using AI",
-      synopsis: "",
-      licence: "Non Commercial",
-      orientation: "vertical",
-      width,
-      height,
-      isInteractive: false,
-      isLoop: false,
-      durationInMs: shots.length * defaultSegmentDurationInMs,
-      defaultVideoModel: "AnimateDiff-Lightning",
-    }
-  })
-
-  for (const { title, image, voice } of shots) {
-
-    console.log(`[api/v1/create] - ${title}`)
-
-    // note: it would be nice if we could have a convention saying that
-    // track 0 is for videos and track 1 storyboards
-    //
-    // however, that's a bit constraining as people will generate .clap
-    // using all kind of tools and development experience,
-    // and they may not wish to learn the Clap protocol format completely
-    //
-    // TL;DR:
-    // we should fix the Clap file editor to make it able to react videos
-    // from any track number
-
-
-    /*
-    we disable it, because we don't generate animated videos yet
-    clap.segments.push(newSegment({
-      track: 0,
-      category: "video",
-      prompt: image,
-      outputType: "video"
-    }))
-    */
-
-    clap.segments.push(newSegment({
-      track: 1,
-      startTimeInMs: currentSegmentDurationInMs,
-      assetDurationInMs: defaultSegmentDurationInMs,
-      category: "storyboard",
-      prompt: image,
-      outputType: "image"
-    }))
-
-    clap.segments.push(newSegment({
-      track: 2,
-      startTimeInMs: currentSegmentDurationInMs,
-      assetDurationInMs: defaultSegmentDurationInMs,
-      category: "interface",
-      prompt: title,
-      // assetUrl: `data:text/plain;base64,${btoa(title)}`,
-      assetUrl: title,
-      outputType: "text"
-    }))
-
-    clap.segments.push(newSegment({
-      track: 3,
-      startTimeInMs: currentSegmentDurationInMs,
-      assetDurationInMs: defaultSegmentDurationInMs,
-      category: "dialogue",
-      prompt: voice,
-      outputType: "audio"
-    }))
-
-    // the presence of a camera is mandatory
-    clap.segments.push(newSegment({
-      track: 4,
-      startTimeInMs: currentSegmentDurationInMs,
-      assetDurationInMs: defaultSegmentDurationInMs,
-      category: "camera",
-      prompt: "vertical video",
-      outputType: "text"
-    }))
-
-    currentSegmentDurationInMs += defaultSegmentDurationInMs
-  }
-
   // TODO replace by Clap file streaming
   return new NextResponse(await serializeClap(clap), {
     status: 200,

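An external caller such as the Stories Factory might then invoke the endpoint roughly like this (hypothetical client code; the JSON field names match the route above, while the host and the use of a JSON body are assumptions):

```ts
import { parseClap } from "@aitube/clap"

const res = await fetch("https://example.org/api/v1/create", {
  method: "POST",
  headers: { "content-type": "application/json" },
  body: JSON.stringify({ prompt: "a robot learns to paint", width: 1024, height: 576 }),
})

// the response body is a serialized (gzipped) .clap file
const clap = await parseClap(await res.blob())
```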
src/app/api/v1/create/types.ts ADDED
@@ -0,0 +1,6 @@
+
+export type LatentStory = {
+  title: string
+  image: string
+  voice: string
+}

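To make the shape concrete, a single parsed shot might look like this (illustrative values only; the comments reflect how create() maps each field onto tracks):

```ts
import { LatentStory } from "@/app/api/v1/create/types"

const shot: LatentStory = {
  title: "Opening shot",                                // shown on the "interface" track
  image: "a cozy kitchen at dawn, cinematic lighting",  // storyboard image prompt
  voice: "Every great story starts with breakfast.",    // dialogue line for TTS
}
```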
src/app/api/v1/edit/dialogues/processShot.ts CHANGED
@@ -5,18 +5,24 @@ import { getSpeechBackgroundAudioPrompt } from "@aitube/engine"
 import { generateSpeechWithParlerTTS } from "@/app/api/generators/speech/generateVoiceWithParlerTTS"
 import { getMediaInfo } from "@/app/api/utils/getMediaInfo"
 
+import { ClapCompletionMode } from "../types"
+
 export async function processShot({
   shotSegment,
-  clap
+  existingClap,
+  newerClap,
+  mode
 }: {
   shotSegment: ClapSegment
-  clap: ClapProject
+  existingClap: ClapProject
+  newerClap: ClapProject
+  mode: ClapCompletionMode
 }): Promise<void> {
 
   const shotSegments: ClapSegment[] = filterSegments(
     ClapSegmentFilteringMode.START,
     shotSegment,
-    clap.segments
+    existingClap.segments
   )
 
   const shotDialogueSegments: ClapSegment[] = shotSegments.filter(s =>
@@ -34,7 +40,11 @@ export async function processShot({
     // this generates a mp3
     shotDialogueSegment.assetUrl = await generateSpeechWithParlerTTS({
       text: shotDialogueSegment.prompt,
-      audioId: getSpeechBackgroundAudioPrompt(shotSegments, clap.entityIndex, ["high quality", "crisp", "detailed"]),
+      audioId: getSpeechBackgroundAudioPrompt(
+        shotSegments,
+        existingClap.entityIndex,
+        ["high quality", "crisp", "detailed"]
+      ),
       debug: true,
     })
     shotDialogueSegment.assetSourceType = getClapAssetSourceType(shotDialogueSegment.assetUrl)
@@ -47,7 +57,7 @@ export async function processShot({
 
     // we update the duration of all the segments for this shot
     // (it is possible that this makes the two previous lines redundant)
-    clap.segments.filter(s => {
+    existingClap.segments.forEach(s => {
       s.assetDurationInMs = durationInMs
     })
   }
@@ -58,6 +68,11 @@ export async function processShot({
     }
 
     console.log(`[api/edit/dialogues] processShot: generated dialogue audio: ${shotDialogueSegment?.assetUrl?.slice?.(0, 50)}...`)
+
+    // if it's partial, we need to manually add it
+    if (mode === "partial") {
+      newerClap.segments.push(shotDialogueSegment)
+    }
   } else {
     console.log(`[api/edit/dialogues] processShot: there is already a dialogue audio: ${shotDialogueSegment?.assetUrl?.slice?.(0, 50)}...`)
   }

src/app/api/v1/edit/dialogues/route.ts CHANGED
@@ -1,42 +1,53 @@
 import { NextResponse, NextRequest } from "next/server"
 
-import { ClapProject, ClapSegment, parseClap, serializeClap } from "@aitube/clap"
+import { ClapProject, ClapSegment, newClap, parseClap, serializeClap } from "@aitube/clap"
 
 import { getToken } from "@/app/api/auth/getToken"
 
 import { processShot } from "./processShot"
+import queryString from "query-string"
+import { parseCompletionMode } from "@/app/api/parsers/parseCompletionMode"
 
 // a helper to generate speech for a Clap
 export async function POST(req: NextRequest) {
 
   const jwtToken = await getToken({ user: "anonymous" })
 
+  const qs = queryString.parseUrl(req.url || "")
+  const query = (qs || {}).query
+
+  const mode = parseCompletionMode(query?.c)
+
   const blob = await req.blob()
 
-  const clap: ClapProject = await parseClap(blob)
+  const existingClap: ClapProject = await parseClap(blob)
 
-  if (!clap?.segments) { throw new Error(`no segment found in the provided clap!`) }
+  if (!existingClap?.segments) { throw new Error(`no segment found in the provided clap!`) }
 
-  console.log(`[api/edit/dialogues] detected ${clap.segments.length} segments`)
+  console.log(`[api/edit/dialogues] detected ${existingClap.segments.length} segments`)
 
-  const shotsSegments: ClapSegment[] = clap.segments.filter(s => s.category === "camera")
+  const shotsSegments: ClapSegment[] = existingClap.segments.filter(s => s.category === "camera")
   console.log(`[api/edit/dialogues] detected ${shotsSegments.length} shots`)
 
   if (shotsSegments.length > 32) {
     throw new Error(`Error, this endpoint being synchronous, it is designed for short stories only (max 32 shots).`)
   }
 
+  const newerClap = mode === "full" ? existingClap : newClap()
+
   // we process the shots in parallel (this will increase the queue size in the Gradio spaces)
   await Promise.all(shotsSegments.map(shotSegment =>
     processShot({
       shotSegment,
-      clap
+      existingClap,
+      newerClap,
+      mode
     })
   ))
 
   // console.log(`[api/edit/dialogues] returning the clap augmented with dialogues`)
 
-  return new NextResponse(await serializeClap(clap), {
+  return new NextResponse(await serializeClap(newerClap), {
     status: 200,
     headers: new Headers({ "content-type": "application/x-gzip" }),
   })

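Usage-wise, the new `c` query parameter selects the completion mode. A hypothetical client call could look like this (a sketch only; the host is illustrative, and `existingClap` is assumed to be a ClapProject already in memory):

```ts
import { parseClap, serializeClap } from "@aitube/clap"

// "partial" returns a clap containing only the newly generated dialogue segments;
// "full" returns the whole input clap, augmented in place
const res = await fetch("https://example.org/api/v1/edit/dialogues?c=partial", {
  method: "POST",
  body: await serializeClap(existingClap), // existingClap: ClapProject (assumed)
})

const dialoguesOnly = await parseClap(await res.blob())
```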
src/app/api/v1/edit/entities/index.ts ADDED
@@ -0,0 +1,69 @@
+
+import { ClapProject, getClapAssetSourceType, newClap } from "@aitube/clap"
+
+import { generateImageID } from "./generateImageID"
+import { generateAudioID } from "./generateAudioID"
+
+import { ClapCompletionMode } from "../types"
+
+export async function editEntities({
+  existingClap,
+  newerClap,
+  mode
+}: {
+  existingClap: ClapProject
+  newerClap: ClapProject
+  mode: ClapCompletionMode
+}) {
+
+  if (!existingClap.entities.length) { throw new Error(`please provide at least one entity`) }
+
+  for (const entity of existingClap.entities) {
+
+    let entityHasBeenModified = false
+
+    // TASK 1: GENERATE THE IMAGE PROMPT IF MISSING
+    if (!entity.imagePrompt) {
+      entity.imagePrompt = "a man with a beard"
+      entityHasBeenModified = true
+    }
+
+    // TASK 2: GENERATE THE IMAGE ID IF MISSING
+    if (!entity.imageId) {
+      entity.imageId = await generateImageID({
+        prompt: entity.imagePrompt,
+        seed: entity.seed
+      })
+      entity.imageSourceType = getClapAssetSourceType(entity.imageId)
+      entityHasBeenModified = true
+    }
+
+    // TASK 3: GENERATE THE AUDIO PROMPT IF MISSING
+    if (!entity.audioPrompt) {
+      entity.audioPrompt = "a man with a beard"
+      entityHasBeenModified = true
+    }
+
+    // TASK 4: GENERATE THE AUDIO ID IF MISSING
+
+    // TODO here: call Parler-TTS or a generic audio generator
+    if (!entity.audioId) {
+      entity.audioId = await generateAudioID({
+        prompt: entity.audioPrompt,
+        seed: entity.seed
+      })
+      entity.audioSourceType = getClapAssetSourceType(entity.audioId)
+      entityHasBeenModified = true
+    }
+
+    // in case we are doing a partial update
+    if (mode === "partial" && entityHasBeenModified && !newerClap.entityIndex[entity.id]) {
+      newerClap.entities.push(entity)
+      newerClap.entityIndex[entity.id] = entity
+    }
+  }
+
+  console.log(`[api/edit/entities] returning the newerClap`)
+
+  return newerClap
+}

src/app/api/v1/edit/entities/route.ts CHANGED
@@ -1,86 +1,37 @@
 import { NextResponse, NextRequest } from "next/server"
 import queryString from "query-string"
+import { newClap, parseClap, serializeClap } from "@aitube/clap"
 
-import { getClapAssetSourceType, parseClap, serializeClap } from "@aitube/clap"
 import { getToken } from "@/app/api/auth/getToken"
+import { parseCompletionMode } from "@/app/api/parsers/parseCompletionMode"
 
-import { generateImageID } from "./generateImageID"
-import { generateAudioID } from "./generateAudioID"
-import { ClapCompletionMode } from "../types"
-
-const defaultMode: ClapCompletionMode = "full"
+import { editEntities } from "."
 
 export async function POST(req: NextRequest) {
 
   const qs = queryString.parseUrl(req.url || "")
   const query = (qs || {}).query
 
-  // TODO: use query parameters to determine *what* to generate:
-  /*
-  let prompt = ""
-  try {
-    prompt = decodeURIComponent(query?.p?.toString() || "").trim()
-  } catch (err) {}
-  if (!prompt) {
-    return NextResponse.json({ error: 'no prompt provided' }, { status: 400 });
-  }
-
-  if (!prompt.length) { throw new Error(`please provide a prompt`) }
-  */
-
-
-  let mode = defaultMode
-  try {
-    let maybeMode = decodeURIComponent(query?.mode?.toString() || defaultMode).trim()
-    mode = ["partial", "full"].includes(maybeMode) ? (maybeMode as ClapCompletionMode) : "full"
-  } catch (err) {}
-
-  console.log("[api/edit/entities] request:", prompt)
+  const mode = parseCompletionMode(query?.c)
+  // const prompt = parsePrompt(query?.p)
 
   const jwtToken = await getToken({ user: "anonymous" })
 
   const blob = await req.blob()
 
-  const clap = await parseClap(blob)
-
-  if (!clap.entities.length) { throw new Error(`please provide at least one entity`) }
-
-  for (const entity of clap.entities) {
-
-    // TASK 1: GENERATE THE IMAGE PROMPT IF MISSING
-    if (!entity.imagePrompt) {
-      entity.imagePrompt = "a man with a beard"
-    }
-
-    // TASK 2: GENERATE THE IMAGE ID IF MISSING
-    if (!entity.imageId) {
-      entity.imageId = await generateImageID({
-        prompt: entity.imagePrompt,
-        seed: entity.seed
-      })
-      entity.imageSourceType = getClapAssetSourceType(entity.imageId)
-    }
-
-    // TASK 3: GENERATE THE AUDIO PROMPT IF MISSING
-    if (!entity.audioPrompt) {
-      entity.audioPrompt = "a man with a beard"
-    }
-
-    // TASK 4: GENERATE THE AUDIO ID IF MISSING
-
-    // TODO here: call Parler-TTS or a generic audio generator
-    if (!entity.audioId) {
-      entity.audioId = await generateAudioID({
-        prompt: entity.audioPrompt,
-        seed: entity.seed
-      })
-      entity.audioSourceType = getClapAssetSourceType(entity.audioId)
-    }
-  }
-
-  console.log(`[api/edit/entities] returning the clap extended with the entities`)
+  const existingClap = await parseClap(blob)
+
+  const newerClap = mode === "full" ? existingClap : newClap()
+
+  await editEntities({
+    existingClap,
+    newerClap,
+    mode
+  })
+
+  console.log(`[api/edit/entities] returning the newer clap extended with the entities`)
 
-  return new NextResponse(await serializeClap(clap), {
+  return new NextResponse(await serializeClap(newerClap), {
     status: 200,
     headers: new Headers({ "content-type": "application/x-gzip" }),
   })

src/app/api/v1/edit/storyboards/processShot.ts CHANGED
@@ -1,22 +1,27 @@
 import { ClapProject, ClapSegment, getClapAssetSourceType, newSegment, filterSegments, ClapSegmentFilteringMode } from "@aitube/clap"
-
 import { getVideoPrompt } from "@aitube/engine"
 
 import { getPositivePrompt } from "@/app/api/utils/imagePrompts"
+
 import { generateStoryboard } from "./generateStoryboard"
+import { ClapCompletionMode } from "../types"
 
 export async function processShot({
   shotSegment,
-  clap
+  existingClap,
+  newerClap,
+  mode
 }: {
   shotSegment: ClapSegment
-  clap: ClapProject
+  existingClap: ClapProject
+  newerClap: ClapProject
+  mode: ClapCompletionMode
 }): Promise<void> {
 
   const shotSegments: ClapSegment[] = filterSegments(
     ClapSegmentFilteringMode.START,
     shotSegment,
-    clap.segments
+    existingClap.segments
   )
 
   const shotStoryboardSegments: ClapSegment[] = shotSegments.filter(s =>
@@ -38,18 +43,24 @@ export async function processShot({
       outputType: "image"
     })
 
+    // we fix the existing clap
     if (shotStoryboardSegment) {
-      clap.segments.push(shotStoryboardSegment)
+      existingClap.segments.push(shotStoryboardSegment)
    }
 
     console.log(`[api/v1/edit/storyboards] processShot: generated storyboard segment [${shotSegment.startTimeInMs}:${shotSegment.endTimeInMs}]`)
   }
+
   if (!shotStoryboardSegment) { throw new Error(`failed to generate a newSegment`) }
 
   // TASK 2: GENERATE MISSING STORYBOARD PROMPT
   if (!shotStoryboardSegment?.prompt) {
     // storyboard is missing, let's generate it
-    shotStoryboardSegment.prompt = getVideoPrompt(shotSegments, clap.entityIndex, ["high quality", "crisp", "detailed"])
+    shotStoryboardSegment.prompt = getVideoPrompt(
+      shotSegments,
+      existingClap.entityIndex,
+      ["high quality", "crisp", "detailed"]
+    )
     console.log(`[api/v1/edit/storyboards] processShot: generating storyboard prompt: ${shotStoryboardSegment.prompt}`)
   }
@@ -60,8 +71,8 @@ export async function processShot({
     try {
       shotStoryboardSegment.assetUrl = await generateStoryboard({
         prompt: getPositivePrompt(shotStoryboardSegment.prompt),
-        width: clap.meta.width,
-        height: clap.meta.height,
+        width: existingClap.meta.width,
+        height: existingClap.meta.height,
       })
       shotStoryboardSegment.assetSourceType = getClapAssetSourceType(shotStoryboardSegment.assetUrl)
     } catch (err) {
@@ -69,7 +80,13 @@ export async function processShot({
       throw err
     }
 
     console.log(`[api/v1/edit/storyboards] processShot: generated storyboard image: ${shotStoryboardSegment?.assetUrl?.slice?.(0, 50)}...`)
+
+    // if mode is full, newerClap already contains the reference to shotStoryboardSegment
+    // but if it's partial, we need to manually add it
+    if (mode === "partial") {
+      newerClap.segments.push(shotStoryboardSegment)
+    }
   } else {
     console.log(`[api/v1/edit/storyboards] processShot: there is already a storyboard image: ${shotStoryboardSegment?.assetUrl?.slice?.(0, 50)}...`)
   }

src/app/api/v1/edit/storyboards/route.ts CHANGED
@@ -1,9 +1,11 @@
 import { NextResponse, NextRequest } from "next/server"
-
-import { ClapProject, ClapSegment, parseClap, serializeClap } from "@aitube/clap"
+import queryString from "query-string"
+import { ClapProject, ClapSegment, newClap, parseClap, serializeClap } from "@aitube/clap"
 
 import { getToken } from "@/app/api/auth/getToken"
 
+import { parseCompletionMode } from "@/app/api/parsers/parseCompletionMode"
+
 import { processShot } from "./processShot"
 
 // a helper to generate storyboards for a Clap
@@ -17,32 +19,41 @@ export async function POST(req: NextRequest) {
 
   const jwtToken = await getToken({ user: "anonymous" })
 
+  const qs = queryString.parseUrl(req.url || "")
+  const query = (qs || {}).query
+
+  const mode = parseCompletionMode(query?.c)
+
   const blob = await req.blob()
 
-  const clap: ClapProject = await parseClap(blob)
+  const existingClap: ClapProject = await parseClap(blob)
 
-  if (!clap?.segments) { throw new Error(`no segment found in the provided clap!`) }
+  if (!existingClap?.segments) { throw new Error(`no segment found in the provided clap!`) }
 
-  console.log(`[api/v1/edit/storyboards] detected ${clap.segments.length} segments`)
+  console.log(`[api/v1/edit/storyboards] detected ${existingClap.segments.length} segments`)
 
-  const shotsSegments: ClapSegment[] = clap.segments.filter(s => s.category === "camera")
+  const shotsSegments: ClapSegment[] = existingClap.segments.filter(s => s.category === "camera")
   console.log(`[api/v1/edit/storyboards] detected ${shotsSegments.length} shots`)
 
   if (shotsSegments.length > 32) {
     throw new Error(`Error, this endpoint being synchronous, it is designed for short stories only (max 32 shots).`)
   }
 
+  const newerClap = mode === "full" ? existingClap : newClap()
+
   // we process the shots in parallel (this will increase the queue size in the Gradio spaces)
   await Promise.all(shotsSegments.map(shotSegment =>
     processShot({
       shotSegment,
-      clap
+      existingClap,
+      newerClap,
+      mode,
     })
  ))
 
   // console.log(`[api/v1/edit/storyboards] returning the clap augmented with storyboards`)
 
-  return new NextResponse(await serializeClap(clap), {
+  return new NextResponse(await serializeClap(newerClap), {
     status: 200,
     headers: new Headers({ "content-type": "application/x-gzip" }),
   })

src/app/api/v1/edit/videos/processShot.ts CHANGED
@@ -5,18 +5,23 @@ import { getVideoPrompt } from "@aitube/engine"
 import { getPositivePrompt } from "@/app/api/utils/imagePrompts"
 
 import { generateVideo } from "./generateVideo"
+import { ClapCompletionMode } from "../types"
 
 export async function processShot({
   shotSegment,
-  clap
+  existingClap,
+  newerClap,
+  mode
 }: {
   shotSegment: ClapSegment
-  clap: ClapProject
+  existingClap: ClapProject
+  newerClap: ClapProject
+  mode: ClapCompletionMode
 }): Promise<void> {
   const shotSegments: ClapSegment[] = filterSegments(
     ClapSegmentFilteringMode.START,
     shotSegment,
-    clap.segments
+    existingClap.segments
   )
 
   const shotVideoSegments: ClapSegment[] = shotSegments.filter(s =>
@@ -40,8 +45,9 @@ export async function processShot({
       outputType: "video"
     })
 
+    // we fix the existing clap
     if (shotVideoSegment) {
-      clap.segments.push(shotVideoSegment)
+      existingClap.segments.push(shotVideoSegment)
    }
 
     console.log(`[api/edit/videos] processShot: generated video segment [${shotSegment.startTimeInMs}:${shotSegment.endTimeInMs}]`)
@@ -51,10 +57,15 @@ export async function processShot({
     throw new Error(`failed to generate a new segment`)
   }
 
+
   // TASK 2: GENERATE MISSING VIDEO PROMPT
   if (!shotVideoSegment?.prompt) {
     // video is missing, let's generate it
-    shotVideoSegment.prompt = getVideoPrompt(shotSegments, clap.entityIndex, ["high quality", "crisp", "detailed"])
+    shotVideoSegment.prompt = getVideoPrompt(
+      shotSegments,
+      existingClap.entityIndex,
+      ["high quality", "crisp", "detailed"]
+    )
     console.log(`[api/edit/videos] processShot: generating video prompt: ${shotVideoSegment.prompt}`)
   }
@@ -65,8 +76,8 @@ export async function processShot({
     try {
       shotVideoSegment.assetUrl = await generateVideo({
         prompt: getPositivePrompt(shotVideoSegment.prompt),
-        width: clap.meta.width,
-        height: clap.meta.height,
+        width: existingClap.meta.width,
+        height: existingClap.meta.height,
       })
       shotVideoSegment.assetSourceType = getClapAssetSourceType(shotVideoSegment.assetUrl)
     } catch (err) {
@@ -75,6 +86,13 @@ export async function processShot({
     }
 
     console.log(`[api/edit/videos] processShot: generated video files: ${shotVideoSegment?.assetUrl?.slice?.(0, 50)}...`)
+
+    // if mode is full, newerClap already contains the reference to shotVideoSegment
+    // but if it's partial, we need to manually add it
+    if (mode === "partial") {
+      newerClap.segments.push(shotVideoSegment)
+    }
+
   } else {
     console.log(`[api/edit/videos] processShot: there is already a video file: ${shotVideoSegment?.assetUrl?.slice?.(0, 50)}...`)
   }

src/app/api/v1/edit/videos/route.ts CHANGED
@@ -1,11 +1,13 @@
  import { NextResponse, NextRequest } from "next/server"
-
- import { ClapProject, ClapSegment, parseClap, serializeClap } from "@aitube/clap"
+ import queryString from "query-string"
+ import { ClapProject, ClapSegment, newClap, parseClap, serializeClap } from "@aitube/clap"
 
  import { getToken } from "@/app/api/auth/getToken"
 
  import { processShot } from "./processShot"
 
+ import { parseCompletionMode } from "@/app/api/parsers/parseCompletionMode"
+
 
  // a helper to generate videos for a Clap
  // this is mostly used by external apps such as the Stories Factory
@@ -18,32 +20,41 @@ export async function POST(req: NextRequest) {
 
    const jwtToken = await getToken({ user: "anonymous" })
 
+   const qs = queryString.parseUrl(req.url || "")
+   const query = (qs || {}).query
+
+   const mode = parseCompletionMode(query?.c)
+
    const blob = await req.blob()
 
-   const clap: ClapProject = await parseClap(blob)
+   const existingClap: ClapProject = await parseClap(blob)
 
-   if (!clap?.segments) { throw new Error(`no segment found in the provided clap!`) }
+   if (!existingClap?.segments) { throw new Error(`no segment found in the provided clap!`) }
 
-   console.log(`[api/edit/videos] detected ${clap.segments.length} segments`)
+   console.log(`[api/edit/videos] detected ${existingClap.segments.length} segments`)
 
-   const shotsSegments: ClapSegment[] = clap.segments.filter(s => s.category === "camera")
+   const shotsSegments: ClapSegment[] = existingClap.segments.filter(s => s.category === "camera")
    console.log(`[api/edit/videos] detected ${shotsSegments.length} shots`)
 
    if (shotsSegments.length > 32) {
      throw new Error(`Error, this endpoint being synchronous, it is designed for short stories only (max 32 shots).`)
    }
 
+   const newerClap = mode === "full" ? existingClap : newClap()
+
    // we process the shots in parallel (this will increase the queue size in the Gradio spaces)
    await Promise.all(shotsSegments.map(shotSegment =>
      processShot({
        shotSegment,
-       clap
+       existingClap,
+       newerClap,
+       mode
      })
    ))
 
    console.log(`[api/edit/videos] returning the clap augmented with videos`)
 
-   return new NextResponse(await serializeClap(clap), {
+   return new NextResponse(await serializeClap(newerClap), {
      status: 200,
      headers: new Headers({ "content-type": "application/x-gzip" }),
    })
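The parseCompletionMode helper imported above is one of the new files under src/app/api/parsers/ and its body is not shown in this hunk. Judging from the call site (query?.c) and from the inline decoding patterns this commit removes elsewhere, a plausible reconstruction looks like the following; the default value and exact signature are assumptions:

export type ClapCompletionMode = "full" | "partial"

// Hypothetical sketch: decode the "c" query parameter and fall back
// to "full" for anything that is not explicitly "partial".
export function parseCompletionMode(input?: string | string[] | null): ClapCompletionMode {
  let mode: ClapCompletionMode = "full"
  try {
    if (decodeURIComponent(`${input || ""}`).trim() === "partial") {
      mode = "partial"
    }
  } catch (err) {}
  return mode
}

A caller would then POST a serialized clap to /api/v1/edit/videos?c=partial to receive only the new video segments, or use ?c=full to get the fully augmented clap back.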
src/app/api/v1/search/index.ts CHANGED
@@ -3,7 +3,7 @@
  import YAML from "yaml"
 
  import { predict } from "@/app/api/providers/huggingface/predictWithHuggingFace"
- import { parseRawStringToYAML } from "@/app/api/utils/parseRawStringToYAML"
+ import { parseRawStringToYAML } from "@/app/api/parsers/parseRawStringToYAML"
 
  import { systemPromptForBasicSearchResults, systemPromptForExtendedSearchResults } from "./systemPrompt"
  import type { BasicSearchResult, ExtendedSearchResult } from "./types"
src/app/api/v1/search/route.ts CHANGED
@@ -2,6 +2,9 @@ import { NextResponse, NextRequest } from "next/server"
  import queryString from "query-string"
  import { BasicSearchResult, ExtendedSearchResult } from "./types"
  import { extend, search } from "."
+ import { parsePrompt } from "../../parsers/parsePrompt"
+ import { parseLatentSearchMode } from "../../parsers/parseLatentSearchMode"
+ import { parseBasicSearchResult } from "../../parsers/parseBasicSearchResults"
 
  export type LatentSearchMode =
    | "basic"
@@ -13,18 +16,11 @@ export async function GET(req: NextRequest, res: NextResponse) {
    const qs = queryString.parseUrl(req.url || "")
    const query = (qs || {}).query
 
-   let mode: LatentSearchMode = "basic"
-   try {
-     mode = decodeURIComponent(query?.m?.toString() || "basic").trim() as LatentSearchMode
-   } catch (err) {}
-
+   const mode = parseLatentSearchMode(query?.m)
 
    if (mode === "basic") {
-     let prompt = ""
-     try {
-       prompt = decodeURIComponent(query?.p?.toString() || "").trim() as string
-     } catch (err) {}
-
+     const prompt = parsePrompt(query?.p)
+
      const basicSearchResults: BasicSearchResult[] = await search({
        prompt,
        nbResults: 4
@@ -39,14 +35,7 @@
      })
    } else if (mode === "extended") {
 
-     let basicResults: BasicSearchResult[] = []
-     try {
-       const rawString = decodeURIComponent(query?.e?.toString() || "").trim() as string
-       const maybeExistingResults = JSON.parse(rawString)
-       if (Array.isArray(maybeExistingResults)) {
-         basicResults = maybeExistingResults
-       }
-     } catch (err) {}
+     const basicResults = parseBasicSearchResult(query?.e)
 
      const extendedSearchResults: ExtendedSearchResult[] = await extend({
        basicResults
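These parser helpers are one-for-one extractions of the inline try/catch decoding removed above; the actual files under src/app/api/parsers/ are not shown in this section. A sketch that mirrors the removed logic (the exact signatures are assumptions):

type LatentSearchMode = "basic" | "extended"

// Sketch of parseLatentSearchMode, mirroring the removed inline logic:
// decode the "m" query parameter and default to "basic".
export function parseLatentSearchMode(input?: string | string[] | null): LatentSearchMode {
  let mode: LatentSearchMode = "basic"
  try {
    mode = decodeURIComponent(`${input || "basic"}`).trim() as LatentSearchMode
  } catch (err) {}
  return mode
}

// Sketch of parsePrompt, mirroring the removed inline logic:
// decode the "p" query parameter and default to an empty string.
export function parsePrompt(input?: string | string[] | null): string {
  let prompt = ""
  try {
    prompt = decodeURIComponent(`${input || ""}`).trim()
  } catch (err) {}
  return prompt
}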
src/app/latent/watch/page.tsx CHANGED
@@ -1,14 +1,13 @@
- import { encode, decode } from 'js-base64'
-
  import { LatentQueryProps } from "@/types/general"
- import { BasicSearchResult, ExtendedSearchResult } from "@/app/api/v1/search/types"
+ import { ExtendedSearchResult } from "@/app/api/v1/search/types"
+ import { extend } from "@/app/api/v1/search"
+ import { parseBasicSearchResult } from '@/app/api/parsers/parseBasicSearchResult'
+
 
  import { Main } from "../../main"
  import { getNewMediaInfo } from "../../api/generators/search/getNewMediaInfo"
  import { getToken } from "../../api/auth/getToken"
 
- import { extend } from "@/app/api/v1/search"
-
  // https://jmswrnr.com/blog/protecting-next-js-api-routes-query-parameters
 
  export default async function DreamPage({
@@ -19,8 +18,8 @@ export default async function DreamPage({
  }: LatentQueryProps) {
    const jwtToken = await getToken({ user: "anonymous" })
    console.log(`[/latent/watch] prompt =`, prompt)
-   let basicResult = JSON.parse(decode(`${prompt || ""}`)) as BasicSearchResult
-
+   const basicResult = parseBasicSearchResult(prompt)
+
    console.log("[/latent/watch] basicResult:", basicResult)
 
    // note that we should generate a longer synopsis from the autocomplete result
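Note that the two call sites this commit consolidates decoded their payloads differently: the search route used decodeURIComponent + JSON.parse, while this page used js-base64. The shared parseBasicSearchResult helper is not shown here, so the sketch below simply mirrors what this page did before; treat the base64 transport format, error handling, and return shape as assumptions:

import { decode } from "js-base64"
import type { BasicSearchResult } from "@/app/api/v1/search/types"

// Hypothetical sketch: decode a base64-encoded JSON payload into a
// BasicSearchResult, as the removed inline code on this page did.
export function parseBasicSearchResult(input?: string | string[] | null): BasicSearchResult {
  return JSON.parse(decode(`${input || ""}`)) as BasicSearchResult
}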
src/app/main.tsx CHANGED
@@ -81,7 +81,7 @@ export function Main({
    const setPublicTracks = useStore(s => s.setPublicTracks)
    const setPublicTrack = useStore(s => s.setPublicTrack)
 
-   console.log("[main.tsx] latentMedia = ", latentMedia)
+   // console.log("[main.tsx] latentMedia = ", latentMedia)
 
    useEffect(() => {
      if (typeof jwtToken !== "string" && !jwtToken) { return }
src/app/views/user-channel-view/index.tsx CHANGED
@@ -15,10 +15,10 @@ import { Button } from "@/components/ui/button"
  import { submitVideoRequest } from "@/app/api/actions/submitVideoRequest"
  import { PendingVideoList } from "@/components/interface/pending-video-list"
  import { getChannelVideos } from "@/app/api/actions/ai-tube-hf/getChannelVideos"
- import { parseVideoModelName } from "@/app/api/utils/parseVideoModelName"
+ import { parseVideoModelName } from "@/app/api/parsers/parseVideoModelName"
  import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@/components/ui/select"
  import { defaultVideoModel, defaultVideoOrientation, defaultVoice } from "@/app/config"
- import { parseVideoOrientation } from "@/app/api/utils/parseVideoOrientation"
+ import { parseVideoOrientation } from "@/app/api/parsers/parseVideoOrientation"
 
  export function UserChannelView() {
    const [_isPending, startTransition] = useTransition()
src/lib/utils/parseMediaProjectionType.ts CHANGED
@@ -1,4 +1,4 @@
- import { parseProjectionFromLoRA } from "@/app/api/utils/parseProjectionFromLoRA"
+ import { parseProjectionFromLoRA } from "@/app/api/parsers/parseProjectionFromLoRA"
  import { MediaInfo, MediaProjection } from "@/types/general"
 
  import { parseAssetToCheckIfGaussian } from "./parseAssetToCheckIfGaussian"