jbilcke-hf committed
Commit 8101ed0
1 Parent(s): fddab62

yep, it works

src/app/api/generators/search/searchResultToMediaInfo.ts CHANGED
@@ -87,6 +87,7 @@ export async function searchResultToMediaInfo(searchResult: LatentSearchResult):
     prompt: searchResult.thumbnail,
     negativePrompt: "",
     nbFrames: 1,
+    nbFPS: 1,
     nbSteps: 4,
     width: 1024,
     height: 576,
src/app/api/providers/videochain/renderWithVideoChain.ts CHANGED
@@ -11,6 +11,7 @@ export async function newRender({
   prompt,
   negativePrompt,
   nbFrames,
+  nbFPS,
   nbSteps,
   width,
   height,
@@ -21,6 +22,7 @@ export async function newRender({
   prompt: string
   negativePrompt: string
   nbFrames: number
+  nbFPS: number
   nbSteps: number
   width: number
   height: number
@@ -64,6 +66,7 @@ export async function newRender({
       negativePrompt,
       // nbFrames: 8 and nbSteps: 15 --> ~10 sec generation
       nbFrames, // when nbFrames is 1, we will only generate static images
+      nbFPS,
       nbSteps, // 20 = fast, 30 = better, 50 = best
       width,
       height,
@@ -72,8 +75,11 @@ export async function newRender({
       segmentation: "disabled", // one day we will remove this param, to make it automatic
       upscalingFactor: 1, // let's disable upscaling right now
       turbo, // always use turbo mode (it's for images only anyway)
+
       // also, what could be done is that we could use the width and height to control this
       cache: "ignore", // shouldRenewCache ? "renew" : "use",
+
+      // this is the "blocking mode", which saves us from having to poll the server
       wait: true,
     } as Partial<RenderRequest>),
     cache: 'no-store',
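The new nbFPS field complements nbFrames: together they determine clip length, while nbFrames: 1 still means a static image. A minimal sketch of that arithmetic (the helper name is hypothetical, not part of the commit):

function estimateClipDurationInSec(nbFrames: number, nbFPS: number): number {
  // a single frame is treated as a static image rather than a clip
  if (nbFrames <= 1) { return 0 }
  return nbFrames / nbFPS
}

estimateClipDurationInSec(20, 10) // -> 2, the settings used by the video resolver below
estimateClipDurationInSec(1, 1)   // -> 0, the settings used by the image resolver below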
src/app/api/resolvers/image/route.ts CHANGED
@@ -4,6 +4,8 @@ import queryString from "query-string"
 import { newRender, getRender } from "../../providers/videochain/renderWithVideoChain"
 import { generateSeed } from "@/lib/utils/generateSeed"
 import { sleep } from "@/lib/utils/sleep"
+import { getNegativePrompt, getPositivePrompt } from "../../utils/imagePrompts"
+import { getContentType } from "@/lib/data/getContentType"
 
 export async function GET(req: NextRequest) {
 
@@ -18,12 +20,21 @@ let prompt = ""
     return NextResponse.json({ error: 'no prompt provided' }, { status: 400 });
   }
 
+  let format = "binary"
+  try {
+    const f = decodeURIComponent(query?.f?.toString() || "").trim()
+    if (f === "json" || f === "binary") { format = f }
+  } catch (err) {}
+
   // console.log("calling await newRender")
+  prompt = getPositivePrompt(prompt)
+  const negativePrompt = getNegativePrompt()
 
   let render = await newRender({
-    prompt: `${prompt}, cinematic, photo, sublime, pro quality, sharp, crisp, beautiful, impressive, amazing, high quality, 4K`,
-    negativePrompt: "logo, text, ui, hud, interface, buttons, ad, signature, copyright, blurry, cropped, bad quality",
+    prompt,
+    negativePrompt,
     nbFrames: 1,
+    nbFPS: 1,
     nbSteps: 8,
     width: 1024,
     height: 576,
@@ -36,11 +47,23 @@ let prompt = ""
 
   while (attempts-- > 0) {
     if (render.status === "completed") {
-      return NextResponse.json(render, {
-        status: 200,
-        statusText: "OK",
-      })
-
+      if (format === "json") {
+        return NextResponse.json(render, {
+          status: 200,
+          statusText: "OK",
+        })
+      } else {
+        const contentType = getContentType(render.assetUrl)
+        const base64String = render.assetUrl.split(";base64,").pop() || ""
+        const data = Buffer.from(base64String, "base64")
+        const headers = new Headers()
+        headers.set('Content-Type', contentType)
+        return new NextResponse(data, {
+          status: 200,
+          statusText: "OK",
+          headers
+        })
+      }
     }
 
     if (render.status === "error") {
@@ -50,9 +73,9 @@ let prompt = ""
       })
     }
 
-    await sleep(1000) // minimum wait time
+    await sleep(2000) // minimum wait time
 
-    console.log("asking getRender")
+    // console.log("asking getRender")
     render = await getRender(render.renderId)
   }
 
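A sketch of how a client could call this route, assuming same-origin requests: the default response is the raw image bytes (decoded from the base64 asset URL), while ?f=json returns the render object instead. The function names are hypothetical:

async function fetchGeneratedImageUrl(prompt: string): Promise<string> {
  // default "binary" format: the route returns the decoded image bytes
  const res = await fetch(`/api/resolvers/image?p=${encodeURIComponent(prompt)}`)
  const blob = await res.blob()
  return URL.createObjectURL(blob) // usable directly as an <img> src
}

async function fetchGeneratedImageScene(prompt: string) {
  // "?f=json": the route returns the render object (status, assetUrl, etc.) as JSON
  const res = await fetch(`/api/resolvers/image?p=${encodeURIComponent(prompt)}&f=json`)
  return res.json()
}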
 
src/app/api/resolvers/video/route.ts CHANGED
@@ -4,44 +4,101 @@ import queryString from "query-string"
 import { newRender, getRender } from "../../providers/videochain/renderWithVideoChain"
 import { generateSeed } from "@/lib/utils/generateSeed"
 import { sleep } from "@/lib/utils/sleep"
+import { getNegativePrompt, getPositivePrompt } from "../../utils/imagePrompts"
 import { getContentType } from "@/lib/data/getContentType"
 
 export async function GET(req: NextRequest) {
 
-  const qs = queryString.parseUrl(req.url || "")
-  const query = (qs || {}).query
+  const qs = queryString.parseUrl(req.url || "")
+  const query = (qs || {}).query
 
-  let prompt = ""
+
+  let prompt = ""
   try {
     prompt = decodeURIComponent(query?.p?.toString() || "").trim()
   } catch (err) {}
+
   if (!prompt) {
     return NextResponse.json({ error: 'no prompt provided' }, { status: 400 });
   }
 
+  let format = "binary"
+  try {
+    const f = decodeURIComponent(query?.f?.toString() || "").trim()
+    if (f === "json" || f === "binary") { format = f }
+  } catch (err) {}
+
+  prompt = getPositivePrompt(prompt)
+  const negativePrompt = getNegativePrompt()
+
   // console.log("calling await newRender")
 
   let render = await newRender({
-    prompt: `${prompt}, cinematic, photo, sublime, pro quality, sharp, crisp, beautiful, impressive, amazing, high quality, 4K`,
-    negativePrompt: "logo, text, ui, hud, interface, buttons, ad, signature, copyright, blurry, cropped, bad quality",
-    nbFrames: 1,
-    nbSteps: 8,
-    width: 1024,
-    height: 576,
-    turbo: true,
+    prompt,
+    negativePrompt,
+
+    // ATTENTION: changing those will slow things down to 5-6s of loading time (compared to 3-4s)
+    // with no real visible change
+
+    nbFrames: 20, // apparently the model can only do 2 seconds at 10, so be it
+
+    nbFPS: 10,
+
+    // possible values are 1, 2, 4, and 8
+    // but I don't see much improvement with 8 to be honest
+    // the best one seems to be 4
+    nbSteps: 4,
+
+    // this corresponds roughly to 16:9,
+    // which is the video aspect ratio used by AiTube
+
+    // unfortunately, this is too compute intensive, so we have to take half of that
+    // width: 1024,
+    // height: 576,
+
+    // IMPORTANT: since we use the tailwind class aspect-video,
+    // you cannot use just anything here!
+    // this must be aligned with whatever you choose in the frontend UI
+    //
+    // if you don't do this:
+    // - that's pixel waste, you are rendering some area for nothing (and on this project each pixel is a precious nanosecond)
+    // - clicks won't be aligned with the video, so segmentation will be off
+    // eg you cannot use 1024x512 or 512x256, because that's not aspect-video
+    // (you would have to create an entry in the tailwind config to do that properly)
+    //
+    // that's not the only constraint: you also need to respect this:
+    // `height` and `width` have to be divisible by 8 (use 32 to be safe)
+    // width: 512,
+    // height: 288,
+    width: 456, // 512,
+    height: 256, // 288,
+
+    turbo: true, // without much effect for videos as of now, as we only support turbo (AnimateDiff Lightning)
     shouldRenewCache: true,
     seed: generateSeed()
   })
 
-  let attempts = 20
+  let attempts = 10
 
   while (attempts-- > 0) {
     if (render.status === "completed") {
-      return NextResponse.json(render, {
-        status: 200,
-        statusText: "OK",
-      })
-
+      if (format === "json") {
+        return NextResponse.json(render, {
+          status: 200,
+          statusText: "OK",
+        })
+      } else {
+        const contentType = getContentType(render.assetUrl)
+        const base64String = render.assetUrl.split(";base64,").pop() || ""
+        const data = Buffer.from(base64String, "base64")
+        const headers = new Headers()
+        headers.set('Content-Type', contentType)
+        return new NextResponse(data, {
+          status: 200,
+          statusText: "OK",
+          headers
+        })
+      }
     }
 
     if (render.status === "error") {
@@ -51,9 +108,9 @@ let prompt = ""
       })
     }
 
-    await sleep(2000) // minimum wait time
+    await sleep(1000) // minimum wait time
 
-    console.log("asking getRender")
+    // console.log("asking getRender")
     render = await getRender(render.renderId)
   }
 
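The comments above impose two constraints on the render size: both dimensions divisible by 8 (ideally 32), and an aspect ratio matching the frontend's aspect-video (16:9) class. A quick check, with a hypothetical helper name, showing that 456x256 roughly satisfies both:

function checkRenderSize(width: number, height: number) {
  const divisibleBy8 = width % 8 === 0 && height % 8 === 0
  const aspectRatio = width / height      // 456 / 256 = 1.78125
  const target = 16 / 9                   // aspect-video is about 1.7778
  const closeTo16by9 = Math.abs(aspectRatio - target) < 0.02
  return { divisibleBy8, aspectRatio, closeTo16by9 }
}

checkRenderSize(456, 256) // -> { divisibleBy8: true, aspectRatio: 1.78125, closeTo16by9: true }
checkRenderSize(512, 288) // -> also valid, but heavier to render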
 
src/app/api/utils/imagePrompts.ts ADDED
@@ -0,0 +1,57 @@
+// should we really have default prompts in here?
+// I think they should probably be defined at the application layer (i.e. in the client)
+
+export function addWordsIfNotPartOfThePrompt(prompt: string = "", words: string[] = []): string {
+  const promptWords = prompt.split(",").map(w => w.trim().toLocaleLowerCase())
+
+  return [
+    prompt,
+    // we add our keywords, unless they are already part of the prompt
+    ...words.filter(w => !promptWords.includes(w.toLocaleLowerCase()))
+  ].join(", ")
+}
+
+export function getPositivePrompt(prompt: string = "", triggerWord = "") {
+  return addWordsIfNotPartOfThePrompt(prompt, [
+    triggerWord,
+    "sublime",
+    "pro quality",
+    "sharp",
+    "crisp",
+    "beautiful",
+    "impressive",
+    "amazing",
+    "4K",
+    "hd"
+  ])
+}
+
+export function getNegativePrompt(prompt: string = "") {
+  return addWordsIfNotPartOfThePrompt(prompt, [
+    "cropped",
+    // "underexposed", // <-- can be a desired style
+    // "overexposed", // <-- can be a desired style
+    "logo",
+    "hud",
+    "ui",
+    "censored",
+    "blurry",
+    "watermark",
+    "watermarked",
+    "copyright",
+    "extra digit",
+    "fewer digits",
+    "bad fingers",
+    "bad quality",
+    "worst quality",
+    "low quality",
+    "low resolution",
+    "glitch", // <-- keep or not? could be a desired style?
+    // "deformed",
+    // "mutated",
+    // "ugly",
+    // "disfigured",
+    // "3D render", // <-- keep or not? could be a desired style?
+    "signature"
+  ])
+}
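Usage sketch for the helpers above, assuming a caller in the same folder; the outputs follow from the comma-based, case-insensitive deduplication in addWordsIfNotPartOfThePrompt:

import { addWordsIfNotPartOfThePrompt, getPositivePrompt } from "./imagePrompts"

// keywords already present in the prompt are not repeated
addWordsIfNotPartOfThePrompt("a cat, sharp", ["sharp", "4K"])
// -> "a cat, sharp, 4K"

// the optional trigger word is prepended to the default keyword list
getPositivePrompt("closeup of Queen angelfish, bokeh, sharp", "cinematic")
// -> "closeup of Queen angelfish, bokeh, sharp, cinematic, sublime, pro quality, crisp, beautiful, impressive, amazing, 4K, hd"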
src/components/interface/latent-engine/components/content-layer/index.tsx CHANGED
@@ -26,7 +26,7 @@ export const ContentLayer = forwardRef(function ContentLayer({
       ref={ref}
       onClick={onClick}
     >
-      <div className="h-full aspect-video">
+      <div className="h-full aspect-video opacity-60">
         {children}
       </div>
     </div>
src/components/interface/latent-engine/core/engine.tsx CHANGED
@@ -31,6 +31,9 @@ function LatentEngine({
   const setVideoElement = useLatentEngine(s => s.setVideoElement)
   const setSegmentationElement = useLatentEngine(s => s.setSegmentationElement)
 
+  const simulationVideoPlaybackFPS = useLatentEngine(s => s.simulationVideoPlaybackFPS)
+  const simulationRenderingTimeFPS = useLatentEngine(s => s.simulationRenderingTimeFPS)
+
   const streamType = useLatentEngine(s => s.streamType)
   const isStatic = useLatentEngine(s => s.isStatic)
   const isLive = useLatentEngine(s => s.isLive)
@@ -254,15 +257,17 @@ function LatentEngine({
 
         {/* right-side buttons */}
         <div className={cn(`
-          flex flex-none
+          flex flex-none flex-row space-x-2
           items-center justify-center
-          w-14 h-full
+          w-32 h-full
         `)}>
           {/*
 
           TODO: put a fullscreen button (and mode) here
 
           */}
+          <div className="mono text-xs text-center">playback: {Math.round(simulationVideoPlaybackFPS * 100) / 100} FPS</div>
+          <div className="mono text-xs text-center">rendering: {Math.round(simulationRenderingTimeFPS * 100) / 100} FPS</div>
         </div>
       </div>
     </div>
src/components/interface/latent-engine/core/types.ts CHANGED
@@ -53,6 +53,11 @@ export type LatentEngineStore = {
 
   simulationPromise?: Promise<void>
   simulationPending: boolean // used as a "lock"
+  simulationStartedAt: number
+  simulationEndedAt: number
+  simulationDurationInMs: number
+  simulationVideoPlaybackFPS: number
+  simulationRenderingTimeFPS: number
 
   renderingIntervalId: NodeJS.Timeout | string | number | undefined
   renderingIntervalDelayInMs: number
src/components/interface/latent-engine/resolvers/image/generateImage.ts CHANGED
@@ -1,15 +1,7 @@
-import { RenderedScene } from "@/types/general"
-
 export async function generateImage(prompt: string): Promise<string> {
   const requestUri = `/api/resolvers/image?p=${encodeURIComponent(prompt)}`
-
   const res = await fetch(requestUri)
-
-  const scene = (await res.json()) as RenderedScene
-
-  if (scene.error || scene.status !== "completed") {
-    throw new Error(scene.error)
-  }
-
-  return scene.assetUrl
+  const blob = await res.blob()
+  const url = URL.createObjectURL(blob)
+  return url
 }
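Since generateImage now returns a blob object URL instead of a base64 data URI, the underlying blob stays in memory until the URL is revoked. A sketch (not part of the commit) of a consumer releasing it once the image has loaded:

async function showGeneratedImage(prompt: string) {
  const url = await generateImage(prompt)
  const img = document.createElement("img")
  // release the underlying blob once the image has been decoded
  img.onload = () => URL.revokeObjectURL(url)
  img.src = url
  document.body.appendChild(img)
}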
src/components/interface/latent-engine/resolvers/video/generateVideo.ts CHANGED
@@ -1,17 +1,8 @@
-import { RenderedScene } from "@/types/general"
-
 export async function generateVideo(prompt: string): Promise<string> {
   const requestUri = `/api/resolvers/video?p=${encodeURIComponent(prompt)}`
-
-  // console.log(`generateVideo: calling ${requestUri}`)
-
   const res = await fetch(requestUri)
+  const blob = await res.blob()
+  const url = URL.createObjectURL(blob)
+  return url
 
-  const scene = (await res.json()) as RenderedScene
-
-  if (scene.error || scene.status !== "completed") {
-    throw new Error(scene.error)
-  }
-
-  return scene.assetUrl
 }
src/components/interface/latent-engine/resolvers/video/index.tsx CHANGED
@@ -23,6 +23,18 @@ export async function resolve(segment: ClapSegment, clap: ClapProject): Promise<
   // note: the latent-video class is not used for styling, but to grab the component
   // from JS when we need to segment etc
   return (
-    <img className="latent-video object-cover" src={assetUrl} />
+    <video
+      loop
+      className="latent-video object-cover h-full"
+      playsInline
+
+      // muted needs to be enabled for iOS to properly autoplay
+      muted
+      autoPlay
+
+      // we hide the controls
+      // controls
+      src={assetUrl}>
+    </video>
   )
 }
src/components/interface/latent-engine/store/useLatentEngine.ts CHANGED
@@ -39,6 +39,11 @@ export const useLatentEngine = create<LatentEngineStore>((set, get) => ({
 
   simulationPromise: undefined,
   simulationPending: false,
+  simulationStartedAt: performance.now(),
+  simulationEndedAt: performance.now(),
+  simulationDurationInMs: 0,
+  simulationVideoPlaybackFPS: 0,
+  simulationRenderingTimeFPS: 0,
 
   renderingIntervalId: undefined,
   renderingIntervalDelayInMs: 2000, // 2 sec
@@ -174,7 +179,7 @@ export const useLatentEngine = create<LatentEngineStore>((set, get) => ({
       console.log("onClickOnSegmentationLayer")
     }
     // TODO use the videoElement if this is a video!
-    if (!imageElement) { return }
+    if (!videoElement) { return }
 
     const box = event.currentTarget.getBoundingClientRect()
 
@@ -183,17 +188,17 @@ export const useLatentEngine = create<LatentEngineStore>((set, get) => ({
 
     const x = px / box.width
    const y = py / box.height
-    console.log(`onClickOnSegmentationLayer: user clicked on `, { x, y, px, py, box, imageElement })
+    console.log(`onClickOnSegmentationLayer: user clicked on `, { x, y, px, py, box, videoElement })
 
    const fn = async () => {
-      const results: InteractiveSegmenterResult = await segmentFrame(imageElement, x, y)
+      const results: InteractiveSegmenterResult = await segmentFrame(videoElement, x, y)
      get().processClickOnSegment(results)
    }
    fn()
  },
 
  togglePlayPause: (): boolean => {
-    const { isLoaded, isPlaying, renderingIntervalId } = get()
+    const { isLoaded, isPlaying, renderingIntervalId, videoElement } = get()
    if (!isLoaded) { return false }
 
    const newValue = !isPlaying
@@ -201,11 +206,25 @@ export const useLatentEngine = create<LatentEngineStore>((set, get) => ({
    clearInterval(renderingIntervalId)
 
    if (newValue) {
+      if (videoElement) {
+        try {
+          videoElement.play()
+        } catch (err) {
+          console.error(`togglePlayPause: failed to start the video (${err})`)
+        }
+      }
      set({
        isPlaying: true,
        renderingIntervalId: setTimeout(() => { get().runRenderingLoop() }, 0)
      })
    } else {
+      if (videoElement) {
+        try {
+          videoElement.pause()
+        } catch (err) {
+          console.error(`togglePlayPause: failed to pause the video (${err})`)
+        }
+      }
      set({ isPlaying: false })
    }
 
@@ -264,6 +283,7 @@ export const useLatentEngine = create<LatentEngineStore>((set, get) => ({
 
    set({
      simulationPending: true,
+      simulationStartedAt: performance.now(),
    })
 
    try {
@@ -284,7 +304,7 @@ export const useLatentEngine = create<LatentEngineStore>((set, get) => ({
      }
      */
 
-      await sleep(500)
+      // await sleep(500)
 
      // note: since we are asynchronous, we need to regularly check if
      // the user asked to pause the system or not
@@ -292,6 +312,8 @@ export const useLatentEngine = create<LatentEngineStore>((set, get) => ({
        // console.log(`runSimulationLoop: rendering video content layer..`)
        // we only grab the first one
 
+
+
        const videoLayer = (await resolveSegments(clap, "video", 1)).at(0)
 
        if (get().isPlaying) {
@@ -302,6 +324,12 @@ export const useLatentEngine = create<LatentEngineStore>((set, get) => ({
 
          const { videoElement, imageElement, segmentationElement } = get()
 
+          if (videoElement) {
+            // yes, it is a very dirty trick
+            // yes, it will look bad
+            videoElement.defaultPlaybackRate = 0.5
+          }
+
          const canvas = drawSegmentation({
            // no mask means this will effectively clear the canvas
            canvas: segmentationElement,
@@ -309,7 +337,7 @@ export const useLatentEngine = create<LatentEngineStore>((set, get) => ({
          })
 
 
-          console.log(`runSimulationLoop: rendered video content layer`)
+          // console.log(`runSimulationLoop: rendered video content layer`)
        }
      }
 
@@ -319,7 +347,7 @@ export const useLatentEngine = create<LatentEngineStore>((set, get) => ({
 
      try {
        if (get().isPlaying) {
-          console.log(`runSimulationLoop: rendering UI layer..`)
+          // console.log(`runSimulationLoop: rendering UI layer..`)
 
          // note: for now we only display one element, to avoid handling a list of html elements
          const interfaceLayer = (await resolveSegments(clap, "interface", 1)).at(0)
@@ -328,15 +356,30 @@ export const useLatentEngine = create<LatentEngineStore>((set, get) => ({
            interfaceLayer
          })
 
-          console.log(`runSimulationLoop: rendered UI layer`)
+          // console.log(`runSimulationLoop: rendered UI layer`)
        }
      }
    } catch (err) {
      console.error(`runSimulationLoop failed to render UI layer ${err}`)
    }
 
+    const simulationEndedAt = performance.now()
+    const simulationDurationInMs = simulationEndedAt - get().simulationStartedAt
+    const simulationDurationInSec = simulationDurationInMs / 1000
+
+    // I've counted the frames manually, and in terms of pure video playback we indeed have
+    // 10 fps divided by 2 (the 0.5 playback factor)
+    const videoFPS = 10
+    const videoDurationInSec = 1
+    const videoPlaybackSpeed = 0.5
+    const simulationVideoPlaybackFPS = videoDurationInSec * videoFPS * videoPlaybackSpeed
+    const simulationRenderingTimeFPS = (videoDurationInSec * videoFPS) / simulationDurationInSec
    set({
      simulationPending: false,
+      simulationEndedAt,
+      simulationDurationInMs,
+      simulationVideoPlaybackFPS,
+      simulationRenderingTimeFPS,
    })
  },
 
src/components/interface/track-card/index.tsx CHANGED
@@ -118,6 +118,9 @@ export function TrackCard({
           // prevent iOS from attempting to open the video in full screen, which is annoying
           playsInline
 
+          // muted needs to be enabled for iOS to properly autoplay
+          muted
+
           ref={ref}
           src={media.assetUrlHd || media.assetUrl}
           className={cn(
src/lib/clap/getMockClap.ts CHANGED
@@ -4,10 +4,11 @@ import { ClapProject } from "./types"
 
 let defaultSegmentDurationInMs = 2000
 
-// const demoPrompt = "closeup of Queen angelfish, bokeh"
-// const demoPrompt = "portrait of a man tv news anchor, pierre-jean-hyves, serious, bokeh"
-// const demoPrompt = "screenshot from Call of Duty, FPS game, nextgen, videogame screenshot, unreal engine, raytracing"
-const demoPrompt = "screenshot from a flight simulator, nextgen, videogame screenshot, unreal engine, raytracing"
+let demoPrompt = "closeup of Queen angelfish, bokeh"
+// demoPrompt = "portrait of a man tv news anchor, pierre-jean-hyves, serious, bokeh"
+// demoPrompt = "screenshot from Call of Duty, FPS game, nextgen, videogame screenshot, unreal engine, raytracing"
+demoPrompt = "screenshot from a flight simulator, nextgen, videogame screenshot, unreal engine, raytracing"
+demoPrompt = "screenshot from fallout3, fallout4, wasteland, 3rd person RPG, nextgen, videogame screenshot, unreal engine, raytracing"
 
 export function getMockClap({
   prompt = demoPrompt,
@@ -66,15 +67,31 @@ export function getMockClap({
 
   currentElapsedTimeInMs += currentSegmentDurationInMs
   */
-
-  clap.segments.push(newSegment({
-    startTimeInMs: currentElapsedTimeInMs,
-    endTimeInMs: currentSegmentDurationInMs,
-    category: "video",
-    prompt,
-    label: "demo",
-    outputType: "video",
-  }))
+
+  // this is just for us, to quickly switch between video or image
+  const generationMode: "IMAGE" | "VIDEO" =
+    "VIDEO"
+    // "IMAGE"
+
+  if (generationMode === "VIDEO") {
+    clap.segments.push(newSegment({
+      startTimeInMs: currentElapsedTimeInMs,
+      endTimeInMs: currentSegmentDurationInMs,
+      category: "video",
+      prompt,
+      label: "demo",
+      outputType: "video",
+    }))
+  } else {
+    clap.segments.push(newSegment({
+      startTimeInMs: currentElapsedTimeInMs,
+      endTimeInMs: currentSegmentDurationInMs,
+      category: "storyboard",
+      prompt,
+      label: "demo",
+      outputType: "image",
+    }))
+  }
 
   return clap
 }
src/types/general.ts CHANGED
@@ -21,12 +21,8 @@ export interface RenderRequest {
   // actionnables are names of things like "chest", "key", "tree", "chair" etc
   actionnables: string[]
 
-  // note: this is the number of frames for Zeroscope,
-  // which is currently configured to only output 3 seconds, so:
-  // nbFrames=8 -> 1 sec
-  // nbFrames=16 -> 2 sec
-  // nbFrames=24 -> 3 sec
-  nbFrames: number // min: 1, max: 24
+  nbFrames: number
+  nbFPS: number
 
   nbSteps: number // min: 1, max: 50
 
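With the Zeroscope-specific comment gone, a request now carries both a frame count and a frame rate. Example payloads consistent with the resolvers above (treating width and height as existing fields of the interface, which this diff does not show):

const videoRequest: Partial<RenderRequest> = {
  nbFrames: 20, // 20 frames...
  nbFPS: 10,    // ...at 10 fps -> a 2 second clip
  nbSteps: 4,
  width: 456,
  height: 256,
}

const imageRequest: Partial<RenderRequest> = {
  nbFrames: 1, // a single frame -> a static image
  nbFPS: 1,
  nbSteps: 8,
  width: 1024,
  height: 576,
}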