Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Commit
•
b843e59
1
Parent(s):
2e12a66
improve speech bubbles
Browse files
src/app/queries/getSystemPrompt.ts
CHANGED
@@ -19,7 +19,7 @@ export function getSystemPrompt({
|
|
19 |
}) {
|
20 |
return [
|
21 |
`You are a writer specialized in ${preset.llmPrompt}`,
|
22 |
-
`Please write detailed drawing instructions and short (2-3 sentences long) speeches and narrator captions for the ${firstNextOrLast} ${nbPanelsToGenerate} panels (out of ${maxNbPanels} in total) of a new story, but keep it open-ended (it will be continued and expanded later). Please make sure each of those ${nbPanelsToGenerate} panels include info about character gender, age, origin, clothes, colors, location, lights, etc. Speeches are the dialogues, so they MUST be written in 1st person style. Only generate those ${nbPanelsToGenerate} panels, but take into account the fact the panels are part of a longer story (${maxNbPanels} panels long).`,
|
23 |
`Give your response as a VALID JSON array like this: \`Array<{ panel: number; instructions: string; speech: string; caption: string; }>\`.`,
|
24 |
// `Give your response as Markdown bullet points.`,
|
25 |
`Be brief in the instructions, the speeches and the narrative captions of those ${nbPanelsToGenerate} panels, don't add your own comments. Write speeces in 1st person style, with intensity, humor etc. The speech must be captivating, smart, entertaining, usually a sentence or two. Be straight to the point, return JSON and never reply things like "Sure, I can.." etc. Reply using valid JSON!! Important: Write valid JSON!`
|
|
|
19 |
}) {
|
20 |
return [
|
21 |
`You are a writer specialized in ${preset.llmPrompt}`,
|
22 |
+
`Please write detailed drawing instructions and short (2-3 sentences long) speeches and narrator captions for the ${firstNextOrLast} ${nbPanelsToGenerate} panels (out of ${maxNbPanels} in total) of a new story, but keep it open-ended (it will be continued and expanded later). Please make sure each of those ${nbPanelsToGenerate} panels include info about character gender, age, origin, clothes, colors, location, lights, etc. Speeches are the dialogues, so they MUST be written in 1st person style, and be short, eg a couple of short sentences. Only generate those ${nbPanelsToGenerate} panels, but take into account the fact the panels are part of a longer story (${maxNbPanels} panels long).`,
|
23 |
`Give your response as a VALID JSON array like this: \`Array<{ panel: number; instructions: string; speech: string; caption: string; }>\`.`,
|
24 |
// `Give your response as Markdown bullet points.`,
|
25 |
`Be brief in the instructions, the speeches and the narrative captions of those ${nbPanelsToGenerate} panels, don't add your own comments. Write speeces in 1st person style, with intensity, humor etc. The speech must be captivating, smart, entertaining, usually a sentence or two. Be straight to the point, return JSON and never reply things like "Sure, I can.." etc. Reply using valid JSON!! Important: Write valid JSON!`
|
src/lib/bubble/injectSpeechBubbleInTheBackground.ts
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
import { ImageSegmenter, FilesetResolver } from "@mediapipe/tasks-vision"
|
2 |
import { actionman } from "../fonts";
|
3 |
|
4 |
interface BoundingBox {
|
@@ -53,7 +53,7 @@ export async function injectSpeechBubbleInTheBackground(params: {
|
|
53 |
outputConfidenceMasks: false
|
54 |
});
|
55 |
|
56 |
-
const segmentationResult = imageSegmenter.segment(image);
|
57 |
let characterBoundingBox: BoundingBox | null = null;
|
58 |
|
59 |
if (segmentationResult.categoryMask) {
|
@@ -85,24 +85,61 @@ function loadImage(base64: string): Promise<HTMLImageElement> {
|
|
85 |
});
|
86 |
}
|
87 |
|
88 |
-
function findCharacterBoundingBox(mask: Uint8Array, width: number, height: number): BoundingBox {
|
89 |
-
let
|
|
|
|
|
90 |
for (let y = 0; y < height; y++) {
|
91 |
for (let x = 0; x < width; x++) {
|
92 |
const index = y * width + x;
|
93 |
-
if (mask[index] > 0) {
|
94 |
-
|
95 |
-
|
96 |
-
maxX = Math.max(maxX, x);
|
97 |
-
maxY = Math.max(maxY, y);
|
98 |
}
|
99 |
}
|
100 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
101 |
return {
|
102 |
-
top: minY,
|
103 |
left: minX,
|
104 |
-
|
105 |
-
|
|
|
106 |
};
|
107 |
}
|
108 |
|
@@ -134,13 +171,21 @@ function calculateBubbleLocations(
|
|
134 |
const padding = 50;
|
135 |
const availableWidth = imageWidth - padding * 2;
|
136 |
const availableHeight = imageHeight - padding * 2;
|
137 |
-
const maxAttempts =
|
138 |
|
139 |
for (let i = 0; i < bubbleCount; i++) {
|
140 |
let x, y;
|
141 |
let attempts = 0;
|
142 |
do {
|
143 |
-
x
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
144 |
y = (i / bubbleCount) * availableHeight + padding;
|
145 |
attempts++;
|
146 |
|
@@ -224,8 +269,8 @@ function drawSpeechBubble(
|
|
224 |
const fontSize = 20;
|
225 |
ctx.font = `${fontSize}px ${font}`;
|
226 |
|
227 |
-
// Adjust maximum width to account for border padding
|
228 |
-
const maxBubbleWidth = imageWidth - 2 * borderPadding;
|
229 |
const wrappedText = wrapText(ctx, text, maxBubbleWidth - padding * 2, fontSize);
|
230 |
const textDimensions = measureTextDimensions(ctx, wrappedText, fontSize);
|
231 |
|
@@ -347,13 +392,12 @@ function adjustBubbleLocation(
|
|
347 |
|
348 |
// Ensure the bubble doesn't overlap with the character
|
349 |
if (characterBoundingBox) {
|
350 |
-
|
351 |
-
|
352 |
-
|
353 |
-
|
354 |
-
|
355 |
-
|
356 |
-
: characterBoundingBox.left + characterBoundingBox.width + width / 2 + 10;
|
357 |
}
|
358 |
}
|
359 |
|
|
|
1 |
+
import { ImageSegmenter, FilesetResolver, ImageSegmenterResult } from "@mediapipe/tasks-vision"
|
2 |
import { actionman } from "../fonts";
|
3 |
|
4 |
interface BoundingBox {
|
|
|
53 |
outputConfidenceMasks: false
|
54 |
});
|
55 |
|
56 |
+
const segmentationResult: ImageSegmenterResult = imageSegmenter.segment(image);
|
57 |
let characterBoundingBox: BoundingBox | null = null;
|
58 |
|
59 |
if (segmentationResult.categoryMask) {
|
|
|
85 |
});
|
86 |
}
|
87 |
|
88 |
+
/**
 * Picks the bounding box most likely to contain the character in a
 * segmentation mask.
 *
 * Every connected region of non-zero mask values is reduced to its bounding
 * box (via `floodFill`). Boxes covering 1% of the image area or less are
 * discarded, and among the remaining ones the box with the highest
 * height/width ratio is returned.
 *
 * @param mask   Row-major mask with one value per pixel; non-zero values are
 *               treated as foreground.
 * @param width  Mask width in pixels.
 * @param height Mask height in pixels.
 * @returns The selected bounding box, or `null` when no region is large
 *          enough (including an entirely empty mask).
 */
function findCharacterBoundingBox(mask: Uint8Array, width: number, height: number): BoundingBox | null {
  const shapes: BoundingBox[] = [];
  const visited = new Set<number>();

  for (let y = 0; y < height; y++) {
    for (let x = 0; x < width; x++) {
      const index = y * width + x;
      if (mask[index] > 0 && !visited.has(index)) {
        shapes.push(floodFill(mask, width, height, x, y, visited));
      }
    }
  }

  // Filter out small shapes (1% of the image area or less).
  const minArea = width * height * 0.01;
  const candidates = shapes.filter(shape => (shape.width * shape.height) > minArea);

  // Calling reduce() without an initial value on an empty array throws a
  // TypeError, so the "nothing found" case has to be handled explicitly.
  if (candidates.length === 0) {
    return null;
  }

  // Sort by area (descending): the reduce below keeps the earlier element on
  // ties, so equal aspect ratios resolve in favour of the larger shape.
  candidates.sort((a, b) => (b.width * b.height) - (a.width * a.height));

  // Find the most vertically rectangular shape.
  return candidates.reduce((prev, current) => {
    const prevRatio = prev.height / prev.width;
    const currentRatio = current.height / current.width;
    return currentRatio > prevRatio ? current : prev;
  });
}
|
116 |
+
|
117 |
+
/**
 * Explores the 4-connected region of non-zero mask pixels reachable from
 * (startX, startY) and returns the bounding box enclosing it.
 *
 * Every pixel that gets explored has its row-major index added to `visited`,
 * so the caller can skip regions that were already processed. Pixels outside
 * the image, pixels whose mask value is 0, and already-visited pixels are
 * ignored. If the start pixel itself is ignored, the result is a 1x1 box at
 * the start position.
 *
 * @param mask    Row-major mask with one value per pixel.
 * @param width   Mask width in pixels.
 * @param height  Mask height in pixels.
 * @param startX  Column of the seed pixel.
 * @param startY  Row of the seed pixel.
 * @param visited Set of pixel indices already explored; mutated in place.
 * @returns The bounding box (left, top, width, height) of the explored region.
 */
function floodFill(mask: Uint8Array, width: number, height: number, startX: number, startY: number, visited: Set<number>): BoundingBox {
  // Used as a LIFO stack: candidates are taken from the end.
  const pending: Array<[number, number]> = [[startX, startY]];

  let leftEdge = startX;
  let rightEdge = startX;
  let topEdge = startY;
  let bottomEdge = startY;

  for (let next = pending.pop(); next !== undefined; next = pending.pop()) {
    const [px, py] = next;

    // Neighbours are pushed unconditionally, so bounds are validated here.
    if (px < 0 || px >= width || py < 0 || py >= height) {
      continue;
    }

    const offset = py * width + px;
    if (mask[offset] === 0) {
      continue;
    }
    if (visited.has(offset)) {
      continue;
    }

    visited.add(offset);

    leftEdge = Math.min(leftEdge, px);
    rightEdge = Math.max(rightEdge, px);
    topEdge = Math.min(topEdge, py);
    bottomEdge = Math.max(bottomEdge, py);

    // Right, left, below, above.
    pending.push([px + 1, py], [px - 1, py], [px, py + 1], [px, py - 1]);
  }

  return {
    left: leftEdge,
    top: topEdge,
    width: rightEdge - leftEdge + 1,
    height: bottomEdge - topEdge + 1
  };
}
|
145 |
|
|
|
171 |
const padding = 50;
|
172 |
const availableWidth = imageWidth - padding * 2;
|
173 |
const availableHeight = imageHeight - padding * 2;
|
174 |
+
const maxAttempts = 100;
|
175 |
|
176 |
for (let i = 0; i < bubbleCount; i++) {
|
177 |
let x, y;
|
178 |
let attempts = 0;
|
179 |
do {
|
180 |
+
// Adjust x to avoid the middle of the character
|
181 |
+
if (characterBoundingBox) {
|
182 |
+
const characterMiddle = characterBoundingBox.left + characterBoundingBox.width / 2;
|
183 |
+
const leftSide = Math.random() * (characterMiddle - padding - padding);
|
184 |
+
const rightSide = characterMiddle + Math.random() * (imageWidth - characterMiddle - padding - padding);
|
185 |
+
x = Math.random() < 0.5 ? leftSide : rightSide;
|
186 |
+
} else {
|
187 |
+
x = Math.random() * availableWidth + padding;
|
188 |
+
}
|
189 |
y = (i / bubbleCount) * availableHeight + padding;
|
190 |
attempts++;
|
191 |
|
|
|
269 |
const fontSize = 20;
|
270 |
ctx.font = `${fontSize}px ${font}`;
|
271 |
|
272 |
+
// Adjust maximum width to account for border padding and limit to 33% of image width
|
273 |
+
const maxBubbleWidth = Math.min(imageWidth - 2 * borderPadding, imageWidth * 0.33);
|
274 |
const wrappedText = wrapText(ctx, text, maxBubbleWidth - padding * 2, fontSize);
|
275 |
const textDimensions = measureTextDimensions(ctx, wrappedText, fontSize);
|
276 |
|
|
|
392 |
|
393 |
// Ensure the bubble doesn't overlap with the character
|
394 |
if (characterBoundingBox) {
|
395 |
+
const characterMiddle = characterBoundingBox.left + characterBoundingBox.width / 2;
|
396 |
+
if (Math.abs(adjustedX - characterMiddle) < width / 2) {
|
397 |
+
// If the bubble is in the middle of the character, move it to the side
|
398 |
+
adjustedX = adjustedX < characterMiddle
|
399 |
+
? Math.max(width / 2 + borderPadding, characterBoundingBox.left - width / 2 - 10)
|
400 |
+
: Math.min(imageWidth - width / 2 - borderPadding, characterBoundingBox.left + characterBoundingBox.width + width / 2 + 10);
|
|
|
401 |
}
|
402 |
}
|
403 |
|