Added first token latency and replaced latency with total generation time.
#2 · by KingNish · opened
src/lib/components/InferencePlayground/InferencePlayground.svelte
CHANGED
@@ -42,7 +42,8 @@
   let viewSettings = false;
   let showTokenModal = false;
   let loading = false;
-  let latency = 0;
+  let latency = 0; // Renamed to total generation time
+  let firstTokenLatency = 0; // New variable for first token latency
   let generatedTokensCount = 0;
   let abortController: AbortController | undefined = undefined;
   let waitForNonStreaming = true;
@@ -91,12 +92,16 @@

     (document.activeElement as HTMLElement).blur();
     loading = true;
+    firstTokenLatency = 0; // Reset before each submission
+    generatedTokensCount = 0; // Reset before each submission
+

     try {
       const startTime = performance.now();
       const hf = createHfInference(hfToken);

       if (conversation.streaming) {
+        let firstTokenReceived = false; // Flag to track first token
         const streamingMessage = { role: "assistant", content: "" };
         conversation.messages = [...conversation.messages, streamingMessage];
         abortController = new AbortController();
@@ -109,6 +114,11 @@
             streamingMessage.content = content;
             conversation.messages = [...conversation.messages];
             generatedTokensCount += 1;
+
+            if (!firstTokenReceived) { // Check if it's the first token
+              firstTokenLatency = Math.round(performance.now() - startTime);
+              firstTokenReceived = true;
+            }
           }
         },
         abortController
@@ -123,11 +133,11 @@
         if (waitForNonStreaming) {
           conversation.messages = [...conversation.messages, newMessage];
           generatedTokensCount += newTokensCount;
+          firstTokenLatency = latency; //In non-streaming, first token latency equals total latency.
         }
       }

-      const endTime = performance.now();
-      latency = Math.round(endTime - startTime);
+      latency = Math.round(performance.now() - startTime); // Total generation time
     } catch (error) {
       if (conversation.messages.at(-1)?.role === "assistant" && !conversation.messages.at(-1)?.content?.trim()) {
         conversation.messages.pop();
@@ -261,7 +271,9 @@
           <IconDelete />
         </button>
         <div class="flex-1 items-center justify-center text-center text-sm text-gray-500">
-          <span class="max-xl:hidden">
+          <span class="max-xl:hidden">
+            {generatedTokensCount} tokens · First Token: {firstTokenLatency}ms · Total Generation: {latency}ms
+          </span>
         </div>
         <button
           type="button"
@@ -269,8 +281,8 @@
           class="flex h-[39px] items-center gap-2 rounded-lg border border-gray-200 bg-white px-3 py-2.5 text-sm font-medium text-gray-900 hover:bg-gray-100 hover:text-blue-700 focus:z-10 focus:outline-none focus:ring-4 focus:ring-gray-100 dark:border-gray-600 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-white dark:focus:ring-gray-700"
         >
           <IconCode />
-          {!viewCode ? "View Code" : "Hide Code"}
-        >
+          {!viewCode ? "View Code" : "Hide Code"}
+        </button>
         <button
           on:click={() => {
             viewCode = false;
@@ -357,4 +369,4 @@
         >
           <IconInfo classNames="text-xs" />
           Give feedback
-        </a>
+        </a>
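
Taken together, the change measures two numbers against a single `performance.now()` start timestamp: time to first token, recorded once inside the streaming callback, and total generation time, recorded after generation finishes (for a non-streaming request the two effectively coincide). The sketch below restates that pattern in plain TypeScript so it can be read outside the Svelte component; `timeGeneration`, `TokenStream`, and `fakeStream` are illustrative names, not part of the playground or `@huggingface/inference` APIs.

```ts
// Illustrative sketch only, not the playground's API: any AsyncIterable<string>
// that yields generated tokens one at a time can stand in for the real stream.
type TokenStream = AsyncIterable<string>;

interface GenerationResult {
  content: string;
  firstTokenLatency: number; // ms until the first chunk arrived
  totalGenerationTime: number; // ms until the stream finished
  generatedTokensCount: number;
}

async function timeGeneration(stream: TokenStream): Promise<GenerationResult> {
  const startTime = performance.now();
  let content = "";
  let firstTokenLatency = 0;
  let firstTokenReceived = false;
  let generatedTokensCount = 0;

  for await (const token of stream) {
    content += token;
    generatedTokensCount += 1;
    if (!firstTokenReceived) {
      // Time to first token: recorded exactly once, on the first chunk.
      firstTokenLatency = Math.round(performance.now() - startTime);
      firstTokenReceived = true;
    }
  }

  // Total generation time spans the whole stream, so it is computed last.
  const totalGenerationTime = Math.round(performance.now() - startTime);

  // If nothing streamed at all, fall back to the total so the
  // first-token figure is still meaningful.
  if (!firstTokenReceived) {
    firstTokenLatency = totalGenerationTime;
  }

  return { content, firstTokenLatency, totalGenerationTime, generatedTokensCount };
}

// Usage with a fake stream that emits three tokens roughly 50 ms apart:
async function* fakeStream() {
  for (const token of ["Hello", ",", " world"]) {
    await new Promise((resolve) => setTimeout(resolve, 50));
    yield token;
  }
}

timeGeneration(fakeStream()).then(console.log);
// → e.g. { content: "Hello, world", firstTokenLatency: ~50,
//          totalGenerationTime: ~150, generatedTokensCount: 3 }
```

The three resulting numbers correspond to what the updated toolbar renders as `{generatedTokensCount} tokens · First Token: …ms · Total Generation: …ms`; note that the total is only known after the stream ends, so any figure derived from it is assigned after that point in this sketch.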