Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -101,7 +101,7 @@ def embedding_shift(original_embedding,shift_embeddings,prefix_embedding,suffix_
|
|
101 |
)
|
102 |
return input_embeddings
|
103 |
|
104 |
-
@spaces.GPU
|
105 |
def engine(input_embeds):
|
106 |
m.to("cuda")
|
107 |
output_text = []
|
@@ -120,14 +120,14 @@ def engine(input_embeds):
|
|
120 |
output_text += tok.batch_decode(outputs, skip_special_tokens=True)
|
121 |
return output_text
|
122 |
|
123 |
-
@spaces.GPU
|
124 |
def chat_engine(input_ids):
|
125 |
m.to("cuda")
|
126 |
prompt_length=len(input_ids[0])
|
127 |
with torch.no_grad():
|
128 |
outputs = m.generate(
|
129 |
input_ids = input_ids.to("cuda"),
|
130 |
-
max_new_tokens =
|
131 |
do_sample = True,
|
132 |
temperature = 0.6,
|
133 |
top_p = 0.9,
|
|
|
101 |
)
|
102 |
return input_embeddings
|
103 |
|
104 |
+
@spaces.GPU(duration=10)
|
105 |
def engine(input_embeds):
|
106 |
m.to("cuda")
|
107 |
output_text = []
|
|
|
120 |
output_text += tok.batch_decode(outputs, skip_special_tokens=True)
|
121 |
return output_text
|
122 |
|
123 |
+
@spaces.GPU(duration=10)
|
124 |
def chat_engine(input_ids):
|
125 |
m.to("cuda")
|
126 |
prompt_length=len(input_ids[0])
|
127 |
with torch.no_grad():
|
128 |
outputs = m.generate(
|
129 |
input_ids = input_ids.to("cuda"),
|
130 |
+
max_new_tokens = 16,
|
131 |
do_sample = True,
|
132 |
temperature = 0.6,
|
133 |
top_p = 0.9,
|