Spaces: Running on Zero
Update app.py
Browse files
app.py
CHANGED
@@ -137,7 +137,7 @@ def gradient_cuff_reject(message,with_defense, sample_times,perturb_times,thresh
|
|
137 |
|
138 |
# first-stage rejection
|
139 |
if not with_defense:
|
140 |
-
return False
|
141 |
sft_embed=shift_direction_embedding[0]*0.0
|
142 |
original_input_id=tok.encode(message,return_tensors="pt",add_special_tokens=False)[0]
|
143 |
original_embedding=embedding_func(original_input_id)
|
@@ -177,15 +177,15 @@ def gradient_cuff_reject(message,with_defense, sample_times,perturb_times,thresh
|
|
177 |
est_grad=sum(est_grad)/len(est_grad)
|
178 |
if est_grad.norm().item()>threshold:
|
179 |
return (True,results[0],est_grad.norm().item())
|
180 |
-
return (False,
|
181 |
|
182 |
def chat(message, history, with_defense,threshold):
|
183 |
perturb_times=9
|
184 |
sample_times=10
|
185 |
#threshold=thresholds[perturb_times-1]
|
186 |
return_value=gradient_cuff_reject(message,with_defense, sample_times, perturb_times, threshold)
|
|
|
187 |
if return_value[0]:
|
188 |
-
reject_information=json.dumps({'refusal_loss':1-return_value[1],'gradient_norm':return_value[2]})
|
189 |
answer="Gradient Cuff Rejection: "+reject_information
|
190 |
answer=answer.split(" ")
|
191 |
partial_text = ""
|
@@ -203,12 +203,12 @@ def chat(message, history, with_defense,threshold):
|
|
203 |
messages = tok.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
|
204 |
# Tokenize the messages string
|
205 |
input_ids = tok([messages], return_tensors="pt")["input_ids"]
|
206 |
-
response= chat_engine(input_ids)
|
|
|
207 |
|
208 |
# Initialize an empty string to store the generated text
|
209 |
partial_text = ""
|
210 |
-
|
211 |
-
for new_text in response_split:
|
212 |
partial_text += (new_text+" ")
|
213 |
yield partial_text
|
214 |
|
|
|
137 |
|
138 |
# first-stage rejection
|
139 |
if not with_defense:
|
140 |
+
return (False,None,None)
|
141 |
sft_embed=shift_direction_embedding[0]*0.0
|
142 |
original_input_id=tok.encode(message,return_tensors="pt",add_special_tokens=False)[0]
|
143 |
original_embedding=embedding_func(original_input_id)
|
|
|
177 |
est_grad=sum(est_grad)/len(est_grad)
|
178 |
if est_grad.norm().item()>threshold:
|
179 |
return (True,results[0],est_grad.norm().item())
|
180 |
+
return (False,results[0],est_grad.norm().item())
|
181 |
|
182 |
def chat(message, history, with_defense,threshold):
|
183 |
perturb_times=9
|
184 |
sample_times=10
|
185 |
#threshold=thresholds[perturb_times-1]
|
186 |
return_value=gradient_cuff_reject(message,with_defense, sample_times, perturb_times, threshold)
|
187 |
+
reject_information=json.dumps({'refusal_loss':1-return_value[1],'gradient_norm':return_value[2]})
|
188 |
if return_value[0]:
|
|
|
189 |
answer="Gradient Cuff Rejection: "+reject_information
|
190 |
answer=answer.split(" ")
|
191 |
partial_text = ""
|
|
|
203 |
messages = tok.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
|
204 |
# Tokenize the messages string
|
205 |
input_ids = tok([messages], return_tensors="pt")["input_ids"]
|
206 |
+
response= "Gradient Cuff Checking: "+reject_information + "\n"+ chat_engine(input_ids)
|
207 |
+
response=response.split(" ")
|
208 |
|
209 |
# Initialize an empty string to store the generated text
|
210 |
partial_text = ""
|
211 |
+
for new_text in response:
|
|
|
212 |
partial_text += (new_text+" ")
|
213 |
yield partial_text
|
214 |
|