metadata
datasets:
- Dahoas/rm-static
- openai/webgpt_comparisons
language:
- en
Inference Example:
from peft import PeftConfig, PeftModel, PeftModelForSequenceClassification
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoModelForSequenceClassification,
    AutoTokenizer,
)
# Inference example: score a single prompt/response pair with the reward model.
# The reward model is a PEFT adapter applied on top of a DeBERTa-v3-large
# sequence classifier.

# Adapter repository and the base checkpoint it is applied to.
peft_model_id = "edu-linguistic/deberta-v3-large-edu-rm"
model_name = 'microsoft/deberta-v3-large'

# Build the base classifier with a single output label. The config has to be
# passed to from_pretrained explicitly; setting num_labels on a config object
# that is never handed to the model has no effect.
model_config = AutoConfig.from_pretrained(model_name)
model_config.num_labels = 1
model = AutoModelForSequenceClassification.from_pretrained(model_name, config=model_config)

# Wrap the base model with the trained adapter weights.
model = PeftModelForSequenceClassification.from_pretrained(model, peft_model_id)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# The prompt and the answer are marked with <|prompter|> and <|assistant|>.
texts = "<|prompter|> When using linear regression, how do you help prevent numerical instabilities? (One or multiple answers) \n <|assistant|> 4. add more features"
inputs = tokenizer(texts, return_tensors='pt', padding=True, truncation=True)

# Call the loaded model directly (this is a standalone script, there is no
# `self`); detach the logits and move them to CPU before printing.
score = model(**inputs).logits.cpu().detach()
print(score)