Quantized model inference
Hi,
I am trying to evaluate the quantized INT8 model on the MRPC task but failed to reproduce the reported F1 score. The following code shows how I tokenize the input, load the model, and run evaluation:
self.tokenizer = AutoTokenizer.from_pretrained("Intel/bert-base-uncased-mrpc-int8-qat")
int8_model = AutoModelForSequenceClassification.from_pretrained("Intel/bert-base-uncased-mrpc-int8-qat")
logits = int8_model(input_ids=batch_data["input_ids"],
                    token_type_ids=batch_data["token_type_ids"],
                    attention_mask=batch_data["attention_mask"]).logits
Could you help check whether something is wrong? The strangest part is that the F1 score is always zero with eval() enabled, but it becomes 0.81 when eval() is disabled.
Please follow the model card to load the INT8 model.
from neural_compressor.utils.load_huggingface import OptimizedModel
int8_model = OptimizedModel.from_pretrained(
'Intel/bert-base-uncased-mrpc-int8-qat',
)
or
from optimum.intel.neural_compressor.quantization import IncQuantizedModelForSequenceClassification
int8_model = IncQuantizedModelForSequenceClassification.from_pretrained(
'Intel/bert-base-uncased-mrpc-int8-qat',
)
Thank you for your reply. I used the approach you suggested from the very beginning but still could not reproduce the reported F1 score (I got 0.51; the reported score is 0.91). I am attaching all the code I use to evaluate the model. Could you help check whether something is wrong?
import torch
import json
import random
import numpy as np
import pandas as pd
import os
import re
from tqdm import tqdm
from torch.utils import data
from transformers import BertTokenizer
from neural_compressor.utils.load_huggingface import OptimizedModel
from sklearn.metrics import f1_score
from ipdb import set_trace
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from optimum.intel.neural_compressor.quantization import IncQuantizedModelForSequenceClassification
class MRPCset(data.Dataset):
    def __init__(self, path1, path2):
        self.convert_tsv_to_json(path1, path2)
        self.read_json_data(path2)
        self.tokenizer = AutoTokenizer.from_pretrained("Intel/bert-base-uncased-mrpc-int8-qat")

    def read_data(self, path):
        self.data = pd.read_csv(path, sep='delimiter', header=None)

    def read_json_data(self, path):
        with open(path) as f:
            self.data = json.load(f)["0"]
        self.data_dict = []
        for key, value in self.data.items():
            if key == '0':  # skip the TSV header row
                continue
            label, id_1, id_2, sent_1, sent_2 = re.split(r"\t+", value)
            self.data_dict.append({
                "label": label,
                "id_1": id_1,
                "id_2": id_2,
                "sent_1": sent_1,
                "sent_2": sent_2,
            })

    def convert_tsv_to_json(self, tsv_path, json_path):
        if os.path.exists(json_path):
            return
        # each TSV line is read as a single string and split later in read_json_data
        data = pd.read_csv(tsv_path, sep='delimiter', header=None)
        data.to_json(json_path)

    def preprocess_text(self, sent1, sent2):
        return self.tokenizer(sent1, sent2, return_tensors="pt")

    def __len__(self):
        return len(self.data_dict)

    def __getitem__(self, index):
        current_data = self.data_dict[index]
        data_dict = self.preprocess_text(current_data["sent_1"], current_data["sent_2"])
        label = torch.tensor(int(current_data["label"]))
        batch = {"input_ids": data_dict["input_ids"].squeeze(0),
                 "token_type_ids": data_dict["token_type_ids"].squeeze(0),
                 "attention_mask": data_dict["attention_mask"].squeeze(0),
                 "label": label}
        return batch
if __name__ == "__main__":
    batch_size = 1

    def _main():
        path_root = './glue_data/MRPC/'
        json_path = path_root + 'dev_data.json'
        tsv_path = path_root + 'dev.tsv'
        test = MRPCset(tsv_path, json_path)
        data_loader = data.DataLoader(
            test,
            batch_size=batch_size,
            shuffle=False,
            num_workers=0,
            drop_last=False,
            pin_memory=False,
        )
        # fp_model = OptimizedModel.from_pretrained('Intel/bert-base-uncased-mrpc')  # reproduces the reported F1 score of the floating-point model
        int8_model = OptimizedModel.from_pretrained('Intel/bert-base-uncased-mrpc-int8-qat')
        int8_model.eval()
        total_labels = []
        pred_labels = []
        for i, batch_data in tqdm(enumerate(data_loader)):
            print(i)
            label = batch_data.pop("label")
            logits = int8_model(input_ids=batch_data["input_ids"],
                                token_type_ids=batch_data["token_type_ids"],
                                attention_mask=batch_data["attention_mask"]).logits
            pred_label = torch.max(logits, dim=1).indices
            total_labels.append(label)
            pred_labels.append(pred_label)
        total_gt_labels = np.array(torch.cat(total_labels))
        total_pred_labels = np.array(torch.cat(pred_labels))
        print("f1 score", f1_score(total_gt_labels, total_pred_labels))
        set_trace()
        print(test.data)

    _main()
Hi Amanda95, please check whether your machine supports an INT8 ISA, for example AVX512_VNNI.
I ran your code on Cascade Lake and got F1 = 0.91068. You can try the scripts in huggingface/optimum-intel to avoid mistakes.
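For example, a quick way to check on Linux (a minimal sketch that just inspects the CPU flags; it assumes /proc/cpuinfo is available):

# check whether the CPU advertises the avx512_vnni flag used for efficient INT8 inference
with open("/proc/cpuinfo") as f:
    flags = f.read()
print("avx512_vnni supported:", "avx512_vnni" in flags)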