Why doesn't batch size > 1 increase model speed?

#41
by zokica - opened

If I increase the batch size, the model's speed stays the same or even gets slower. Why?

    # Wall-clock start time for the whole batched-generation run.
    timea = time.time()
    # Accumulator for generated text (not used within this visible snippet).
    final_text = ""
    # Number of prompts passed to the model per generate() call.
    batch_size = 5

    def process_batch(batch):
        """Tokenize a batch of prompts, run batched generation, and decode.

        Args:
            batch: iterable of raw inputs; each item is passed through
                formatting_func to build its prompt string.

        Returns:
            list[str]: one decoded generation per prompt, with special
            tokens stripped.
        """
        batch_prompts = [formatting_func(sent) for sent in batch]

        # Decoder-only models must be padded on the LEFT for batched
        # generation: with the tokenizer's default right padding, the model
        # continues from pad tokens, corrupting outputs and wasting compute —
        # the usual reason batching shows no speedup. Temporarily force left
        # padding for this call and restore the previous setting afterwards.
        prev_padding_side = tokenizer.padding_side
        tokenizer.padding_side = "left"
        try:
            model_inputs = tokenizer(batch_prompts, return_tensors="pt", padding=True, truncation=True).to("cuda:0")
        finally:
            tokenizer.padding_side = prev_padding_side

        print("model_inputs",model_inputs)

        timeb = time.time()
        with torch.no_grad():
            outputs = ft_model.generate(**model_inputs, max_new_tokens=60, do_sample=True, top_k=120, pad_token_id=tokenizer.pad_token_id, use_cache=True)

        # Time spent inside generate() — the dominant cost of the batch.
        print(-timeb + time.time())

        timeb = time.time()

        decoded_texts = tokenizer.batch_decode(outputs, skip_special_tokens=True)

        # Time spent decoding token ids back to strings.
        print(-timeb + time.time())

        return decoded_texts

    # Collect every batch's generations. Previously batch_texts was
    # overwritten on each iteration and all results were silently discarded.
    all_texts = []
    for i in range(0, len(sents), batch_size):
        batch = sents[i:i + batch_size]
        batch_texts = process_batch(batch)
        all_texts.extend(batch_texts)

Sign up or log in to comment