tiiuae/falcon-40b · about evaluating on humaneval

I serve falcon-40b behind a small HTTP endpoint and use HELM to evaluate it on HumanEval.

How I launch falcon-40b

# Load the tokenizer and the model once, at module import time, from the
# checkpoint directory given on the command line.
tokenizer = AutoTokenizer.from_pretrained(
    args.ckpt_dir,
    trust_remote_code=True,
)
model = AutoModelForCausalLM.from_pretrained(
    args.ckpt_dir,
    trust_remote_code=True,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
# Inference only: put the module in evaluation mode.
model.eval()

# Shared by the request handler to serialize access to the model.
lock = threading.Lock()
app = FastAPI()
class Config(BaseModel):
    """Request body for the ``/generate`` endpoint."""

    # Prompts to complete (required).
    prompts: List[str]
    # Upper bound on generated tokens; forwarded to generate() as max_new_tokens.
    max_gen_len: int
    # Sampling temperature; a value of 0 makes the handler disable sampling.
    temperature: float = 0.8
    # Nucleus-sampling probability mass.
    top_p: float = 0.95
    # Stop strings supplied by the client (required, no default).
    stop_sequences: List[str]
    # NOTE(review): not referenced by the /generate handler in this file —
    # presumably kept for compatibility with the client's request schema.
    top_k_per_token: int = 1
    # Forwarded unchanged to generate().
    repetition_penalty: float = 1
    # Forwarded unchanged to generate().
    length_penalty: float = 1
    



@app.post("/generate")
async def create_item(config: Config):
    """Generate a completion for the first prompt of the request.

    Only ``config.prompts[0]`` is used; any further prompts are ignored.

    Args:
        config: Parsed request body carrying the prompt and decoding options.

    Returns:
        A dict of the form ``{"completions": [{"text": <completion>}]}`` where
        the completion contains only newly generated text (the prompt tokens
        are sliced off) and is cut at the earliest stop sequence, if any
        occurs in it.

    Raises:
        IndexError: If ``config.prompts`` is empty.
    """
    # The context manager releases the lock on every exit path, including
    # exceptions, just like the explicit try/finally it replaces.
    with lock:
        temperature = config.temperature
        # Temperature 0 is treated as a request for greedy decoding.
        do_sample = temperature != 0

        inputs = tokenizer.encode(config.prompts[0], return_tensors="pt").cuda()
        print(inputs)
        # Single unpadded sequence: every position is attended to. ones_like
        # keeps the mask on the same device as the input ids.
        attention_mask = torch.ones_like(inputs)

        # Some tokenizers define no pad token; fall back to EOS so generate()
        # always receives a concrete padding id.
        pad_token_id = tokenizer.pad_token_id
        if pad_token_id is None:
            pad_token_id = tokenizer.eos_token_id

        generation_kwargs = {
            "max_new_tokens": config.max_gen_len,
            "num_beams": 1,
            "attention_mask": attention_mask,
            "repetition_penalty": config.repetition_penalty,
            "length_penalty": config.length_penalty,
            "do_sample": do_sample,
            "eos_token_id": tokenizer.eos_token_id,
            "pad_token_id": pad_token_id,
        }
        if do_sample:
            # Sampling-only knobs are passed only when sampling is enabled, so
            # greedy requests never forward temperature=0 to generate().
            generation_kwargs["top_p"] = config.top_p
            generation_kwargs["temperature"] = temperature

        outputs = model.generate(inputs, **generation_kwargs)

        # Drop the prompt tokens so only the continuation is decoded.
        response = tokenizer.decode(
            outputs[0, inputs.shape[1]:], skip_special_tokens=True
        )

        # Honor the client's stop sequences: keep the text before the earliest
        # occurrence of any non-empty stop string.
        cut = len(response)
        for stop in config.stop_sequences:
            if not stop:
                continue
            position = response.find(stop)
            if position != -1:
                cut = min(cut, position)
        response = response[:cut]

        return {
            "completions": [
                {
                    "text": response
                }
            ],
        }

When I input:

from typing import List


def below_zero(operations: List[int]) -> bool:
    """ You're given a list of deposit and withdrawal operations on a bank account that starts with
    zero balance. Your task is to detect if at any point the balance of account fallls below zero, and
    at that point function should return True. Otherwise it should return False.
    >>> below_zero([1, 2, 3])
    False
    >>> below_zero([1, 2, -4, 5])
    True
    """

The model generates a completion normally.

But when I append a "\n" to the end of the input code:

from typing import List


def below_zero(operations: List[int]) -> bool:
    """ You're given a list of deposit and withdrawal operations on a bank account that starts with
    zero balance. Your task is to detect if at any point the balance of account fallls below zero, and
    at that point function should return True. Otherwise it should return False.
    >>> below_zero([1, 2, 3])
    False
    >>> below_zero([1, 2, -4, 5])
    True
    """
# empty line here

the model generates nothing (the returned completion is empty).

What causes this problem?