Edit model card

Code:
(just run it, and the model weights will be auto download)

Github:https://github.com/CrazyBoyM/CodeLLaMA-chat

# from Firefly
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch


def main():
    model_name = 'shareAI/CodeLLaMA-chat-13b-Chinese'

    device = 'cuda'
    max_new_tokens = 500    # max token for reply.
    history_max_len = 1000  # max token in history
    top_p = 0.9
    temperature = 0.35
    repetition_penalty = 1.0

    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        trust_remote_code=True,
        low_cpu_mem_usage=True,
        torch_dtype=torch.float16,
        device_map='auto'
    ).to(device).eval()
    tokenizer = AutoTokenizer.from_pretrained(
        model_name,
        trust_remote_code=True,
        use_fast=False
    )


    history_token_ids = torch.tensor([[]], dtype=torch.long)

    user_input = input('User:')
    while True:
        input_ids = tokenizer(user_input, return_tensors="pt", add_special_tokens=False).input_ids
        eos_token_id = torch.tensor([[tokenizer.eos_token_id]], dtype=torch.long)
        user_input_ids = torch.concat([input_ids, eos_token_id], dim=1)
        history_token_ids = torch.concat((history_token_ids, user_input_ids), dim=1)
        model_input_ids = history_token_ids[:, -history_max_len:].to(device)
        with torch.no_grad():
            outputs = model.generate(
                input_ids=model_input_ids, max_new_tokens=max_new_tokens, do_sample=True, top_p=top_p,
                temperature=temperature, repetition_penalty=repetition_penalty, eos_token_id=tokenizer.eos_token_id
            )
        model_input_ids_len = model_input_ids.size(1)
        response_ids = outputs[:, model_input_ids_len:]
        history_token_ids = torch.concat((history_token_ids, response_ids.cpu()), dim=1)
        response = tokenizer.batch_decode(response_ids)
        print("Bot:" + response[0].strip().replace(tokenizer.eos_token, ""))
        user_input = input('User:')


if __name__ == '__main__':
    main()
Downloads last month
23
Inference API
This model does not have enough activity to be deployed to Inference API (serverless) yet. Increase its social visibility and check back later, or deploy to Inference Endpoints (dedicated) instead.

Datasets used to train shareAI/CodeLlama-13b-English-Chat