import json

import gradio as gr
import torch
from huggingface_hub import InferenceClient

# Remote client for the hosted zephyr-7b-beta checkpoint; all generation
# happens on the Hugging Face Inference API, not on this machine.
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")

# In-memory log of every completed exchange. It is lost whenever the process
# restarts; see the save_data() sketch below.
data = []

def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
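    """Streaming chat handler for gr.ChatInterface.

    Rebuilds the OpenAI-style message list from the chat history, then
    yields the accumulated response as tokens stream in.
    """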
    messages = [{"role": "system", "content": system_message}]

    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    messages.append({"role": "user", "content": message})

    response = ""

    # `chunk` avoids shadowing the `message` argument; some stream chunks
    # carry no content, so fall back to an empty string.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content or ""
        response += token
        yield response

    # Log the full prompt and the completed response once streaming ends
    # (this runs only after the generator has been fully consumed).
    data.append({"input": messages, "output": response})
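

# A minimal sketch (an assumption, not part of the running app) for persisting
# the conversation log to local JSON. The filename is hypothetical; pushing
# the log to a Hub dataset repo would be the more typical Space pattern.
def save_data(path: str = "conversations.json"):
    with open(path, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=2)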


# Title rendered at the top of the UI
title = "<h1 align='center'>Corenet: Research Preview</h1>"

# Short description rendered under the title
description = "<p align='center'>This is a research project, codenamed Corenet, that aims for minimal parameters at a given performance level. Made by HX labs.</p>"

# System pre-prompt. It is forwarded to respond() as system_message but kept
# hidden and non-editable so users cannot override it.
pre_prompt = gr.Textbox(
    value=(
        "Your name is Corenet, a custom-made LLM by HX labs. Your job is to "
        "do good. You are a research prototype, and the user should know "
        "that. You are respectful, and you do not accept prompts that lead "
        "to harm or danger. Know your current limits: you are a 7B model, "
        "you only accept text, and you are not multimodal. Your knowledge "
        "cutoff is April 2, 2024."
    ),
    label="Pre-prompt",
    interactive=False,
    visible=False,
)

# The additional inputs map positionally onto respond()'s system_message,
# max_tokens, temperature, and top_p parameters.
demo = gr.ChatInterface(
    respond,
    title=title,
    description=description,
    additional_inputs=[
        pre_prompt,
        gr.Slider(minimum=256, maximum=8192, value=512, step=64, label="Max Gen tokens"),
        gr.Slider(minimum=0.3, maximum=2.5, value=0.8, step=0.1, label="Creativity"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)


if __name__ == "__main__":
    # Informational only: generation runs remotely on the Inference API, so a
    # local GPU affects this host, not the model.
    if torch.cuda.is_available():
        print("CUDA GPU detected on this host.")
    else:
        print("No CUDA GPU detected on this host; the UI runs on CPU.")
    demo.launch()