from __future__ import annotations

from typing import Iterable

import gradio as gr
from gradio.themes.utils import colors, fonts, sizes
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Download the quantized Dolphin 2.6 (Phi-2) weights and load them with llama.cpp.
hf_hub_download(
    repo_id="TheBloke/dolphin-2_6-phi-2-GGUF",
    filename="dolphin-2_6-phi-2.Q5_K_S.gguf",
    local_dir=".",
)
llm = Llama(model_path="./dolphin-2_6-phi-2.Q5_K_S.gguf", n_ctx=1024)

# ChatML prompt template expected by the Dolphin fine-tune.
ins = '''<|im_start|>user
{question}<|im_end|>
<|im_start|>assistant
'''


class BlueTheme(gr.themes.Soft):
    def __init__(
        self,
        *,
        primary_hue: colors.Color | str = colors.purple,
        secondary_hue: colors.Color | str = colors.blue,
        neutral_hue: colors.Color | str = colors.neutral,
        spacing_size: sizes.Size | str = sizes.spacing_md,
        radius_size: sizes.Size | str = sizes.radius_md,
        font: fonts.Font | str | Iterable[fonts.Font | str] = (
            fonts.GoogleFont("Space Grotesk"),
            "ui-sans-serif",
            "sans-serif",
        ),
        font_mono: fonts.Font | str | Iterable[fonts.Font | str] = (
            fonts.GoogleFont("Space Mono"),
            "ui-monospace",
            "monospace",
        ),
    ):
        super().__init__(
            primary_hue=primary_hue,
            secondary_hue=secondary_hue,
            neutral_hue=neutral_hue,
            spacing_size=spacing_size,
            radius_size=radius_size,
            font=font,
            font_mono=font_mono,
        )
        super().set(
            button_primary_background_fill="linear-gradient(90deg, *primary_300, *secondary_400)",
            button_primary_background_fill_hover="linear-gradient(90deg, *primary_200, *secondary_300)",
            button_primary_text_color="white",
            button_primary_background_fill_dark="linear-gradient(90deg, *primary_600, *secondary_800)",
            block_shadow="*shadow_drop_lg",
            button_shadow="*shadow_drop_lg",
            input_background_fill="zinc",
            input_border_color="*secondary_300",
            input_shadow="*shadow_drop",
            input_shadow_focus="*shadow_drop_lg",
        )


custom_theme = BlueTheme()


def generate(instruction):
    """Stream a completion for the given question, yielding the text generated so far."""
    prompt = ins.format(question=instruction)
    response = llm.create_completion(
        prompt,
        stream=True,
        stop=['<|im_start|>user', '<|im_end|>'],
        repeat_penalty=1.1,
        max_tokens=512,  # raise llama-cpp-python's low default token cap so answers aren't cut short
    )
    result = ""
    for output in response:
        result += output['choices'][0]['text']
        yield result


examples = [
    "How do dogs bark?",
    "Why are apples red?",
    "How do I make a campfire?",
    "Why do cats love to chirp at something?",
]


def process_example(args):
    """Run the generator to completion and return only the final answer."""
    result = ""
    for result in generate(args):
        pass
    return result


css = ".generating {visibility: hidden}"

with gr.Blocks(theme=custom_theme, analytics_enabled=False, css=css) as demo:
    with gr.Column():
        gr.Markdown(
            """
# 𓄿 [Corvus] Dolphin-2.6 (Phi-2)

Type in the box below and click the button to generate answers to your most pressing questions!
""") with Gradio.Tabs(): with Gradio.Tab(label="Prompt"): usr_prompt = Gradio.components.Textbox(placeholder="Why do cats meow?", label="Prompt", info="What things do you want to ask the chat assistant?"); with Gradio.Tab(label="Configuration (disabled)"): top_p = Gradio.components.Slider(value=0.4, maximum=1, interactive=True, label="Top-P (Nucleus sampling)", info="Represents the token diversity threshold for CogniForge."); top_k = Gradio.components.Slider(value=40, maximum=100, interactive=True, label="Top-K sampling", info="Represents the token probability threshold for CogniForge."); temp = Gradio.components.Slider(value=0.7, maximum=2, interactive=True, label="Temperature", info="Represents the creativity threshold for CogniForge."); with Gradio.Group(): output = Gradio.components.Textbox(value="", label="Output", info="Corvus' output"); Gradio.Examples( examples=examples, inputs=[usr_prompt], cache_examples=False, fn=process_example, outputs=[output], ); submit = Gradio.Button("Generate", variant="primary") submit.click(generate, inputs=[usr_prompt], outputs=[output], concurrency_limit=1) demo.launch(debug=True)