import gradio as gr
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor, as_completed
from MonsterAPIClient import MClient

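# Assumption: MClient picks up MonsterAPI credentials from the environment
# (an API key/auth token); see MonsterAPIClient.py for the exact mechanism.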
client = MClient()
# Available models list
MODELS_TO_SERVE = ['llama2-7b-chat', 'mpt-7b-instruct', 'falcon-7b-instruct']

def generate_model_output(model: str, input_text: str, temp: float = 0.98) -> tuple:
    """
    Generate output from a specific model.

    Parameters:
        model (str): The name of the model.
        input_text (str): The input prompt for the model.
        temp (float, optional): The temperature value for text generation. Defaults to 0.98.

    Returns:
        tuple: The model name and its generated output text, or an error message on failure.
    """
    try:
        # Submit the prompt, then poll MonsterAPI until the generation completes.
        response = client.get_response(model, {
            "prompt": input_text,
            "temp": temp,
        })
        output = client.wait_and_get_result(response['process_id'])
        return model, output['text']
    except Exception as e:
        return model, f"Error occurred: {str(e)}"

def generate_output(selected_models: list, input_text: str, temp: float = 0.98,
                    available_models: list = MODELS_TO_SERVE) -> list:
    """
    Generate outputs from the selected models in parallel using the Monster API.

    Parameters:
        selected_models (list): List of selected model names.
        input_text (str): The input prompt for the models.
        temp (float, optional): The temperature value for text generation. Defaults to 0.98.
        available_models (list, optional): List of available model names. Defaults to MODELS_TO_SERVE.

    Returns:
        list: One output string per model in available_models, in order; models
        that were not selected map to the placeholder "Model not selected!".
    """
    outputs = {}
    # Query all selected models concurrently; tqdm tracks completion progress.
    with ThreadPoolExecutor() as executor:
        future_to_model = {executor.submit(generate_model_output, model, input_text, temp): model
                           for model in selected_models}
        for future in tqdm(as_completed(future_to_model), total=len(selected_models)):
            model, output = future.result()
            outputs[model] = output

    # Emit results in the fixed order of available_models so each Gradio output
    # textbox receives the text of the model it is labelled with.
    ret_outputs = []
    for model in available_models:
        if model not in outputs:
            ret_outputs.append("Model not selected!")
        else:
            ret_outputs.append(outputs[model])
    return ret_outputs

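# A usage sketch with a hypothetical prompt: querying two of the three served
# models; the unselected mpt-7b-instruct slot comes back as the placeholder:
#
#   outputs = generate_output(['llama2-7b-chat', 'falcon-7b-instruct'],
#                             'Write a haiku about GPUs.')
#   print(dict(zip(MODELS_TO_SERVE, outputs)))
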
# Build the Gradio UI: one output textbox per served model, plus the inputs.
output_components = [gr.Textbox(label=model) for model in MODELS_TO_SERVE]
checkboxes = gr.CheckboxGroup(choices=MODELS_TO_SERVE, label="Select models to generate outputs:")
textbox = gr.Textbox(label="Input Prompt")
temp = gr.Slider(minimum=0.0, maximum=1.0, value=0.98, step=0.01, label="Temperature")

demo = gr.Interface(
    fn=generate_output,
    inputs=[
        checkboxes,
        textbox,
        temp,
    ],
    outputs=output_components,
    live=False,
    title="LLM Evaluation powered by MonsterAPI",
    description="""This HuggingFace Space has been designed to help you evaluate the output of LLMs like Llama 2 7B, Falcon-7B, and MPT-7B in parallel. These models are hosted on [MonsterAPI](https://monsterapi.ai/?utm_source=llm-evaluation&utm_medium=referral), an AI infrastructure platform built for easily accessing AI models via scalable APIs and [finetuning LLMs](https://docs.monsterapi.ai/fine-tune-a-large-language-model-llm) at very low cost with our no-code implementation. MonsterAPI is powered by our low-cost and highly scalable GPU computing platform, [Q Blocks](https://www.qblocks.cloud?utm_source=llm-evaluation&utm_medium=referral). These LLMs are accessible via scalable REST APIs. Check out our [API documentation](https://documenter.getpostman.com/view/13759598/2s8ZDVZ3Yi) to integrate them into your AI-powered applications.""",
    css="body {background-color: black}"
)

# Launch the Gradio app
demo.launch()
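# Note: on a HuggingFace Space the plain launch() above is sufficient; when
# running locally, a temporary public URL could be requested with
# demo.launch(share=True).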