Spaces:
Runtime error
Runtime error
File size: 1,059 Bytes
578c6ba d073849 578c6ba |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 |
import spaces
import gradio as gr
from cartesia_pytorch import ReneLMHeadModel
from transformers import AutoTokenizer
#import subprocess
#subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
# Hugging Face Hub identifiers for the checkpoint and its tokenizer.
MODEL_REPO = "cartesia-ai/Rene-v0.1-1.3b-pytorch"
TOKENIZER_REPO = "allenai/OLMo-1B-hf"

# Fetch the weights first, then cast to half precision and move them to the GPU.
model = ReneLMHeadModel.from_pretrained(MODEL_REPO)
model = model.half().cuda()
tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_REPO)
# Define the function to generate text
@spaces.GPU(duration=120)
def generate_text(input_text):
    """Generate text from a prompt with the module-level Rene model.

    Args:
        input_text: Prompt string; supplied by the Gradio text input.

    Returns:
        The decoded text of the first sequence returned by ``model.generate``
        with special tokens stripped, or an empty string when the prompt is
        empty.
    """
    # An empty prompt is expected to tokenize to zero tokens (this tokenizer
    # appears not to prepend a BOS token -- confirm), which leaves the model
    # nothing to condition on. Return early rather than fail in generation.
    if not input_text:
        return ""

    # The prompt is wrapped in a list so the tokenizer returns a batch of one.
    inputs = tokenizer([input_text], return_tensors="pt")
    # The model was moved to CUDA when it was loaded, so the ids must follow.
    # NOTE(review): max_length presumably bounds the total sequence length
    # (prompt included) -- confirm against the cartesia_pytorch generate API.
    outputs = model.generate(
        inputs.input_ids.cuda(),
        max_length=50,
        top_k=100,
        top_p=0.99,
    )
    # Only one prompt was submitted, so the first decoded entry is the result.
    out_message = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
    return out_message
# Describe the UI as a keyword mapping: one text box in, one text box out,
# wired to the generation function.
interface_options = {
    "fn": generate_text,
    "inputs": "text",
    "outputs": "text",
    "title": "ReneLM Text Generator",
    "description": "Generate text using ReneLMHeadModel from a prompt.",
}
interface = gr.Interface(**interface_options)

# Start serving the app.
interface.launch()
|