add bloomz
- Dockerfile +1 -0
- app/api.py +19 -7
- app/config.json +4 -1
Dockerfile
CHANGED
@@ -8,6 +8,7 @@ RUN apt-get update && apt-get install -y \
     libxmlsec1-dev libffi-dev liblzma-dev git-lfs ffmpeg libsm6 libxext6 cmake \
     libgl1-mesa-glx curl nginx espeak-ng openssl libssl-dev libbz2-dev \
     libncurses5-dev libreadline-dev \
+    vim lynx haproxy \
     && rm -rf /var/lib/apt/lists/* && git lfs install
 
 RUN wget https://github.com/tsl0922/ttyd/releases/download/1.7.3/ttyd.x86_64 -O /usr/local/bin/ttyd && \
app/api.py
CHANGED
@@ -1,7 +1,7 @@
 from fastapi import FastAPI, WebSocket
 from fastapi.responses import HTMLResponse
 from fastapi import Form, Depends, HTTPException, status
-from transformers import pipeline, set_seed, AutoConfig, AutoTokenizer
+from transformers import pipeline, set_seed, AutoConfig, AutoTokenizer, AutoModelForCausalLM
 import torch
 import os
 import time
@@ -68,7 +68,13 @@ async def websocket_endpoint(websocket: WebSocket):
 
 
 @app.post("/api/indochat/v1")
-async def indochat(
+async def indochat(**kwargs):
+    return text_generate("indochat-tiny", kwargs)
+
+
+@app.post("/api/text-generator/v1")
+async def text_generate(
+        model_name: str = Form(default="", description="The model name"),
         text: str = Form(default="", description="The Prompt"),
         decoding_method: str = Form(default="Sampling", description="Decoding method"),
         min_length: int = Form(default=50, description="Minimal length of the generated text"),
@@ -102,13 +108,13 @@ async def indochat(
     max_penalty = 1.5
     repetition_penalty = max(min_penalty + (1.0 - temperature) * (max_penalty - min_penalty), 0.8)
     prompt = f"User: {text}\nAssistant: "
-    input_ids = tokenizer(prompt, return_tensors='pt').input_ids.to(device)
-    model.eval()
+    input_ids = text_generator[model_name]["tokenizer"](prompt, return_tensors='pt').input_ids.to(device)
+    text_generator[model_name]["model"].eval()
     print("Generating text...")
     print(f"max_length: {max_length}, do_sample: {do_sample}, top_k: {top_k}, top_p: {top_p}, "
           f"temperature: {temperature}, repetition_penalty: {repetition_penalty}, penalty_alpha: {penalty_alpha}")
     time_start = time.time()
-    sample_outputs = model.generate(input_ids,
+    sample_outputs = text_generator[model_name]["model"].generate(input_ids,
                                     penalty_alpha=penalty_alpha,
                                     do_sample=do_sample,
                                     num_beams=num_beams,
@@ -134,7 +140,7 @@ def get_text_generator(model_name: str, device: str = "cpu"):
     print(f"hf_auth_token: {hf_auth_token}")
     print(f"Loading model with device: {device}...")
     tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=hf_auth_token)
-    model =
+    model = AutoModelForCausalLM.from_pretrained(model_name, pad_token_id=tokenizer.eos_token_id,
                                                  use_auth_token=hf_auth_token)
     model.to(device)
     print("Model loaded")
@@ -147,4 +153,10 @@ def get_config():
 
 config = get_config()
 device = "cuda" if torch.cuda.is_available() else "cpu"
-
+text_generator = {}
+for model_name in config["text-generator"]:
+    model, tokenizer = get_text_generator(model_name=config["text-generator"][model_name], device=device)
+    text_generator[model_name] = {
+        "model": model,
+        "tokenizer": tokenizer
+    }
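Review note: as committed, the new indochat handler has two likely problems. FastAPI does not populate **kwargs from form data, so kwargs will always be empty, and text_generate is a coroutine function, so returning it without await hands FastAPI an un-awaited coroutine. Below is a minimal, self-contained sketch of the intended delegation pattern, not the committed code: the signature is deliberately reduced, and in the real file every Form field of text_generate would need an explicit value when called directly, because Form(default=...) placeholders are only resolved by FastAPI's dependency injection.

    from fastapi import FastAPI, Form

    app = FastAPI()

    @app.post("/api/text-generator/v1")
    async def text_generate(model_name: str = Form(default="", description="The model name"),
                            text: str = Form(default="", description="The Prompt")):
        # Placeholder body: the committed version tokenizes the prompt and
        # calls model.generate() here.
        return {"model_name": model_name, "text": text}

    @app.post("/api/indochat/v1")
    async def indochat(text: str = Form(default="", description="The Prompt")):
        # Await the delegated coroutine and pass concrete keyword arguments;
        # Form(default=...) placeholders are only filled in by FastAPI's
        # injection, never in a direct Python call.
        return await text_generate(model_name="indochat-tiny", text=text)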
app/config.json
CHANGED
@@ -1,3 +1,6 @@
 {
-    "
+    "text-generator": {
+        "indochat-tiny": "cahya/indochat-tiny",
+        "bloomz-1b1-instruct": "cahya/bloomz-1b1-instruct"
+    }
 }
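Each key in the new "text-generator" map becomes a legal value for the model_name form field of /api/text-generator/v1, and each value is the Hugging Face repo id loaded eagerly at startup. A hypothetical client call against a local run of the Space (the host, port, and prompt are assumptions, and only a few of the endpoint's form fields are shown):

    import requests

    # Form-encoded POST; model_name must match a key in app/config.json.
    resp = requests.post(
        "http://localhost:7860/api/text-generator/v1",  # assumed host/port
        data={
            "model_name": "bloomz-1b1-instruct",
            "text": "Who invented the light bulb?",
            "decoding_method": "Sampling",
            "min_length": 50,
        },
    )
    print(resp.status_code, resp.text)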