Spaces:
Sleeping
Sleeping
# import firebase_admin | |
# from firebase_admin import credentials | |
# from firebase_admin import firestore | |
import io | |
from fastapi import FastAPI, File, UploadFile | |
from werkzeug.utils import secure_filename | |
# import speech_recognition as sr | |
import subprocess | |
import os | |
import requests | |
import random | |
import pandas as pd | |
from pydub import AudioSegment | |
from datetime import datetime | |
from datetime import date | |
import numpy as np | |
# from sklearn.ensemble import RandomForestRegressor | |
import shutil | |
import json | |
# from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline | |
from pydantic import BaseModel | |
from typing import Annotated | |
# from transformers import BertTokenizerFast, EncoderDecoderModel | |
import torch | |
import re | |
# from transformers import AutoTokenizer, T5ForConditionalGeneration | |
from fastapi import Form | |
# from transformers import AutoModelForSequenceClassification | |
# from transformers import TFAutoModelForSequenceClassification | |
# from transformers import AutoTokenizer, AutoConfig | |
import numpy as np | |
import threading | |
import random | |
import string | |
import time | |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM,pipeline | |
# Paraphrasing model, loaded once at import time and shared by every request.
# Inference runs on the CPU.
device = "cpu"
_PARAPHRASER_CHECKPOINT = "humarin/chatgpt_paraphraser_on_T5_base"
tokenizer = AutoTokenizer.from_pretrained(_PARAPHRASER_CHECKPOINT)
model = AutoModelForSeq2SeqLM.from_pretrained(_PARAPHRASER_CHECKPOINT)
model = model.to(device)
def paraphrase(
    question,
    num_beams=5,
    num_beam_groups=5,
    num_return_sequences=1,
    repetition_penalty=10.0,
    diversity_penalty=3.0,
    no_repeat_ngram_size=2,
    temperature=0.7,
    max_length=10000
):
    """Paraphrase ``question`` with the module-level tokenizer and model.

    The text is prefixed with ``"paraphrase: "``, tokenized (truncated to
    ``max_length`` tokens), run through ``model.generate`` and decoded with
    special tokens stripped.

    Args:
        question: Text to paraphrase.
        num_beams: Forwarded to ``model.generate``.
        num_beam_groups: Forwarded to ``model.generate``.
        num_return_sequences: Forwarded to ``model.generate``.
        repetition_penalty: Forwarded to ``model.generate``.
        diversity_penalty: Forwarded to ``model.generate``.
        no_repeat_ngram_size: Forwarded to ``model.generate``.
        temperature: Forwarded to ``model.generate``.
            NOTE(review): sampling is not enabled here, so this value may
            have no effect on the output - confirm.
        max_length: Used both as the tokenizer truncation limit and as the
            generation length cap.

    Returns:
        The list of decoded strings produced by ``tokenizer.batch_decode``.
    """
    encoded = tokenizer(
        f'paraphrase: {question}',
        return_tensors="pt",
        padding="longest",
        max_length=max_length,
        truncation=True,
    )
    # The model was moved to a device at load time; the inputs must live on
    # the same device or generate() fails as soon as that device is not CPU.
    input_ids = encoded.input_ids.to(model.device)
    outputs = model.generate(
        input_ids,
        temperature=temperature,
        repetition_penalty=repetition_penalty,
        num_return_sequences=num_return_sequences,
        no_repeat_ngram_size=no_repeat_ngram_size,
        num_beams=num_beams,
        num_beam_groups=num_beam_groups,
        max_length=max_length,
        diversity_penalty=diversity_penalty,
    )
    return tokenizer.batch_decode(outputs, skip_special_tokens=True)
class Query(BaseModel):
    # Request body consumed by get_answer.
    # text: the input handed to paraphrase().
    text: str
class Query2(BaseModel):
    # Request body consumed by get_answer2.
    # text: the input handed to paraphrase() by the background worker.
    text: str
    # host: URL the background worker POSTs the finished result to.
    host: str
from fastapi import FastAPI, Request, Depends, UploadFile, File | |
from fastapi.exceptions import HTTPException | |
from fastapi.middleware.cors import CORSMiddleware | |
from fastapi.responses import JSONResponse | |
# now = datetime.now() | |
# UPLOAD_FOLDER = '/files' | |
# ALLOWED_EXTENSIONS = {'txt', 'pdf', 'png', | |
# 'jpg', 'jpeg', 'gif', 'ogg', 'mp3', 'wav'} | |
# The FastAPI application object for this service.
app = FastAPI()

# Cross-origin access is fully open: any origin, method and header.
# NOTE(review): wildcard origins together with allow_credentials=True is
# very permissive - confirm this is intended for the deployment.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
    allow_credentials=True,
)
# cred = credentials.Certificate('key.json') | |
# app1 = firebase_admin.initialize_app(cred) | |
# db = firestore.client() | |
# data_frame = pd.read_csv('data.csv') | |
async def startup_event():
    """Print a marker line to stdout; performs no other work."""
    banner = "on startup"
    print(banner)
async def get_answer(q: Query):
    """Paraphrase ``q.text`` and return the first candidate.

    Args:
        q: Request body carrying the text to paraphrase.

    Returns:
        The first string in the list returned by ``paraphrase``.
    """
    import asyncio  # local import: this block is the only asyncio user here

    # paraphrase() is blocking, CPU-heavy model inference. Calling it
    # directly inside a coroutine would stall the event loop (and every
    # other in-flight request) for the whole generation, so it is run in
    # a worker thread and awaited instead.
    candidates = await asyncio.to_thread(paraphrase, q.text)
    return candidates[0]
async def get_answer2(q: Query2):
    """Start a background paraphrase job and return its id immediately.

    The id is 20 random uppercase letters/digits followed by the current
    ``time.time()`` value. The job itself runs ``do_ML`` on a new thread,
    which receives the id, the text and the callback URL from ``q``.

    Args:
        q: Request body carrying the text and the callback ``host`` URL.

    Returns:
        A ``JSONResponse`` of the form ``{"id": <job id>}``.
    """
    token_length = 20
    alphabet = string.ascii_uppercase + string.digits
    random_part = ''.join(random.choices(alphabet, k=token_length))
    job_id = random_part + str(time.time())

    # NOTE(review): q.host comes straight from the request and is used by
    # do_ML as the target of an outbound POST - confirm it is trusted.
    worker = threading.Thread(target=do_ML, args=(job_id, q.text, q.host))
    worker.start()

    return JSONResponse({"id": job_id})
def do_ML(id: str, text: str, host: str):
    """Paraphrase ``text`` and POST the result to ``host``.

    Intended to run on a background thread: failures are reported on stdout
    and never propagate to the caller.

    Args:
        id: Job identifier echoed back in the POSTed payload.
        text: Text to paraphrase.
        host: URL that receives the form-encoded ``{"id", "result"}`` payload.
    """
    try:
        result = paraphrase(text)[0]
        payload = {"id": id, "result": result}
        # Without a timeout an unresponsive callback host would keep this
        # worker thread blocked forever.
        response = requests.post(host, data=payload, timeout=30)
        print(response.text)
    except Exception as exc:
        # Exception (not a bare except) so SystemExit/KeyboardInterrupt still
        # propagate; the cause is included so failures can be diagnosed.
        print("Error occured id=" + id + ": " + repr(exc))