"""FastAPI service that turns submitted text into a short generated headline.

The module loads a T5 headline-generation checkpoint once at import time and
exposes a single ``POST /`` endpoint that returns the generated text.
"""

import io
import json
import os
import random
import re
import shutil
import subprocess
from datetime import date, datetime
from typing import Annotated

import firebase_admin
import numpy as np
import pandas as pd
import requests
import speech_recognition as sr
import torch
from fastapi import Depends, FastAPI, File, Form, Request, UploadFile
from fastapi.exceptions import HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from firebase_admin import credentials, firestore
from pydantic import BaseModel
from pydub import AudioSegment
from sklearn.ensemble import RandomForestRegressor
from transformers import AutoTokenizer, T5ForConditionalGeneration
from werkzeug.utils import secure_filename

# Previously used imports, kept disabled:
# from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline
# from transformers import BertTokenizerFast, EncoderDecoderModel


class Query(BaseModel):
    """Request body for the headline endpoint."""

    # Raw text to generate a headline for.
    text: str


# Matches any run of whitespace (spaces, tabs, newlines, ...).
_WHITESPACE_RE = re.compile(r"\s+")


def WHITESPACE_HANDLER(k):
    """Return *k* stripped, with every run of whitespace collapsed to one space.

    The name and parameter are kept from the original lambda so existing
    callers continue to work.
    """
    return _WHITESPACE_RE.sub(" ", k.strip())


# Token budget for the tokenized input and for the generated output.
MAX_INPUT_TOKENS = 384
MAX_OUTPUT_TOKENS = 84

model_name = "JulesBelveze/t5-small-headline-generator"
# Loaded once at import time so every request reuses the same objects.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

# Disabled upload configuration:
# now = datetime.now()
# UPLOAD_FOLDER = '/files'
# ALLOWED_EXTENSIONS = {'txt', 'pdf', 'png',
#                       'jpg', 'jpeg', 'gif', 'ogg', 'mp3', 'wav'}

app = FastAPI()

# NOTE(review): wildcard origins together with allow_credentials=True lets any
# site make credentialed requests; confirm this is intended before deploying.
app.add_middleware(
    CORSMiddleware,
    allow_origins=['*'],
    allow_credentials=True,
    allow_methods=['*'],
    allow_headers=['*'],
)

# Disabled Firestore / dataset setup:
# cred = credentials.Certificate('key.json')
# app1 = firebase_admin.initialize_app(cred)
# db = firestore.client()
# data_frame = pd.read_csv('data.csv')


@app.on_event("startup")
async def startup_event():
    """Print a marker when the application starts."""
    print("on startup")


@app.post("/")
async def get_answer(q: Query):
    """Generate a headline for ``q.text`` and return it as a string.

    The text is whitespace-normalized, tokenized (truncated or padded to
    ``MAX_INPUT_TOKENS``), passed through beam search, and decoded with
    special tokens removed.

    NOTE(review): generation runs synchronously inside this coroutine, so the
    event loop is busy for the duration of each request.
    """
    encoded = tokenizer(
        [WHITESPACE_HANDLER(q.text)],
        return_tensors="pt",
        padding="max_length",
        truncation=True,
        max_length=MAX_INPUT_TOKENS,
    )

    # Pass the attention mask explicitly so the padded positions are ignored
    # without relying on generate() inferring them from the pad token id.
    output_ids = model.generate(
        input_ids=encoded["input_ids"],
        attention_mask=encoded["attention_mask"],
        max_length=MAX_OUTPUT_TOKENS,
        no_repeat_ngram_size=2,
        num_beams=4,
    )[0]

    return tokenizer.decode(
        output_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )