# FastAPI service that periodically probes Hugging Face inference endpoints
# and records their availability to a JSON log for a status dashboard.
import os | |
import json | |
from datetime import datetime | |
from typing import List, Dict | |
import requests | |
from fastapi import FastAPI, HTTPException | |
from fastapi.staticfiles import StaticFiles | |
from fastapi.responses import FileResponse | |
from pydantic import BaseModel | |
import plotly.graph_objs as go | |
from apscheduler.schedulers.asyncio import AsyncIOScheduler | |
from huggingface_hub import AsyncInferenceClient | |
# Application instance; routes and static mounts attach to this.
app = FastAPI()

# Configuration
# Hugging Face model repo ids whose hosted inference endpoints are probed.
models = [
    "meta-llama/Meta-Llama-3.1-8B-Instruct",
    "meta-llama/Meta-Llama-3.1-70B-Instruct",
    "meta-llama/Meta-Llama-3-8B-Instruct",
    "meta-llama/Meta-Llama-3-70B-Instruct",
    "meta-llama/Llama-Guard-3-8B",
    "meta-llama/Llama-2-7b-chat-hf",
    "meta-llama/Llama-2-13b-chat-hf",
    "deepseek-ai/DeepSeek-Coder-V2-Instruct",
    "mistralai/Mistral-7B-Instruct-v0.3",
    "mistralai/Mixtral-8x7B-Instruct-v0.1",
]
# Path of the JSON file holding the flat array of probe results.
LOG_FILE = "api_logs.json"
CHECK_INTERVAL = 60  # 1 minute
# NOTE(review): raises KeyError at import time when HF_INFERENCE_API_TOKEN
# is unset — presumably intentional fail-fast for a deployed Space; confirm.
client = AsyncInferenceClient(token=os.environ["HF_INFERENCE_API_TOKEN"])

# Ensure log file exists (seeded with an empty JSON array so later
# json.load calls never see an empty/absent file).
if not os.path.exists(LOG_FILE):
    with open(LOG_FILE, "w") as f:
        json.dump([], f)
class LogEntry(BaseModel):
    """Result of a single availability probe against one model endpoint."""

    model: str        # model repo id that was probed
    success: bool     # True when the probe completed without error
    timestamp: str    # ISO-8601 time the probe finished (local time, naive)
    status_code: int  # HTTP status recorded for the probe
async def check_apis():
    """Probe every configured model endpoint once and append results to LOG_FILE.

    Fixes over the previous version:
    - the model under test is now actually passed to ``chat_completion``
      (previously every iteration hit the client's default model);
    - a failed probe no longer reads ``response.status_code`` from an
      undefined or stale ``response`` variable (the completion object
      returned on success has no ``status_code`` attribute either);
    - errors from the async HF client are caught broadly — it raises
      ``HfHubHTTPError``/timeouts, not ``requests.RequestException``;
    - the log file is truncated after the in-place rewrite so a shorter
      payload cannot leave trailing garbage bytes.
    """
    results: List[LogEntry] = []
    for model in models:
        try:
            await client.chat_completion(
                messages=[{"role": "user", "content": "What is the capital of France?"}],
                max_tokens=10,
                model=model,  # probe this specific model, not the client default
            )
            success, status_code = True, 200
        except Exception as exc:  # HfHubHTTPError, timeouts, connection errors
            success = False
            # Best effort: extract an HTTP status off the exception if present.
            status_code = getattr(getattr(exc, "response", None), "status_code", 0) or 0
        results.append(LogEntry(
            model=model,
            success=success,
            timestamp=datetime.now().isoformat(),
            status_code=status_code,
        ))
    # Append the new entries to the JSON array on disk.
    with open(LOG_FILE, "r+") as f:
        logs = json.load(f)
        logs.extend(result.dict() for result in results)
        f.seek(0)
        json.dump(logs, f)
        f.truncate()  # drop any leftover bytes from the previous contents
@app.on_event("startup")
async def start_scheduler():
    """Start the background scheduler that runs check_apis periodically.

    Fixes: registered as a FastAPI startup handler — nothing in the visible
    source ever called this function, so the scheduler never started; and the
    interval now comes from CHECK_INTERVAL (seconds, same 60s cadence as the
    old hard-coded ``minutes=1``) so the probe frequency is configured in
    exactly one place.
    """
    scheduler = AsyncIOScheduler()
    scheduler.add_job(check_apis, 'interval', seconds=CHECK_INTERVAL)
    scheduler.start()
@app.get("/")
async def index():
    """Serve the dashboard page.

    Fix: the route decorator was missing, so this handler was never
    reachable. Registered at ``/`` — confirm the path against the
    front-end's expectations.
    """
    return FileResponse("static/index.html")
@app.get("/logs")
async def get_logs():
    """Return the raw probe log as a JSON array of LogEntry dicts.

    Fix: the route decorator was missing, so this handler was never
    reachable. Registered at ``/logs`` — confirm the path against the
    front-end's fetch URLs.
    """
    with open(LOG_FILE, "r") as f:
        logs = json.load(f)
    return logs
@app.get("/chart_data")
async def get_chart_data():
    """Aggregate the probe log into per-model time series for plotting.

    Returns ``{model: {"x": [timestamps], "y": [1 or 0 success flags]}}``.

    Fix: the route decorator was missing, so this handler was never
    reachable. Registered at ``/chart_data`` — confirm the path against the
    front-end's fetch URLs.
    """
    with open(LOG_FILE, "r") as f:
        logs = json.load(f)
    chart_data: Dict[str, Dict[str, list]] = {}
    for log in logs:
        # setdefault creates the series on first sight of a model.
        series = chart_data.setdefault(log['model'], {'x': [], 'y': []})
        series['x'].append(log['timestamp'])
        series['y'].append(1 if log['success'] else 0)
    return chart_data
# Mount the static files directory so the dashboard's assets are served.
app.mount("/static", StaticFiles(directory="static"), name="static")

if __name__ == "__main__":
    import uvicorn
    # Bind on all interfaces; 7860 is the conventional Hugging Face Spaces port.
    uvicorn.run(app, host="0.0.0.0", port=7860)