import os
import json
from datetime import datetime
from typing import List, Dict

from fastapi import FastAPI
from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse
from pydantic import BaseModel
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from huggingface_hub import AsyncInferenceClient

app = FastAPI()

# Configuration
models = [
    "meta-llama/Meta-Llama-3.1-8B-Instruct",
    "meta-llama/Meta-Llama-3.1-70B-Instruct",
    "meta-llama/Meta-Llama-3-8B-Instruct",
    "meta-llama/Meta-Llama-3-70B-Instruct",
    "meta-llama/Llama-Guard-3-8B",
    "meta-llama/Llama-2-7b-chat-hf",
    "meta-llama/Llama-2-13b-chat-hf",
    "deepseek-ai/DeepSeek-Coder-V2-Instruct",
    "mistralai/Mistral-7B-Instruct-v0.3",
    "mistralai/Mixtral-8x7B-Instruct-v0.1",
]
LOG_FILE = "api_logs.json"
CHECK_INTERVAL = 60  # seconds (1 minute)

client = AsyncInferenceClient(token=os.environ["HF_INFERENCE_API_TOKEN"])

# Ensure the log file exists
if not os.path.exists(LOG_FILE):
    with open(LOG_FILE, "w") as f:
        json.dump([], f)


class LogEntry(BaseModel):
    model: str
    success: bool
    timestamp: str
    status_code: int


async def check_apis():
    """Send a minimal chat request to each model and record whether the call succeeded."""
    results = []
    for model in models:
        try:
            await client.chat_completion(
                messages=[{"role": "user", "content": "What is the capital of France?"}],
                model=model,
                max_tokens=10,
            )
            # chat_completion returns a parsed result on success, so reaching here means the call went through.
            success = True
            status_code = 200
        except Exception as e:
            # Any failure (HTTP error, timeout, connection error) counts as a failed check.
            success = False
            # Some exception types carry the HTTP status; fall back to 0 when none is available.
            status_code = (
                getattr(e, "status", None)
                or getattr(getattr(e, "response", None), "status_code", None)
                or 0
            )
        results.append(LogEntry(
            model=model,
            success=success,
            timestamp=datetime.now().isoformat(),
            status_code=status_code,
        ))

    # Append the new entries to the JSON log file.
    with open(LOG_FILE, "r+") as f:
        logs = json.load(f)
        logs.extend([result.dict() for result in results])
        f.seek(0)
        json.dump(logs, f)
        f.truncate()


@app.on_event("startup")
async def start_scheduler():
    scheduler = AsyncIOScheduler()
    scheduler.add_job(check_apis, "interval", seconds=CHECK_INTERVAL)
    scheduler.start()


@app.get("/")
async def index():
    return FileResponse("static/index.html")


@app.get("/api/logs", response_model=List[LogEntry])
async def get_logs():
    with open(LOG_FILE, "r") as f:
        logs = json.load(f)
    return logs


@app.get("/api/chart-data", response_model=Dict[str, Dict[str, List]])
async def get_chart_data():
    with open(LOG_FILE, "r") as f:
        logs = json.load(f)

    # Group logs per model into x (timestamps) and y (1 = success, 0 = failure) series.
    chart_data = {}
    for log in logs:
        model = log["model"]
        if model not in chart_data:
            chart_data[model] = {"x": [], "y": []}
        chart_data[model]["x"].append(log["timestamp"])
        chart_data[model]["y"].append(1 if log["success"] else 0)
    return chart_data


# Mount the static files directory
app.mount("/static", StaticFiles(directory="static"), name="static")

if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)