Spaces:
Runtime error
Runtime error
etownsupport
committed on
Commit
•
0951ee0
1
Parent(s):
4e0c5c9
Upload 5 files
Browse files
- Dockerfile +27 -0
- app.py +10 -0
- etown_mxbai/__init__.py +7 -0
- etown_mxbai/router.py +70 -0
- requirements.txt +6 -0
Dockerfile
ADDED
@@ -0,0 +1,27 @@
# Use an official Python runtime as a parent image
FROM python:3.10.9

# Set the working directory in the container to /app
WORKDIR /app

# Create a directory for the Hugging Face cache and open its permissions so a
# non-root runtime user can write downloaded model files.  Done in a single
# RUN so the two commands share one image layer instead of two.
RUN mkdir -p /app/hf_cache && chmod -R 777 /app/hf_cache

# Point Hugging Face libraries at the writable cache directory
ENV HF_HOME=/app/hf_cache

# Copy the requirements file first so the dependency-install layer below is
# cached independently of application-code changes
COPY ./requirements.txt /app/requirements.txt

# Install any needed packages specified in requirements.txt
RUN pip install --no-cache-dir --upgrade -r /app/requirements.txt

# Copy the rest of the application into the container at /app
COPY . /app

# Make port 7860 available to the world outside this container
EXPOSE 7860

# Define the command to run the app using uvicorn
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py
ADDED
@@ -0,0 +1,10 @@
"""Application entry point.

Re-exports the FastAPI instance created in the ``etown_mxbai`` package so the
container can start the service with ``uvicorn app:app`` (see Dockerfile CMD).
The commented-out scratch FastAPI app that previously lived here was dead
code and has been removed.
"""

from etown_mxbai import app  # noqa: F401  (re-exported for uvicorn)
etown_mxbai/__init__.py
ADDED
@@ -0,0 +1,7 @@
"""``etown_mxbai`` package: FastAPI application serving text embeddings."""

from fastapi import FastAPI

# Create the application object first; the ``router`` module imported below
# needs it to exist.
app = FastAPI(title="mixedbread-ai/mxbai-embed-large-v1 embeddings")

# Deliberately imported *after* ``app`` is created: ``etown_mxbai.router``
# does ``from etown_mxbai import app`` and registers routes/middleware as an
# import side effect, so a top-of-file import would be circular.
from etown_mxbai import router  # noqa: E402,F401
etown_mxbai/router.py
ADDED
@@ -0,0 +1,70 @@
"""Model loading, request schema, and middleware setup for the embedding API."""

import os
import platform
import time
from typing import List

import numpy as np
import fastembed
from fastembed import SparseEmbedding, SparseTextEmbedding, TextEmbedding
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from pydantic import BaseModel
from transformers import AutoTokenizer

# SPLADE sparse-embedding model, loaded once at import time (downloads the
# weights into HF_HOME on first run).
sparse_model_name = "prithvida/Splade_PP_en_v1"
sparse_model = SparseTextEmbedding(model_name=sparse_model_name, batch_size=32)


class Validation(BaseModel):
    """Request body for ``/api/generate``."""

    # Sentences to embed, one embedding returned per entry.
    prompt: List[str]


# Kept mid-file on purpose: the package __init__ creates ``app`` and then
# imports this module, so importing ``app`` at the very top would still work
# only because of that ordering — this placement documents the dependency.
from etown_mxbai import app  # noqa: E402

# Allow browser clients from any origin to call the API.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
@app.post("/api/generate", summary="Generate embeddings", tags=["Generate"])
def inference(item: Validation):
    """Embed each sentence in ``item.prompt`` with the SPLADE sparse model.

    Returns a JSON payload with one ``{"values": [...], "indices": [...]}``
    entry per input sentence, plus timing metadata.  Failures now carry HTTP
    status 500 (previously the handler always answered 200 even on error,
    which defeated client-side error detection).
    """
    try:
        start_time = time.time()
        embeddings = list(sparse_model.embed(item.prompt, batch_size=5))

        serializable_embeddings = []
        for embedding in embeddings:
            if isinstance(embedding, SparseEmbedding):
                # Normal case: sparse embeddings expose parallel
                # values/indices arrays; convert numpy arrays to plain
                # lists so they are JSON-serializable.
                values = embedding.values
                indices = embedding.indices
                serializable_embeddings.append({
                    "values": values.tolist() if isinstance(values, np.ndarray) else values,
                    "indices": indices.tolist() if isinstance(indices, np.ndarray) else indices
                })
            else:
                # Fallback for other embedding types (e.g. dense ndarrays):
                # serialize the raw values and synthesize dense indices.
                serializable_embeddings.append({
                    "values": embedding.tolist() if isinstance(embedding, np.ndarray) else str(embedding),
                    "indices": list(range(len(embedding))) if isinstance(embedding, (np.ndarray, list)) else []
                })

        time_taken = time.time() - start_time  # wall-clock embedding time

        return JSONResponse(content={
            "embeddings": serializable_embeddings,
            "time_taken": f"{time_taken:.2f} seconds",
            "Number_of_sentence_processed": len(item.prompt),
            "Model_response_space" : "prithvida/Splade_PP_en_v1",
            "status_code" : 200
        })
    except Exception as e:
        # Boundary handler: report the failure instead of crashing the worker.
        print(f"An error occurred: {str(e)}")  # NOTE(review): prefer the logging module
        # status_code=500 sets the actual HTTP status, matching the body field.
        return JSONResponse(status_code=500, content={
            "error": "An error occurred during processing.",
            "details": str(e),
            "Model_response_space" : "prithvida/Splade_PP_en_v1",
            "status_code" : 500
        })
requirements.txt
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
fastapi
|
2 |
+
uvicorn
|
3 |
+
requests
|
4 |
+
pydantic
|
5 |
+
transformers
|
6 |
+
fastembed
|