etownsupport committed on
Commit
0951ee0
1 Parent(s): 4e0c5c9

Upload 5 files

Browse files
Files changed (5) hide show
  1. Dockerfile +27 -0
  2. app.py +10 -0
  3. etown_mxbai/__init__.py +7 -0
  4. etown_mxbai/router.py +70 -0
  5. requirements.txt +6 -0
Dockerfile ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use an official Python runtime as a parent image
FROM python:3.10.9

# Set the working directory in the container to /app
WORKDIR /app

# Create a directory for the Hugging Face cache and set broad permissions
# (merged into one RUN so this produces a single image layer)
RUN mkdir -p /app/hf_cache && chmod -R 777 /app/hf_cache

# Point Hugging Face libraries at the writable cache directory
ENV HF_HOME=/app/hf_cache

# Copy only the requirements file first so the dependency-install layer
# is cached independently of application-code changes
COPY ./requirements.txt /app/requirements.txt

# Install any needed packages specified in requirements.txt
RUN pip install --no-cache-dir --upgrade -r /app/requirements.txt

# Copy the rest of the application into the container at /app
COPY . /app

# Make port 7860 available to the world outside this container
EXPOSE 7860

# Define the command to run the app using uvicorn
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
"""ASGI entry point: re-export the FastAPI application object.

uvicorn is started as ``app:app`` (see the Dockerfile CMD), so this module
only needs to expose the ``app`` instance created in the ``etown_mxbai``
package.  The previous commented-out duplicate FastAPI app has been removed
as dead code.
"""

from etown_mxbai import app  # noqa: F401  (re-exported for uvicorn)
etown_mxbai/__init__.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
from fastapi import FastAPI

# Application object served by uvicorn (referenced as "app:app" in the
# Dockerfile, re-exported through the top-level app.py module).
app = FastAPI(title="mixedbread-ai/mxbai-embed-large-v1 embeddings")

# Imported for its side effects only: etown_mxbai.router imports ``app``
# back from this package and registers routes and middleware on it.
# This import must come AFTER ``app`` is created, otherwise the circular
# import fails — do not move it to the top of the file.
from etown_mxbai import router  # noqa: E402,F401
etown_mxbai/router.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from pydantic import BaseModel
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
# from sentence_transformers import SentenceTransformer
# from sentence_transformers.util import cos_sim
from typing import List
import os, platform, time
from transformers import AutoTokenizer
import fastembed
from fastembed import SparseEmbedding, SparseTextEmbedding, TextEmbedding
import numpy as np


# SPLADE sparse-embedding model, loaded once at import time so the first
# request does not pay the model-download/load cost.
sparse_model_name = "prithvida/Splade_PP_en_v1"
sparse_model = SparseTextEmbedding(model_name=sparse_model_name, batch_size=32)

class Validation(BaseModel):
    # Request body: the list of sentences to embed.
    prompt: List[str]

# Imported late to avoid a circular import: the package __init__ creates
# ``app`` first, then imports this module (see etown_mxbai/__init__.py).
from etown_mxbai import app

# Allow cross-origin requests from any origin — this is exposed as a
# public embedding endpoint.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
30
@app.post("/api/generate", summary="Generate embeddings", tags=["Generate"])
def inference(item: Validation):
    """Generate SPLADE sparse embeddings for a batch of sentences.

    Args:
        item: Request body carrying ``prompt``, the list of input sentences.

    Returns:
        JSONResponse whose body contains ``embeddings`` (one
        ``{"values": [...], "indices": [...]}`` dict per sentence), the
        wall-clock ``time_taken``, the number of sentences processed, and
        the model name.  On failure, an HTTP 500 response describing the
        error.
    """
    try:
        start_time = time.time()
        embeddings = list(sparse_model.embed(item.prompt, batch_size=5))

        serializable_embeddings = []
        for embedding in embeddings:
            if isinstance(embedding, SparseEmbedding):
                # fastembed sparse embeddings expose parallel ``values`` and
                # ``indices`` arrays; convert numpy arrays to plain lists so
                # the payload is JSON-serializable.
                values = embedding.values
                indices = embedding.indices
                serializable_embeddings.append({
                    "values": values.tolist() if isinstance(values, np.ndarray) else values,
                    "indices": indices.tolist() if isinstance(indices, np.ndarray) else indices
                })
            else:
                # Fallback for other embedding types (dense arrays etc.).
                serializable_embeddings.append({
                    "values": embedding.tolist() if isinstance(embedding, np.ndarray) else str(embedding),
                    "indices": list(range(len(embedding))) if isinstance(embedding, (np.ndarray, list)) else []
                })

        time_taken = time.time() - start_time  # Calculate the time taken

        return JSONResponse(content={
            "embeddings": serializable_embeddings,
            "time_taken": f"{time_taken:.2f} seconds",
            "Number_of_sentence_processed": len(item.prompt),
            # Use the module-level constant instead of repeating the literal,
            # so a model swap only has to change one place.
            "Model_response_space": sparse_model_name,
            "status_code": 200
        })
    except Exception as e:
        print(f"An error occurred: {str(e)}")  # Simple print statement for logging; consider using proper logging
        # FIX: set the real HTTP status.  The original returned HTTP 200
        # with a 500 only inside the body, which breaks standard client
        # error handling (JSONResponse defaults to status_code=200).
        return JSONResponse(
            status_code=500,
            content={
                "error": "An error occurred during processing.",
                "details": str(e),
                "Model_response_space": sparse_model_name,
                "status_code": 500
            })
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
fastapi
uvicorn
requests
pydantic
transformers
fastembed
# numpy is imported directly in etown_mxbai/router.py; declare it
# explicitly rather than relying on a transitive install.
numpy