ffreemt committed
Commit: cde2d91
Parent(s): 0e238ed
Update

Files changed:
- Dockerfile +4 -3
- m3_server.py +3 -1
Dockerfile CHANGED

@@ -1,6 +1,7 @@
 FROM python:3.10
 ENV PIP_ROOT_USER_ACTION=ignore \
     TZ=Asia/Shanghai
+RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
 
 WORKDIR /app
 COPY . .
@@ -20,8 +21,8 @@ RUN pip install --no-cache-dir --upgrade pip && \
 # CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
 # CMD ["TRANSFORMERS_CACHE=./", "infinity_emb", "--model-name-or-path", "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", "--port", "7860"]
 
-# x ["sh", "-c", "'FOO=BAR python app.py'"]
 # CMD ["python", "m3_server.py"]
+# x ["sh", "-c", "'FOO=BAR python m3_server.py'"]
+# CMD ["sh", "-c", "HF_HOME=/tmp/cache", "python", "m3_server.py"]
 
-
-CMD ["sh", "-c", "HF_HOME=/tmp/cache", "python", "m3_server.py"]
+CMD ["sh", "start-m3-server.sh"] # OK
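Note on the CMD change: the removed CMD ["sh", "-c", "HF_HOME=/tmp/cache", "python", "m3_server.py"] never starts the server, because sh -c executes only the single command string that follows -c (here a bare variable assignment), while "python" and "m3_server.py" only become positional parameters; the container evaluates the assignment and exits. The new CMD delegates startup to a wrapper script instead. start-m3-server.sh itself is not shown in this commit; a minimal sketch of what such a wrapper would need to do (an assumption, not the actual file) is:

#!/bin/sh
# Hypothetical sketch only; the real start-m3-server.sh is not part of this diff.
# Export the cache location before Python starts, so the Hugging Face libraries
# see it at import time, then hand the process over to the server.
export HF_HOME=/tmp/cache
exec python m3_server.py

Using exec keeps the Python server as the container's main process, so stop signals reach it directly.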
m3_server.py CHANGED

@@ -14,10 +14,12 @@ from FlagEmbedding import BGEM3FlagModel
 from pydantic import BaseModel
 from starlette.status import HTTP_504_GATEWAY_TIMEOUT
 
+_ = """
 Path("/tmp/cache").mkdir(exist_ok=True)
 os.environ["HF_HOME"] = "/tmp/cache"
 os.environ["TRANSFORMERS_CACHE"] = "/tmp/cache"
 # does not quite work
+# """
 
 batch_size = 2 # gpu batch_size in order of your available vram
 max_request = 10 # max request for future improvements on api calls / gpu batches (for now is pretty basic)
@@ -242,5 +244,5 @@ async def rerank(request: RerankRequest):
 
 if __name__ == "__main__":
     import uvicorn
-
+    print("started")
     uvicorn.run(app, host="0.0.0.0", port=port)
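Note on the m3_server.py change: the in-file cache setup is fenced off into an unused string because, as the "# does not quite work" comment hints, HF_HOME and TRANSFORMERS_CACHE are picked up when the Hugging Face libraries are imported (via FlagEmbedding near the top of the file), so assigning them afterwards has no effect. Setting the variable before the interpreter starts, as the new start-m3-server.sh CMD does, avoids that ordering problem. Purely for illustration (not part of this commit; the image name is a placeholder and port 7860 is only assumed from the commented-out commands), the same effect can be had at container start:

# Illustration only: inject the cache location when starting the container.
docker run --rm -e HF_HOME=/tmp/cache -p 7860:7860 <image-name>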