Yash Sachdeva committed
Commit
9bf2007
1 Parent(s): e5e2748

quuestion_paper

Files changed (2)
  1. Dockerfile +2 -1
  2. question_paper.py +24 -22
Dockerfile CHANGED
@@ -6,8 +6,9 @@ COPY . .
  # Set the working directory to /
  WORKDIR /

+ RUN pip install transformers
  # Install requirements.txt
  RUN pip install --no-cache-dir --upgrade -r /requirements.txt

  # Start the FastAPI app on port 7860, the default port expected by Spaces
- CMD ["uvicorn", "question_paper:app", "--host", "0.0.0.0", "--port", "7860"]
+ CMD ["uvicorn", "question_paper:app", "--host", "0.0.0.0", "--port", "7860"]
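The new RUN pip install transformers layer is added above the pinned requirements install, and the CMD still serves question_paper:app on port 7860. Below is a minimal smoke test for the rebuilt image, assuming the container has already been built and started with port 7860 published to localhost; the URL and timeout are illustrative and not part of this commit.

import requests

# Hypothetical local check: requires the container from this Dockerfile to be
# running with port 7860 published (e.g. mapped to localhost:7860).
resp = requests.get("http://localhost:7860/llama", timeout=600)
print(resp.status_code)
print(resp.json())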
question_paper.py CHANGED
@@ -2,33 +2,35 @@ import time
  import copy
  import asyncio
  import requests
+ import transformers
+ import torch

  from fastapi import FastAPI, Request
- from llama_cpp import Llama
  from sse_starlette import EventSourceResponse
+
+ from transformers import AutoTokenizer
+
  # Load the model
- print("Loading model...")
- llm = Llama(model_path="./llama-2-13b-chat.ggmlv3.q4_1.bin")  # change based on the location of models
- print("Model loaded!")

  app = FastAPI()
-
+ model = "meta-llama/Llama-2-70b"
  @app.get("/llama")
- async def llama(request: Request, question: str):
-     stream = llm(
-         f"""{question}""",
-         max_tokens=100,
-         stop=["\n", " Q:"],
-         stream=True,
+ def llama():
+     tokenizer = AutoTokenizer.from_pretrained(model)
+
+     pipeline = transformers.pipeline("text-generation", model=model, torch_dtype=torch.float16, device_map="auto")
+
+     sequences = pipeline(
+         'I liked "Breaking Bad" and "Band of Brothers". Do you have any recommendations of other shows I might like?\n',
+         do_sample=True,
+         top_k=10,
+         num_return_sequences=1,
+         eos_token_id=tokenizer.eos_token_id,
+         max_length=200,
      )
-     async def async_generator():
-         for item in stream:
-             yield item
-     async def server_sent_events():
-         async for item in async_generator():
-             if await request.is_disconnected():
-                 break
-             result = copy.deepcopy(item)
-             text = result["choices"][0]["text"]
-             yield {"data": text}
-     return EventSourceResponse(server_sent_events())
+
+     for seq in sequences:
+         print(f"Result: {seq['generated_text']}")
+
+     return sequences
+
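As committed, the rewritten handler builds the tokenizer and pipeline inside the request function, so every GET /llama call reloads meta-llama/Llama-2-70b; the removed llama_cpp version instead loaded the model once at module level (the "Load the model" comment). Below is a minimal sketch of one possible follow-up, not part of this commit, that keeps the same transformers text-generation pipeline but moves loading to startup and reuses the question query parameter from the removed handler; the variable name generator and the parameter wiring are illustrative assumptions.

import torch
import transformers
from fastapi import FastAPI
from transformers import AutoTokenizer

app = FastAPI()

# Load the model once at startup instead of on every request.
model = "meta-llama/Llama-2-70b"
tokenizer = AutoTokenizer.from_pretrained(model)
generator = transformers.pipeline(
    "text-generation",
    model=model,
    torch_dtype=torch.float16,
    device_map="auto",
)

@app.get("/llama")
def llama(question: str):
    # Reuse the preloaded pipeline; only the prompt changes per request.
    sequences = generator(
        question,
        do_sample=True,
        top_k=10,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        max_length=200,
    )
    return sequences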