Yash Sachdeva committed
Commit
9bf2007
1 Parent(s): e5e2748

quuestion_paper

Files changed (2)
  1. Dockerfile +2 -1
  2. question_paper.py +24 -22
Dockerfile CHANGED
@@ -6,8 +6,9 @@ COPY . .
  # Set the working directory to /
  WORKDIR /

+ RUN pip install transformers
  # Install requirements.txt
  RUN pip install --no-cache-dir --upgrade -r /requirements.txt

  # Start the FastAPI app on port 7860, the default port expected by Spaces
- CMD ["uvicorn", "question_paper:app", "--host", "0.0.0.0", "--port", "7860"]
+ CMD ["uvicorn", "question_paper:app", "--host", "0.0.0.0", "--port", "7860"]
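The new RUN pip install transformers layer is added above the pinned requirements install, and the CMD still serves question_paper:app on port 7860. Below is a minimal smoke test for the rebuilt image, assuming the container has already been built and started with port 7860 published to localhost; the URL and timeout are illustrative and not part of this commit.

import requests

# Hypothetical local check: requires the container from this Dockerfile to be
# running with port 7860 published (e.g. mapped to localhost:7860).
resp = requests.get("http://localhost:7860/llama", timeout=600)
print(resp.status_code)
print(resp.json())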
question_paper.py CHANGED
@@ -2,33 +2,35 @@ import time
  import copy
  import asyncio
  import requests
+ import transformers
+ import torch

  from fastapi import FastAPI, Request
- from llama_cpp import Llama
  from sse_starlette import EventSourceResponse
+
+ from transformers import AutoTokenizer
+
  # Load the model
- print("Loading model...")
- llm = Llama(model_path="./llama-2-13b-chat.ggmlv3.q4_1.bin")  # change based on the location of models
- print("Model loaded!")

  app = FastAPI()
-
+ model = "meta-llama/Llama-2-70b"
  @app.get("/llama")
- async def llama(request: Request, question: str):
-     stream = llm(
-         f"""{question}""",
-         max_tokens=100,
-         stop=["\n", " Q:"],
-         stream=True,
+ def llama():
+     tokenizer = AutoTokenizer.from_pretrained(model)
+
+     pipeline = transformers.pipeline("text-generation", model=model, torch_dtype=torch.float16, device_map="auto")
+
+     sequences = pipeline(
+         'I liked "Breaking Bad" and "Band of Brothers". Do you have any recommendations of other shows I might like?\n',
+         do_sample=True,
+         top_k=10,
+         num_return_sequences=1,
+         eos_token_id=tokenizer.eos_token_id,
+         max_length=200,
      )
-     async def async_generator():
-         for item in stream:
-             yield item
-     async def server_sent_events():
-         async for item in async_generator():
-             if await request.is_disconnected():
-                 break
-             result = copy.deepcopy(item)
-             text = result["choices"][0]["text"]
-             yield {"data": text}
-     return EventSourceResponse(server_sent_events())
+
+     for seq in sequences:
+         print(f"Result: {seq['generated_text']}")
+
+     return sequences
+
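As committed, the rewritten handler builds the tokenizer and pipeline inside the request function, so every GET /llama call reloads meta-llama/Llama-2-70b; the removed llama_cpp version instead loaded the model once at module level (the "Load the model" comment). Below is a minimal sketch of one possible follow-up, not part of this commit, that keeps the same transformers text-generation pipeline but moves loading to startup and reuses the question query parameter from the removed handler; the variable name generator and the parameter wiring are illustrative assumptions.

import torch
import transformers
from fastapi import FastAPI
from transformers import AutoTokenizer

app = FastAPI()

# Load the model once at startup instead of on every request.
model = "meta-llama/Llama-2-70b"
tokenizer = AutoTokenizer.from_pretrained(model)
generator = transformers.pipeline(
    "text-generation",
    model=model,
    torch_dtype=torch.float16,
    device_map="auto",
)

@app.get("/llama")
def llama(question: str):
    # Reuse the preloaded pipeline; only the prompt changes per request.
    sequences = generator(
        question,
        do_sample=True,
        top_k=10,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        max_length=200,
    )
    return sequences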