alexkueck committed
Commit 5ffd14f
Parent: 453eb35

Update app.py

Files changed (1)
  1. app.py +29 -3
app.py CHANGED
@@ -1,5 +1,5 @@
 import requests
-from huggingface_hub import InferenceClient, login
+from huggingface_hub import InferenceClient, login, create_inference_endpoint
 from transformers import AutoTokenizer
 from langchain.chat_models import ChatOpenAI
 import os, sys, json
@@ -85,6 +85,32 @@ print ("Inf.Client")
 #API_URL = "https://api-inference.huggingface.co/models/argilla/notux-8x7b-v1"
 HEADERS = {"Authorization": f"Bearer {HUGGINGFACEHUB_API_TOKEN}"}
 
+##############################################
+# Inference Endpoint
+##############################################
+endpoint = create_inference_endpoint(
+    "smaug-72b-v0-1-bmw",
+    repository="abacusai/Smaug-72B-v0.1",
+    framework="pytorch",
+    task="text-generation",
+    accelerator="gpu",
+    vendor="aws",
+    region="us-east-1",
+    type="protected",
+    instance_size="medium",
+    instance_type="g5.2xlarge",
+    custom_image={
+        "health_route": "/health",
+        "env": {
+            "MAX_BATCH_PREFILL_TOKENS": "2048",
+            "MAX_INPUT_LENGTH": "1024",
+            "MAX_TOTAL_TOKENS": "1512",
+            "MODEL_ID": "/repository"
+        },
+        "url": "https://ih7lj8onsvp1wbh0.us-east-1.aws.endpoints.huggingface.cloud",
+    },
+)
+
 ##############################################
 # tokenizer for generating prompt
 ##############################################
@@ -315,7 +341,7 @@ def generate(text, history, rag_option, model_option, k=3, top_p=0.6, temperatu
     try:
         if (model_option == "HF1"):
             # Request to InferenceEndpoint1 ----------------------------
-            API_URL = "https://ih7lj8onsvp1wbh0.us-east-1.aws.endpoints.huggingface.cloud" #"https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"
+            API_URL = "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"
             print("HF1")
         else:
             API_URL = "https://api-inference.huggingface.co/models/tiiuae/falcon-180B-chat"
@@ -350,7 +376,7 @@ def generate(text, history, rag_option, model_option, k=3, top_p=0.6, temperatu
         "inputs": prompt,
         "options": {"max_new_tokens": max_new_tokens},
     }
-    response = requests.post(API_URL, headers=HEADERS, json=data)
+    response = endpoint.client.text_generation(prompt)  # requests.post(API_URL, headers=HEADERS, json=data)
     if response != None:
         result = response.json()
         print("result:------------------")