Update app.py
app.py CHANGED
@@ -85,31 +85,7 @@ print ("Inf.Client")
 #API_URL = "https://api-inference.huggingface.co/models/argilla/notux-8x7b-v1"
 HEADERS = {"Authorization": f"Bearer {HUGGINGFACEHUB_API_TOKEN}"}
 
-
-#Inference Endpoint
-##############################################
-endpoint = create_inference_endpoint(
-    "smaug-72b-v0-1-bmw",
-    repository="abacusai/Smaug-72B-v0.1",
-    framework="pytorch",
-    task="text-generation",
-    accelerator="gpu",
-    vendor="aws",
-    region="us-east-1",
-    type="protected",
-    instance_size="medium",
-    instance_type="g5.2xlarge",
-    custom_image={
-        "health_route": "/health",
-        "env": {
-            "MAX_BATCH_PREFILL_TOKENS": "2048",
-            "MAX_INPUT_LENGTH": "1024",
-            "MAX_TOTAL_TOKENS": "1512",
-            "MODEL_ID": "/repository"
-        },
-        "url": "https://ih7lj8onsvp1wbh0.us-east-1.aws.endpoints.huggingface.cloud",
-    },
-)
+
 
 ##############################################
 # tokenizer for generating prompt
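Net effect of the two hunks: the #Inference Endpoint block is not deleted but moved from module scope into generate() (second hunk below), so the endpoint is created on every call instead of once at import time. Worth noting that create_inference_endpoint() typically fails once an endpoint with the same name already exists in the namespace, so repeated generate() calls would likely need a reuse pattern instead. A minimal sketch, assuming a recent huggingface_hub and the endpoint name taken from this diff:

from huggingface_hub import get_inference_endpoint

# Fetch the previously created endpoint by name instead of re-creating it.
endpoint = get_inference_endpoint("smaug-72b-v0-1-bmw")
if endpoint.status == "paused":
    endpoint.resume()      # bring a paused endpoint back up
endpoint.wait()            # poll until it is "running" and has a URL
client = endpoint.client   # InferenceClient bound to the endpoint URL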
@@ -376,6 +352,31 @@ def generate(text, history, rag_option, model_option, k=3, top_p=0.6, temperatu
         "inputs": prompt,
         "options": {"max_new_tokens": max_new_tokens},
     }
+    ##############################################
+    #Inference Endpoint
+    ##############################################
+    endpoint = create_inference_endpoint(
+        "smaug-72b-v0-1-bmw",
+        repository="abacusai/Smaug-72B-v0.1",
+        framework="pytorch",
+        task="text-generation",
+        accelerator="gpu",
+        vendor="aws",
+        region="us-east-1",
+        type="protected",
+        instance_size="medium",
+        instance_type="g5.2xlarge",
+        custom_image={
+            "health_route": "/health",
+            "env": {
+                "MAX_BATCH_PREFILL_TOKENS": "2048",
+                "MAX_INPUT_LENGTH": "1024",
+                "MAX_TOTAL_TOKENS": "1512",
+                "MODEL_ID": "/repository"
+            },
+            "url": "https://ih7lj8onsvp1wbh0.us-east-1.aws.endpoints.huggingface.cloud",
+        },
+    )
     response= endpoint.client.text_generation(prompt) #requests.post(API_URL, headers=HEADERS, json=data)
     if response != None:
         result = response.json()
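A caveat on the new call site kept from the old requests.post() path: InferenceClient.text_generation() returns the generated text directly (a str unless details=True is passed), not an HTTP response object, so the retained result = response.json() would raise AttributeError at runtime, and the data dict built just above is left unused. A minimal sketch of the adjusted handling, assuming the endpoint object from the added block:

# text_generation() returns the generated string itself; no .json() step.
generated = endpoint.client.text_generation(
    prompt,
    max_new_tokens=max_new_tokens,  # kwarg replaces the old "options" dict
)
result = generated

Passing max_new_tokens as a keyword argument carries over the one setting from the otherwise-unused data["options"] payload.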