Spaces:
Running
Running
:gem: [Feature] Add use_cache parameter, and set default temperature to 0.5
Browse files
- apis/chat_api.py +6 -1
- networks/message_streamer.py +6 -1
apis/chat_api.py
CHANGED
@@ -88,13 +88,17 @@ class ChatAPIApp:
|
|
88 |
description="(list) Messages",
|
89 |
)
|
90 |
temperature: Union[float, None] = Field(
|
91 |
-
default=0,
|
92 |
description="(float) Temperature",
|
93 |
)
|
94 |
max_tokens: Union[int, None] = Field(
|
95 |
default=-1,
|
96 |
description="(int) Max tokens",
|
97 |
)
|
|
|
|
|
|
|
|
|
98 |
stream: bool = Field(
|
99 |
default=True,
|
100 |
description="(bool) Stream",
|
@@ -113,6 +117,7 @@ class ChatAPIApp:
|
|
113 |
temperature=item.temperature,
|
114 |
max_new_tokens=item.max_tokens,
|
115 |
api_key=api_key,
|
|
|
116 |
)
|
117 |
if item.stream:
|
118 |
event_source_response = EventSourceResponse(
|
|
|
88 |
description="(list) Messages",
|
89 |
)
|
90 |
temperature: Union[float, None] = Field(
|
91 |
+
default=0.5,
|
92 |
description="(float) Temperature",
|
93 |
)
|
94 |
max_tokens: Union[int, None] = Field(
|
95 |
default=-1,
|
96 |
description="(int) Max tokens",
|
97 |
)
|
98 |
+
use_cache: bool = Field(
|
99 |
+
default=False,
|
100 |
+
description="(bool) Use cache",
|
101 |
+
)
|
102 |
stream: bool = Field(
|
103 |
default=True,
|
104 |
description="(bool) Stream",
|
|
|
117 |
temperature=item.temperature,
|
118 |
max_new_tokens=item.max_tokens,
|
119 |
api_key=api_key,
|
120 |
+
use_cache=item.use_cache,
|
121 |
)
|
122 |
if item.stream:
|
123 |
event_source_response = EventSourceResponse(
|
networks/message_streamer.py
CHANGED
@@ -61,9 +61,10 @@ class MessageStreamer:
|
|
61 |
def chat_response(
|
62 |
self,
|
63 |
prompt: str = None,
|
64 |
-
temperature: float = 0,
|
65 |
max_new_tokens: int = None,
|
66 |
api_key: str = None,
|
|
|
67 |
):
|
68 |
# https://huggingface.co/docs/api-inference/detailed_parameters?code=curl
|
69 |
# curl --proxy http://<server>:<port> https://api-inference.huggingface.co/models/<org>/<model_name> -X POST -d '{"inputs":"who are you?","parameters":{"max_new_token":64}}' -H 'Content-Type: application/json' -H 'Authorization: Bearer <HF_TOKEN>'
|
@@ -105,6 +106,7 @@ class MessageStreamer:
|
|
105 |
# huggingface_hub/inference/_text_generation.py:
|
106 |
# class TextGenerationRequest > param `stream`
|
107 |
# https://huggingface.co/docs/text-generation-inference/conceptual/streaming#streaming-with-curl
|
|
|
108 |
self.request_body = {
|
109 |
"inputs": prompt,
|
110 |
"parameters": {
|
@@ -112,6 +114,9 @@ class MessageStreamer:
|
|
112 |
"max_new_tokens": max_new_tokens,
|
113 |
"return_full_text": False,
|
114 |
},
|
|
|
|
|
|
|
115 |
"stream": True,
|
116 |
}
|
117 |
|
|
|
61 |
def chat_response(
|
62 |
self,
|
63 |
prompt: str = None,
|
64 |
+
temperature: float = 0.5,
|
65 |
max_new_tokens: int = None,
|
66 |
api_key: str = None,
|
67 |
+
use_cache: bool = False,
|
68 |
):
|
69 |
# https://huggingface.co/docs/api-inference/detailed_parameters?code=curl
|
70 |
# curl --proxy http://<server>:<port> https://api-inference.huggingface.co/models/<org>/<model_name> -X POST -d '{"inputs":"who are you?","parameters":{"max_new_token":64}}' -H 'Content-Type: application/json' -H 'Authorization: Bearer <HF_TOKEN>'
|
|
|
106 |
# huggingface_hub/inference/_text_generation.py:
|
107 |
# class TextGenerationRequest > param `stream`
|
108 |
# https://huggingface.co/docs/text-generation-inference/conceptual/streaming#streaming-with-curl
|
109 |
+
# https://huggingface.co/docs/api-inference/detailed_parameters#text-generation-task
|
110 |
self.request_body = {
|
111 |
"inputs": prompt,
|
112 |
"parameters": {
|
|
|
114 |
"max_new_tokens": max_new_tokens,
|
115 |
"return_full_text": False,
|
116 |
},
|
117 |
+
"options": {
|
118 |
+
"use_cache": use_cache,
|
119 |
+
},
|
120 |
"stream": True,
|
121 |
}
|
122 |
|