# litellm/tests/test_async_fn.py
#### What this tests ####
# This tests the acompletion function #
import sys, os
import pytest
import traceback
import asyncio, logging
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm
from litellm import completion, acompletion, acreate
litellm.num_retries = 3
def test_sync_response_anyscale():
    """Smoke-test a synchronous completion() call against Anyscale.

    A litellm.Timeout is tolerated (the call runs with a 5s timeout);
    any other exception fails the test.
    """
    litellm.set_verbose = False
    msgs = [{"content": "Hello, how are you?", "role": "user"}]
    try:
        completion(
            model="anyscale/mistralai/Mistral-7B-Instruct-v0.1",
            messages=msgs,
            timeout=5,
        )
    except litellm.Timeout:
        # Expected under the tight timeout; not a failure.
        pass
    except Exception as e:
        pytest.fail(f"An exception occurred: {e}")
# test_sync_response_anyscale()
def test_async_response_openai():
    """Verify an async acompletion() call against OpenAI gpt-3.5-turbo.

    litellm.Timeout is acceptable (the call uses a 5s timeout); any other
    exception fails the test.
    """
    import asyncio

    litellm.set_verbose = True

    async def test_get_response():
        user_message = "Hello, how are you?"
        messages = [{"content": user_message, "role": "user"}]
        try:
            response = await acompletion(
                model="gpt-3.5-turbo", messages=messages, timeout=5
            )
            print(f"response: {response}")
            print(f"response ms: {response._response_ms}")
        except litellm.Timeout:
            pass
        except Exception as e:
            # Print the exception BEFORE failing: pytest.fail() raises, so in
            # the original code the print after it was unreachable.
            print(e)
            pytest.fail(f"An exception occurred: {e}")

    asyncio.run(test_get_response())
# test_async_response_openai()
def test_async_response_azure():
    """Verify an async acompletion() call against an Azure deployment.

    Endpoint and key come from the CLOUDFLARE_AZURE_BASE_URL and
    AZURE_FRANCE_API_KEY environment variables. litellm.Timeout is
    tolerated; any other exception fails the test.
    """
    import asyncio

    litellm.set_verbose = True

    async def test_get_response():
        msgs = [{"content": "What do you know?", "role": "user"}]
        try:
            result = await acompletion(
                model="azure/gpt-turbo",
                messages=msgs,
                base_url=os.getenv("CLOUDFLARE_AZURE_BASE_URL"),
                api_key=os.getenv("AZURE_FRANCE_API_KEY"),
            )
            print(f"response: {result}")
        except litellm.Timeout:
            pass
        except Exception as e:
            pytest.fail(f"An exception occurred: {e}")

    asyncio.run(test_get_response())
# test_async_response_azure()
def test_async_anyscale_response():
    """Verify an async acompletion() call against Anyscale Mistral-7B.

    litellm.Timeout is tolerated (5s timeout in effect); anything else
    fails the test.
    """
    import asyncio

    litellm.set_verbose = True

    async def test_get_response():
        msgs = [{"content": "Hello, how are you?", "role": "user"}]
        try:
            result = await acompletion(
                model="anyscale/mistralai/Mistral-7B-Instruct-v0.1",
                messages=msgs,
                timeout=5,
            )
            print(f"response: {result}")
        except litellm.Timeout:
            # Expected under the tight timeout; not a failure.
            pass
        except Exception as e:
            pytest.fail(f"An exception occurred: {e}")

    asyncio.run(test_get_response())
# test_async_anyscale_response()
@pytest.mark.skip(reason="Flaky test-cloudflare is very unstable")
def test_async_completion_cloudflare():
    """Verify an async acompletion() call against a Cloudflare Workers model.

    Skipped by default because the Cloudflare endpoint is unstable.
    Asserts the model returns a non-trivial text response.
    """
    try:
        litellm.set_verbose = True

        async def test():
            result = await litellm.acompletion(
                model="cloudflare/@cf/meta/llama-2-7b-chat-int8",
                messages=[{"content": "what llm are you", "role": "user"}],
                max_tokens=5,
                num_retries=3,
            )
            print(result)
            return result

        response = asyncio.run(test())
        content = response["choices"][0]["message"]["content"]
        # Expect more than a single character of generated text.
        assert len(content) > 1
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
# test_async_completion_cloudflare()
@pytest.mark.skip(reason="Flaky test")
def test_get_cloudflare_response_streaming():
    """Verify streaming acompletion() against a Cloudflare Workers model.

    Skipped by default (flaky endpoint). Collects the streamed deltas and
    asserts a non-empty string was produced. litellm.Timeout is tolerated.
    """
    import asyncio

    async def test_async_call():
        user_message = "write a short poem in one sentence"
        messages = [{"content": user_message, "role": "user"}]
        try:
            litellm.set_verbose = False
            response = await acompletion(
                model="cloudflare/@cf/meta/llama-2-7b-chat-int8",
                messages=messages,
                stream=True,
                num_retries=3,  # cloudflare ai workers is EXTREMELY UNSTABLE
            )
            print(type(response))
            import inspect

            is_async_generator = inspect.isasyncgen(response)
            print(is_async_generator)
            output = ""
            async for chunk in response:
                print(chunk)
                token = chunk["choices"][0]["delta"].get("content", "")
                # PEP 8: compare to None with `is`, not `==` (was `== None`).
                if token is None:
                    continue  # openai v1.0.0 returns content=None
                output += token
            assert output is not None, "output cannot be None."
            assert isinstance(output, str), "output needs to be of type str"
            assert len(output) > 0, "Length of output needs to be greater than 0."
            print(f"output: {output}")
        except litellm.Timeout:
            pass
        except Exception as e:
            pytest.fail(f"An exception occurred: {e}")

    asyncio.run(test_async_call())
@pytest.mark.asyncio
async def test_hf_completion_tgi():
    """Verify acompletion() against a HuggingFace TGI-hosted model.

    Runs natively under pytest-asyncio. litellm.Timeout is tolerated;
    any other exception fails the test.
    """
    # litellm.set_verbose=True
    try:
        result = await acompletion(
            model="huggingface/HuggingFaceH4/zephyr-7b-beta",
            messages=[{"content": "Hello, how are you?", "role": "user"}],
        )
        # Add any assertions here to check the response
        print(result)
    except litellm.Timeout:
        pass
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
# test_get_cloudflare_response_streaming()
def test_get_response_streaming():
    """Verify streaming acompletion() against OpenAI gpt-3.5-turbo.

    Accumulates the streamed content deltas and asserts a non-empty
    string was produced. litellm.Timeout is tolerated (5s timeout).
    """
    import asyncio

    async def test_async_call():
        user_message = "write a short poem in one sentence"
        messages = [{"content": user_message, "role": "user"}]
        try:
            litellm.set_verbose = True
            response = await acompletion(
                model="gpt-3.5-turbo", messages=messages, stream=True, timeout=5
            )
            print(type(response))
            import inspect

            is_async_generator = inspect.isasyncgen(response)
            print(is_async_generator)
            output = ""
            async for chunk in response:
                token = chunk["choices"][0]["delta"].get("content", "")
                # PEP 8: compare to None with `is`, not `==` (was `== None`).
                if token is None:
                    continue  # openai v1.0.0 returns content=None
                output += token
            assert output is not None, "output cannot be None."
            assert isinstance(output, str), "output needs to be of type str"
            assert len(output) > 0, "Length of output needs to be greater than 0."
            print(f"output: {output}")
        except litellm.Timeout:
            pass
        except Exception as e:
            pytest.fail(f"An exception occurred: {e}")

    asyncio.run(test_async_call())
# test_get_response_streaming()
def test_get_response_non_openai_streaming():
    """Verify streaming acompletion() against Anyscale (non-OpenAI provider).

    Accumulates streamed deltas and asserts a non-empty string result.
    litellm.Timeout is tolerated (5s timeout, retries disabled).
    """
    import asyncio

    litellm.set_verbose = True
    litellm.num_retries = 0

    async def test_async_call():
        user_message = "Hello, how are you?"
        messages = [{"content": user_message, "role": "user"}]
        # Initialize so the final `return response` cannot raise
        # UnboundLocalError when a Timeout fires before assignment
        # (original bug).
        response = None
        try:
            response = await acompletion(
                model="anyscale/mistralai/Mistral-7B-Instruct-v0.1",
                messages=messages,
                stream=True,
                timeout=5,
            )
            print(type(response))
            import inspect

            is_async_generator = inspect.isasyncgen(response)
            print(is_async_generator)
            output = ""
            async for chunk in response:
                token = chunk["choices"][0]["delta"].get("content", None)
                # PEP 8: compare to None with `is`, not `==` (was `== None`).
                if token is None:
                    continue
                print(token)
                output += token
            print(f"output: {output}")
            assert output is not None, "output cannot be None."
            assert isinstance(output, str), "output needs to be of type str"
            assert len(output) > 0, "Length of output needs to be greater than 0."
        except litellm.Timeout:
            pass
        except Exception as e:
            pytest.fail(f"An exception occurred: {e}")
        return response

    asyncio.run(test_async_call())
# test_get_response_non_openai_streaming()