import asyncio
import collections
from typing import Any, Dict, List, Optional, OrderedDict, Tuple
import gradio_client # type: ignore
from h2ogpt_client import enums


class Client:
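    """Connects to an h2oGPT server.

    A minimal usage sketch (the URL is illustrative; point it at any running
    h2oGPT Gradio server)::

        client = Client("http://localhost:7860")
        answer = client.text_completion.create(prompt="Why is the sky blue?")
    """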
def __init__(self, server_url: str, huggingface_token: Optional[str] = None):
self._client = gradio_client.Client(
src=server_url, hf_token=huggingface_token, serialize=False, verbose=False
)
self._text_completion = TextCompletion(self)
self._chat_completion = ChatCompletion(self)

    @property
def text_completion(self) -> "TextCompletion":
return self._text_completion

    @property
def chat_completion(self) -> "ChatCompletion":
return self._chat_completion

    def _predict(self, *args, api_name: str) -> Any:
return self._client.submit(*args, api_name=api_name).result()

    async def _predict_async(self, *args, api_name: str) -> Any:
return await asyncio.wrap_future(self._client.submit(*args, api_name=api_name))


class TextCompletion:
"""Text completion"""
def __init__(self, client: Client):
self._client = client

    def create(
self,
prompt: str,
prompt_type: enums.PromptType = enums.PromptType.plain,
input_context_for_instruction: str = "",
        enable_sampler: bool = False,
temperature: float = 1.0,
top_p: float = 1.0,
top_k: int = 40,
beams: float = 1.0,
early_stopping: bool = False,
min_output_length: int = 0,
max_output_length: int = 128,
max_time: int = 180,
repetition_penalty: float = 1.07,
number_returns: int = 1,
system_pre_context: str = "",
langchain_mode: enums.LangChainMode = enums.LangChainMode.DISABLED,
) -> str:
"""
Creates a new text completion.
:param prompt: text prompt to generate completions for
:param prompt_type: type of the prompt
:param input_context_for_instruction: input context for instruction
:param enable_sampler: enable or disable the sampler, required for use of
temperature, top_p, top_k
        :param temperature: sampling temperature to use, between 0 and 3.
            Lower values make the output more focused and deterministic, but
            may lead to repetition. Higher values make the output more
            creative, but may lead to hallucinations.
        :param top_p: cumulative probability of tokens to sample from
        :param top_k: number of tokens to sample from
        :param beams: number of beams to search for the best overall
            probability. Higher values use more GPU memory and compute.
        :param early_stopping: whether to stop early in beam search
        :param min_output_length: minimum output length
        :param max_output_length: maximum output length
        :param max_time: maximum time to spend searching for an optimal output
        :param repetition_penalty: penalty for repetition
        :param number_returns: number of completions to return
        :param system_pre_context: prepended directly, without prompt
            processing
:param langchain_mode: LangChain mode
:return: response from the model
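
        Example (illustrative; assumes an h2oGPT server is reachable at the
        URL below)::

            client = Client("http://localhost:7860")
            text = client.text_completion.create(
                prompt="Q: Why is the sky blue? A:",
                max_output_length=64,
            )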
"""
# Not exposed parameters.
instruction = "" # empty when chat_mode is False
input = "" # only chat_mode is True
stream_output = False
prompt_dict = "" # empty as prompt_type cannot be 'custom'
chat_mode = False
langchain_top_k_docs = 4 # number of document chunks; not public
langchain_enable_chunk = True # whether to chunk documents; not public
langchain_chunk_size = 512 # chunk size for document chunking; not public
        langchain_document_choice = ["All"]  # not public
return self._client._predict(
instruction,
input,
system_pre_context,
stream_output,
prompt_type.value,
prompt_dict,
temperature,
top_p,
top_k,
beams,
max_output_length,
min_output_length,
early_stopping,
max_time,
repetition_penalty,
number_returns,
enable_sampler,
chat_mode,
prompt,
input_context_for_instruction,
langchain_mode.value,
langchain_top_k_docs,
langchain_enable_chunk,
langchain_chunk_size,
langchain_document_choice,
api_name="/submit_nochat",
)

    async def create_async(
self,
prompt: str,
prompt_type: enums.PromptType = enums.PromptType.plain,
input_context_for_instruction: str = "",
        enable_sampler: bool = False,
temperature: float = 1.0,
top_p: float = 1.0,
top_k: int = 40,
beams: float = 1.0,
early_stopping: bool = False,
min_output_length: int = 0,
max_output_length: int = 128,
max_time: int = 180,
repetition_penalty: float = 1.07,
number_returns: int = 1,
system_pre_context: str = "",
langchain_mode: enums.LangChainMode = enums.LangChainMode.DISABLED,
) -> str:
"""
Creates a new text completion asynchronously.
:param prompt: text prompt to generate completions for
:param prompt_type: type of the prompt
:param input_context_for_instruction: input context for instruction
:param enable_sampler: enable or disable the sampler, required for use of
temperature, top_p, top_k
        :param temperature: sampling temperature to use, between 0 and 3.
            Lower values make the output more focused and deterministic, but
            may lead to repetition. Higher values make the output more
            creative, but may lead to hallucinations.
        :param top_p: cumulative probability of tokens to sample from
        :param top_k: number of tokens to sample from
        :param beams: number of beams to search for the best overall
            probability. Higher values use more GPU memory and compute.
        :param early_stopping: whether to stop early in beam search
        :param min_output_length: minimum output length
        :param max_output_length: maximum output length
        :param max_time: maximum time to spend searching for an optimal output
        :param repetition_penalty: penalty for repetition
        :param number_returns: number of completions to return
        :param system_pre_context: prepended directly, without prompt
            processing
:param langchain_mode: LangChain mode
:return: response from the model
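
        Example (illustrative; assumes an h2oGPT server is reachable at the
        URL below)::

            async def main() -> None:
                client = Client("http://localhost:7860")
                text = await client.text_completion.create_async(
                    prompt="Q: Why is the sky blue? A:"
                )

            asyncio.run(main())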
"""
# Not exposed parameters.
instruction = "" # empty when chat_mode is False
input = "" # only chat_mode is True
stream_output = False
prompt_dict = "" # empty as prompt_type cannot be 'custom'
chat_mode = False
langchain_top_k_docs = 4 # number of document chunks; not public
langchain_enable_chunk = True # whether to chunk documents; not public
langchain_chunk_size = 512 # chunk size for document chunking; not public
langchain_document_choice = ["All"] # not public
return await self._client._predict_async(
instruction,
input,
system_pre_context,
stream_output,
prompt_type.value,
prompt_dict,
temperature,
top_p,
top_k,
beams,
max_output_length,
min_output_length,
early_stopping,
max_time,
repetition_penalty,
number_returns,
enable_sampler,
chat_mode,
prompt,
input_context_for_instruction,
langchain_mode.value,
langchain_top_k_docs,
langchain_enable_chunk,
langchain_chunk_size,
langchain_document_choice,
api_name="/submit_nochat",
)


class ChatCompletion:
"""Chat completion"""
def __init__(self, client: Client):
self._client = client

    def create(
self,
prompt_type: enums.PromptType = enums.PromptType.plain,
input_context_for_instruction: str = "",
        enable_sampler: bool = False,
temperature: float = 1.0,
top_p: float = 1.0,
top_k: int = 40,
beams: float = 1.0,
early_stopping: bool = False,
min_output_length: int = 0,
max_output_length: int = 128,
max_time: int = 180,
repetition_penalty: float = 1.07,
number_returns: int = 1,
system_pre_context: str = "",
langchain_mode: enums.LangChainMode = enums.LangChainMode.DISABLED,
) -> "ChatContext":
"""
        Creates a new chat context with the given parameters.
:param prompt_type: type of the prompt
:param input_context_for_instruction: input context for instruction
:param enable_sampler: enable or disable the sampler, required for use of
temperature, top_p, top_k
        :param temperature: sampling temperature to use, between 0 and 3.
            Lower values make the output more focused and deterministic, but
            may lead to repetition. Higher values make the output more
            creative, but may lead to hallucinations.
        :param top_p: cumulative probability of tokens to sample from
        :param top_k: number of tokens to sample from
        :param beams: number of beams to search for the best overall
            probability. Higher values use more GPU memory and compute.
        :param early_stopping: whether to stop early in beam search
        :param min_output_length: minimum output length
        :param max_output_length: maximum output length
        :param max_time: maximum time to spend searching for an optimal output
        :param repetition_penalty: penalty for repetition
        :param number_returns: number of completions to return
        :param system_pre_context: prepended directly, without prompt
            processing
:param langchain_mode: LangChain mode
:return: a chat context with given parameters
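
        Example (illustrative; assumes an h2oGPT server is reachable at the
        URL below)::

            client = Client("http://localhost:7860")
            context = client.chat_completion.create(max_output_length=64)
            reply = context.chat("What is the capital of France?")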
"""
kwargs = collections.OrderedDict(
            instruction=None,  # filled in per turn by ChatContext.chat
            input="",  # left empty; not exposed by this client
system_pre_context=system_pre_context,
stream_output=False,
prompt_type=prompt_type.value,
prompt_dict="", # empty as prompt_type cannot be 'custom'
temperature=temperature,
top_p=top_p,
top_k=top_k,
beams=beams,
max_output_length=max_output_length,
min_output_length=min_output_length,
early_stopping=early_stopping,
max_time=max_time,
repetition_penalty=repetition_penalty,
number_returns=number_returns,
enable_sampler=enable_sampler,
chat_mode=True,
instruction_nochat="", # empty when chat_mode is True
input_context_for_instruction=input_context_for_instruction,
langchain_mode=langchain_mode.value,
langchain_top_k_docs=4, # number of document chunks; not public
langchain_enable_chunk=True, # whether to chunk documents; not public
langchain_chunk_size=512, # chunk size for document chunking; not public
langchain_document_choice=["All"], # not public
chatbot=[], # chat history
)
return ChatContext(self._client, kwargs)


class ChatContext:
    """Chat context"""
def __init__(self, client: Client, kwargs: OrderedDict[str, Any]):
self._client = client
self._kwargs = kwargs

    def chat(self, prompt: str) -> Dict[str, str]:
"""
Chat with the GPT.
:param prompt: text prompt to generate completions for
        :return: chat reply
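
        The reply has the form (illustrative)::

            {"user": "What is the capital of France?", "gpt": "Paris."}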
"""
self._kwargs["instruction"] = prompt
self._kwargs["chatbot"] += [[prompt, None]]
response: Tuple[List[List[str]], str] = self._client._predict(
*self._kwargs.values(), api_name="/instruction_bot"
)
self._kwargs["chatbot"][-1][1] = response[0][-1][1]
return {"user": response[0][-1][0], "gpt": response[0][-1][1]}

    def chat_history(self) -> List[Dict[str, str]]:
"""Returns the full chat history."""
return [{"user": i[0], "gpt": i[1]} for i in self._kwargs["chatbot"]]