def is_summary_valid(summary: str) -> bool:
    """
    Checks if the summary is valid.

    A summary is valid if it is not empty and contains at least five words.

    Args:
        summary (str): The summary to check.

    Returns:
        bool: True if the summary is valid, False otherwise.
    """
    if isinstance(summary, str):
        words = summary.split()
        if len(words) >= 5:
            return True
    # print(summary)
    return False
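
# A minimal usage sketch (not part of the original module); the sample strings
# below are invented purely for illustration:
# >>> is_summary_valid("The quick brown fox jumps over the lazy dog.")
# True
# >>> is_summary_valid("Too short.")
# False
# >>> is_summary_valid(None)
# False
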
def create_pairs(df):
    """
    Creates pairs of source and summary from the dataframe.

    Args:
        df (DataFrame): The dataframe containing source and summary columns.

    Returns:
        list: A list of pairs [source, summary].
    """
    pairs = []
    for _, row in df.iterrows():
        pairs.append([row['source'], row['summary']])
    return pairs
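
# A minimal usage sketch, assuming pandas is available in this module's
# environment; the toy rows are invented for illustration only:
# >>> import pandas as pd
# >>> df = pd.DataFrame({"source": ["doc A", "doc B"], "summary": ["sum A", "sum B"]})
# >>> create_pairs(df)
# [['doc A', 'sum A'], ['doc B', 'sum B']]
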
# def format_results(model_name: str, revision: str, precision: str,
#                    factual_consistency_rate: float, hallucination_rate: float,
#                    answer_rate: float, avg_summary_len: float) -> dict:
#     """
#     Formats the evaluation results into a structured dictionary.
#
#     Args:
#         model_name (str): The name of the evaluated model.
#         revision (str): The revision hash of the model.
#         precision (str): The precision with which the evaluation was run.
#         factual_consistency_rate (float): The factual consistency rate.
#         hallucination_rate (float): The hallucination rate.
#         answer_rate (float): The answer rate.
#         avg_summary_len (float): The average summary length.
#
#     Returns:
#         dict: A dictionary containing the structured evaluation results.
#     """
#     results = {
#         "config": {
#             "model_dtype": precision,   # Precision with which you ran the evaluation
#             "model_name": model_name,   # Name of the model
#             "model_sha": revision       # Hash of the model
#         },
#         "results": {
#             "hallucination_rate": {
#                 "hallucination_rate": round(hallucination_rate, 3)
#             },
#             "factual_consistency_rate": {
#                 "factual_consistency_rate": round(factual_consistency_rate, 1)
#             },
#             "answer_rate": {
#                 "answer_rate": round(answer_rate * 100, 1)
#             },
#             "average_summary_length": {
#                 "average_summary_length": round(avg_summary_len, 1)
#             },
#         }
#     }
#
#     return results


def format_results(model_name: str, revision: str, precision: str,
                   overall_js: float, overall_ci: tuple, **experiment_scores) -> dict:
    """
    Formats the evaluation results into a structured dictionary.

    Args:
        model_name (str): The name of the evaluated model.
        revision (str): The revision hash of the model.
        precision (str): The precision with which the evaluation was run.
        overall_js (float): The overall average JS divergence.
        overall_ci (tuple): The confidence interval for the overall JS divergence.
        **experiment_scores: Experiment-specific scores and confidence intervals
            (E1, E1_ci, E2, E2_ci, ...).

    Returns:
        dict: A dictionary containing the structured evaluation results.
    """
    # Initialize the base structure
    results = {
        "config": {
            "model_dtype": precision,   # Precision with which you ran the evaluation
            "model_name": model_name,   # Name of the model
            "model_sha": revision       # Hash of the model
        },
        "results": {
            "overall_js_divergence": overall_js,        # Overall JS divergence
            "overall_confidence_interval": overall_ci,  # Confidence interval for the overall JS divergence
        }
    }

    # Add experiment-specific results to the dictionary
    for exp_name, score in experiment_scores.items():
        results["results"][exp_name] = score  # Add each experiment score and its CI
    return results
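
# A minimal usage sketch; the model name, revision hash, scores, and confidence
# intervals below are placeholder values, not results from this project:
# >>> format_results(
# ...     model_name="org/example-model",
# ...     revision="abc1234",
# ...     precision="float16",
# ...     overall_js=0.42,
# ...     overall_ci=(0.4, 0.44),
# ...     E1=0.38,
# ...     E1_ci=(0.35, 0.41),
# ... )
# The call above returns a dictionary shaped like:
# {
#     "config": {"model_dtype": "float16", "model_name": "org/example-model", "model_sha": "abc1234"},
#     "results": {
#         "overall_js_divergence": 0.42,
#         "overall_confidence_interval": (0.4, 0.44),
#         "E1": 0.38,
#         "E1_ci": (0.35, 0.41),
#     },
# }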