Spaces:

auto-academic
/

auto-draft

Running

auto-draft / auto_backgrounds.py

shaocongma

Re-format prompts using Langchain.

c9efba3 over 1 year ago

5.76 kB

	import os.path
	import json
	from utils.references import References
	from utils.file_operations import hash_name, make_archive, copy_templates
	from section_generator import section_generation_bg, keywords_generation, figures_generation, section_generation
	import logging
	import time

	# Module-level running token counters; log_usage() adds to them on every call.
	TOTAL_TOKENS = 0
	TOTAL_PROMPTS_TOKENS = 0
	TOTAL_COMPLETION_TOKENS = 0


	def log_usage(usage, generating_target, print_out=True):
	    """Add one usage record to the module-level token counters and report it.

	    Args:
	        usage: Mapping providing the keys 'prompt_tokens', 'completion_tokens'
	            and 'total_tokens'.
	        generating_target: Label of what was generated; only used in the message.
	        print_out: If true, the summary is printed in addition to being logged.
	    """
	    global TOTAL_TOKENS, TOTAL_PROMPTS_TOKENS, TOTAL_COMPLETION_TOKENS

	    # Read all three values first so that a missing key leaves the totals untouched.
	    used_for_prompts = usage['prompt_tokens']
	    used_for_completion = usage['completion_tokens']
	    used_overall = usage['total_tokens']

	    TOTAL_TOKENS = TOTAL_TOKENS + used_overall
	    TOTAL_PROMPTS_TOKENS = TOTAL_PROMPTS_TOKENS + used_for_prompts
	    TOTAL_COMPLETION_TOKENS = TOTAL_COMPLETION_TOKENS + used_for_completion

	    summary = (
	        f"For generating {generating_target}, {used_overall} tokens have been used "
	        f"({used_for_prompts} for prompts; {used_for_completion} for completion). "
	        f"{TOTAL_TOKENS} tokens have been used in total.\n\n"
	    )
	    if print_out:
	        print(summary)
	    # The summary is always logged, regardless of print_out.
	    logging.info(summary)

	def _generation_setup(title, description="", template="ICLR2022", model="gpt-4",
	                      tldr=False, max_kw_refs=4, max_num_refs=10):
	    """Prepare the output folder, keywords and references for a new paper.

	    Args:
	        title: Paper title; passed to `copy_templates` and to the keyword generator.
	        description: Optional description of the paper, passed to the keyword generator.
	        template: Template name passed to `copy_templates`.
	        model: Accepted for interface compatibility; not used by this function.
	        tldr: Forwarded to `References.collect_papers`.
	        max_kw_refs: Value assigned to every keyword in the dictionary handed to
	            `References.collect_papers`.
	        max_num_refs: Accepted for interface compatibility; not used by this function.

	    Returns:
	        A tuple `(paper, destination_folder, all_paper_ids)`. `paper` is a dict with
	        the keys "title", "description", "references", "body" and "bibtex";
	        `all_paper_ids` is whatever `References.to_bibtex` returns.
	    """
	    print("Generation setup...")
	    paper = {}
	    paper_body = {}

	    # Create a copy in the outputs folder.
	    bibtex_path, destination_folder = copy_templates(template, title)
	    # basicConfig() is a no-op once the root logger has handlers, so without
	    # force=True only the first run in a process would log to its own folder.
	    logging.basicConfig(level=logging.INFO,
	                        filename=os.path.join(destination_folder, "generation.log"),
	                        force=True)

	    # Generate keywords and references
	    print("Initialize the paper information ...")
	    input_dict = {"title": title, "description": description}
	    keywords, usage = keywords_generation(input_dict)  # todo: handle format error here
	    print(f"keywords: {keywords}")
	    log_usage(usage, "keywords")

	    # generate keywords dictionary: every keyword maps to max_kw_refs
	    keywords = {keyword: max_kw_refs for keyword in keywords}
	    print(f"keywords: {keywords}")

	    ref = References()
	    ref.collect_papers(keywords, tldr=tldr)
	    # todo: use `all_paper_ids` to check if all citations are in this list
	    #       in tex_processing, remove all duplicated ids
	    #       find most relevant papers; max_num_refs
	    all_paper_ids = ref.to_bibtex(bibtex_path)

	    print(f"The paper information has been initialized. References are saved to {bibtex_path}.")

	    paper["title"] = title
	    paper["description"] = description
	    paper["references"] = ref.to_prompts()
	    paper["body"] = paper_body
	    paper["bibtex"] = bibtex_path
	    return paper, destination_folder, all_paper_ids



	def generate_backgrounds(title, description="", template="ICLR2022", model="gpt-4"):
	    """Generate the introduction, related works and backgrounds sections, then archive the result.

	    A failure in one section is printed and logged, and generation continues
	    with the next section.

	    Args:
	        title: Paper title.
	        description: Optional description of the paper.
	        template: Template name forwarded to `_generation_setup`.
	        model: Model name forwarded to `_generation_setup` and `section_generation_bg`.

	    Returns:
	        The value returned by `make_archive` for the destination folder.
	    """
	    paper, destination_folder, _ = _generation_setup(title, description, template, model)

	    section_names = ("introduction", "related works", "backgrounds")
	    for section_name in section_names:
	        try:
	            section_usage = section_generation_bg(paper, section_name, destination_folder, model=model)
	            log_usage(section_usage, section_name)
	        except Exception as err:
	            # Best effort: report the failure and move on to the next section.
	            failure = f"Failed to generate {section_name}. {type(err).__name__} was raised: {err}"
	            print(failure)
	            logging.info(failure)
	    print(f"The paper '{title}' has been generated. Saved to {destination_folder}.")

	    # The archive name is derived from the request parameters.
	    archive_name = hash_name(
	        {"title": title, "description": description, "generator": "generate_backgrounds"}
	    ) + ".zip"
	    return make_archive(destination_folder, archive_name)


	def fake_generator(title, description="", template="ICLR2022", model="gpt-4"):
	    """Exercise the whole pipeline without calling the OpenAI API.

	    Archives the fixed path "sample-output.pdf" under the file name that
	    `generate_draft` builds for the same title and description. `template` and
	    `model` are accepted but not used.

	    Returns:
	        The value returned by `make_archive`.
	    """
	    request = dict(title=title, description=description, generator="generate_draft")
	    archive_name = hash_name(request) + ".zip"
	    return make_archive("sample-output.pdf", archive_name)


	def generate_draft(title, description="", template="ICLR2022", model="gpt-4", tldr=True, max_kw_refs=4):
	    """Generate every section of a paper draft and archive the result.

	    Each section is attempted up to four times. A failed attempt is printed and
	    logged, followed by a 20 second pause before the next attempt. A section
	    that fails on every attempt is skipped and generation continues.

	    Args:
	        title: Paper title.
	        description: Optional description of the paper.
	        template: Template name forwarded to `_generation_setup`.
	        model: Model name forwarded to `_generation_setup` and `section_generation`.
	        tldr: Forwarded to `_generation_setup`.
	        max_kw_refs: Forwarded to `_generation_setup`.

	    Returns:
	        The value returned by `make_archive` for the destination folder.
	    """
	    paper, destination_folder, _ = _generation_setup(title, description, template, model, tldr, max_kw_refs)
	    # NOTE: a bare `raise` used to follow the setup call. With no active exception
	    # it always raised RuntimeError, which made the rest of this function unreachable.

	    # todo: `list_of_methods` failed to be generated; find a solution ...
	    # print("Generating figures ...")
	    # usage = figures_generation(paper, destination_folder, model="gpt-3.5-turbo")
	    # log_usage(usage, "figures")

	    max_attempts = 4
	    retry_delay_seconds = 20
	    sections = ["introduction", "related works", "backgrounds", "methodology",
	                "experiments", "conclusion", "abstract"]
	    for section in sections:
	        for attempt in range(1, max_attempts + 1):
	            try:
	                usage = section_generation(paper, section, destination_folder, model=model)
	                log_usage(usage, section)
	                break
	            except Exception as e:
	                message = f"Failed to generate {section}. {type(e).__name__} was raised: {e}"
	                print(message)
	                logging.info(message)
	                # Pause only when another attempt follows; waiting after the
	                # last failure would just delay the next section.
	                if attempt < max_attempts:
	                    time.sleep(retry_delay_seconds)

	    input_dict = {"title": title, "description": description, "generator": "generate_draft"}
	    filename = hash_name(input_dict) + ".zip"
	    return make_archive(destination_folder, filename)


	if __name__ == "__main__":
	    # Manual entry point: generate a full draft for a fixed example title.
	    import openai

	    # The API key is read from the environment; it is None when the variable is unset.
	    openai.api_key = os.environ.get("OPENAI_API_KEY")

	    title = "Using interpretable boosting algorithms for modeling environmental and agricultural data"
	    description = ""
	    output = generate_draft(
	        title,
	        description,
	        tldr=True,
	        max_kw_refs=10,
	    )
	    print(output)