Spaces:
Running
Running
def get_action_prompt(instruction, clickable_infos, width, height, summary_history, action_history, last_summary, last_action, add_info, error_flag, completed_content, memory):
    """Build the prompt asking the model to pick the next single phone action.

    The prompt is assembled from a fixed background/description header, one
    line per usable screenshot element, several optional sections (hint,
    history, progress, memory, last failed operation) and a fixed description
    of the six allowed actions and the expected output format.

    Args:
        instruction: The user's instruction, interpolated into the background.
        clickable_infos: Iterable of mappings with ``'text'`` and
            ``'coordinates'`` keys. Entries whose text is ``""`` or
            ``"icon: None"``, or whose coordinates are the origin, are skipped.
        width: Screenshot width in pixels (interpolated as-is).
        height: Screenshot height in pixels (interpolated as-is).
        summary_history: Operation summaries; only the part before the first
            ``" to "`` of each summary is shown.
        action_history: Actions paired positionally with ``summary_history``.
        last_summary: Summary of the previous operation (used when
            ``error_flag`` is truthy).
        last_action: Action of the previous operation (used when
            ``error_flag`` is truthy).
        add_info: Extra hint text; the hint section is omitted when empty or
            ``None``.
        error_flag: When truthy, a section asking the model to reflect on the
            previous operation is added.
        completed_content: Progress text; section omitted when empty or
            ``None``.
        memory: Memory text; section omitted when empty or ``None``.

    Returns:
        The complete prompt as a single string.
    """
    parts = [
        "### Background ###\n",
        f"This image is a phone screenshot. Its width is {width} pixels and its height is {height} pixels. The user's instruction is: {instruction}.\n\n",
        "### Screenshot information ###\n",
        "In order to help you better perceive the content in this screenshot, we extract some information on the current screenshot through system files. ",
        "This information consists of two parts: coordinates; content. ",
        "The format of the coordinates is [x, y], x is the pixel from left to right and y is the pixel from top to bottom; the content is a text or an icon description respectively. ",
        "The information is as follow:\n",
    ]

    for info in clickable_infos:
        # The origin is matched in both tuple and list form: a list [0, 0]
        # never compares equal to the tuple (0, 0), and the prompt itself
        # describes coordinates as "[x, y]".
        if info['text'] not in ("", "icon: None") and info['coordinates'] not in ((0, 0), [0, 0]):
            parts.append(f"{info['coordinates']}; {info['text']}\n")

    parts.append("Please note that this information is not necessarily accurate. You need to combine the screenshot to understand.")
    parts.append("\n\n")

    # Truthiness (rather than != "") also skips None, which would otherwise
    # fail on string concatenation.
    if add_info:
        parts.append("### Hint ###\n")
        parts.append("There are hints to help you complete the user's instructions. The hints are as follow:\n")
        parts.append(add_info)
        parts.append("\n\n")

    if action_history:
        parts.append("### History operations ###\n")
        parts.append("Before reaching this page, some operations have been completed. You need to refer to the completed operations to decide the next operation. These operations are as follow:\n")
        # Summaries and actions are paired positionally; if the lists differ
        # in length the surplus entries are ignored.
        for step, (summary, action) in enumerate(zip(summary_history, action_history), start=1):
            operation = summary.split(" to ")[0].strip()
            parts.append(f"Step-{step}: [Operation: {operation}; Action: {action}]\n")
        parts.append("\n")

    if completed_content:
        parts.append("### Progress ###\n")
        parts.append("After completing the history operations, you have the following thoughts about the progress of user's instruction completion:\n")
        parts.append("Completed contents:\n" + completed_content + "\n\n")

    if memory:
        parts.append("### Memory ###\n")
        parts.append("During the operations, you record the following contents on the screenshot for use in subsequent operations:\n")
        # NOTE(review): unlike the other sections this one ends with a single
        # newline; kept unchanged so the emitted prompt stays the same.
        parts.append("Memory:\n" + memory + "\n")

    if error_flag:
        parts.append("### Last operation ###\n")
        parts.append(f"You previously wanted to perform the operation \"{last_summary}\" on this page and executed the Action \"{last_action}\". But you find that this operation does not meet your expectation. You need to reflect and revise your operation this time.")
        parts.append("\n\n")

    parts.extend([
        "### Response requirements ###\n",
        "Now you need to combine all of the above to perform just one action on the current page. You must choose one of the six actions below:\n",
        "Open app (app name): If the current page is desktop, you can use this action to open the app named \"app name\" on the desktop.\n",
        "Tap (x, y): Tap the position (x, y) in current page.\n",
        "Swipe (x1, y1), (x2, y2): Swipe from position (x1, y1) to position (x2, y2).\n",
        "Type (text): Type the \"text\" in the input box.\n",
        "Home: Return to home page.\n",
        "Stop: If you think all the requirements of user's instruction have been completed and no further operation is required, you can choose this action to terminate the operation process.",
        "\n\n",
        "### Output format ###\n",
        "Your output consists of the following three parts:\n",
        "### Thought ###\nThink about the requirements that have been completed in previous operations and the requirements that need to be completed in the next one operation.\n",
        "### Action ###\nYou can only choose one from the six actions above. Make sure that the coordinates or text in the \"()\".\n",
        "### Operation ###\nPlease generate a brief natural language description for the operation in Action based on your Thought.",
    ])
    return "".join(parts)
def get_reflect_prompt(instruction, clickable_infos1, clickable_infos2, width, height, summary, action, add_info):
    """Build the prompt asking the model to judge the outcome of an operation.

    The prompt lists the screenshot elements before and after the operation,
    restates the instruction and the operation, and asks for an A/B/C verdict
    (expected result / wrong page / no change).

    Args:
        instruction: The user's instruction.
        clickable_infos1: Elements of the screenshot taken before the
            operation; mappings with ``'text'`` and ``'coordinates'`` keys.
        clickable_infos2: Elements of the screenshot taken after the
            operation; same structure.
        width: Screenshot width in pixels (interpolated as-is).
        height: Screenshot height in pixels (interpolated as-is).
        summary: Operation summary; only the part before the first ``" to "``
            is shown as the operation thought.
        action: The action that was executed.
        add_info: Extra requirements; the sentence introducing them is omitted
            when this is empty or ``None``.

    Returns:
        The complete prompt as a single string.
    """

    def screen_lines(infos):
        # One "coordinates; text" line per usable element. The origin is
        # matched in tuple and list form, since a list [0, 0] never equals
        # the tuple (0, 0).
        return [
            f"{info['coordinates']}; {info['text']}\n"
            for info in infos
            if info['text'] not in ("", "icon: None") and info['coordinates'] not in ((0, 0), [0, 0])
        ]

    # Only mention additional requirements when some were supplied; an empty
    # value previously produced a dangling "requirements: ." sentence.
    requirements = f" You also need to note the following requirements: {add_info}." if add_info else ""

    parts = [
        f"These images are two phone screenshots before and after an operation. Their widths are {width} pixels and their heights are {height} pixels.\n\n",
        "In order to help you better perceive the content in this screenshot, we extract some information on the current screenshot through system files. ",
        "The information consists of two parts, consisting of format: coordinates; content. ",
        # Sentence-ending period added; it was missing and ran into the next sentence.
        "The format of the coordinates is [x, y], x is the pixel from left to right and y is the pixel from top to bottom; the content is a text or an icon description respectively. ",
        # NOTE(review): no keyboard status is included in this prompt — confirm
        # whether this sentence is still wanted.
        "The keyboard status is whether the keyboard of the current page is activated.",
        "\n\n",
        "### Before the current operation ###\n",
        "Screenshot information:\n",
    ]
    parts.extend(screen_lines(clickable_infos1))
    parts.append("\n")

    parts.append("### After the current operation ###\n")
    parts.append("Screenshot information:\n")
    parts.extend(screen_lines(clickable_infos2))
    parts.append("\n")

    parts.extend([
        "### Current operation ###\n",
        f"The user's instruction is: {instruction}.{requirements} In the process of completing the requirements of instruction, an operation is performed on the phone. Below are the details of this operation:\n",
        "Operation thought: " + summary.split(" to ")[0].strip() + "\n",
        "Operation action: " + action,
        "\n\n",
        "### Response requirements ###\n",
        "Now you need to output the following content based on the screenshots before and after the current operation:\n",
        "Whether the result of the \"Operation action\" meets your expectation of \"Operation thought\"?\n",
        "A: The result of the \"Operation action\" meets my expectation of \"Operation thought\".\n",
        "B: The \"Operation action\" results in a wrong page and I need to return to the previous page.\n",
        "C: The \"Operation action\" produces no changes.",
        "\n\n",
        "### Output format ###\n",
        "Your output format is:\n",
        "### Thought ###\nYour thought about the question\n",
        "### Answer ###\nA or B or C",
    ])
    return "".join(parts)
def get_memory_prompt(insight):
    """Build the prompt asking the model what on-screen content to remember.

    Args:
        insight: Description of the content considered important. When it is
            empty or ``None`` the prompt asks about content related to the
            user's instruction instead.

    Returns:
        The prompt string, ending with the expected output format.
    """
    if insight:
        parts = [
            "### Important content ###\n",
            insight,
            "\n\n",
            "### Response requirements ###\n",
            "Please think about whether there is any content closely related to ### Important content ### on the current page? If there is, please output the content. If not, please output \"None\".\n\n",
        ]
    else:
        parts = [
            "### Response requirements ###\n",
            # "instrcution" typo in the emitted prompt corrected to "instruction".
            "Please think about whether there is any content closely related to user's instruction on the current page? If there is, please output the content. If not, please output \"None\".\n\n",
        ]

    parts.extend([
        "### Output format ###\n",
        "Your output format is:\n",
        "### Important content ###\nThe content or None. Please do not repeatedly output the information in ### Memory ###.",
    ])
    return "".join(parts)
def get_process_prompt(instruction, thought_history, summary_history, action_history, completed_content, add_info):
    """Build the prompt asking the model to write or update "Completed contents".

    When more than one thought has been recorded, the prompt lists every past
    operation together with the previous completed-contents text and asks for
    an update. Otherwise it describes only the most recent operation and asks
    for a first version.

    Args:
        instruction: The user's instruction.
        thought_history: Thoughts recorded so far; its length selects the
            branch, and the last entry is shown in the single-operation case.
        summary_history: Operation summaries; only the part before the first
            ``" to "`` of each summary is shown.
        action_history: Actions paired positionally with ``summary_history``
            (used in the multi-operation case only).
        completed_content: Previous completed-contents text (used in the
            multi-operation case only).
        add_info: Extra hint text; the hint section is omitted when empty or
            ``None``.

    Returns:
        The complete prompt as a single string.
    """
    parts = [
        "### Background ###\n",
        f"There is an user's instruction which is: {instruction}. You are a mobile phone operating assistant and are operating the user's mobile phone.\n\n",
    ]

    # Truthiness (rather than != "") also skips None, which would otherwise
    # fail on string concatenation.
    if add_info:
        parts.append("### Hint ###\n")
        parts.append("There are hints to help you complete the user's instructions. The hints are as follow:\n")
        parts.append(add_info)
        parts.append("\n\n")

    if len(thought_history) > 1:
        parts.append("### History operations ###\n")
        parts.append("To complete the requirements of user's instruction, you have performed a series of operations. These operations are as follow:\n")
        # Summaries and actions are paired positionally; if the lists differ
        # in length the surplus entries are ignored.
        for step, (summary, action) in enumerate(zip(summary_history, action_history), start=1):
            operation = summary.split(" to ")[0].strip()
            parts.append(f"Step-{step}: [Operation thought: {operation}; Operation action: {action}]\n")
        parts.append("\n")

        parts.append("### Progress thinking ###\n")
        parts.append("After completing the history operations, you have the following thoughts about the progress of user's instruction completion:\n")
        parts.append("Completed contents:\n" + completed_content + "\n\n")

        parts.append("### Response requirements ###\n")
        parts.append("Now you need to update the \"Completed contents\". Completed contents is a general summary of the current contents that have been completed based on the ### History operations ###.\n\n")

        parts.append("### Output format ###\n")
        parts.append("Your output format is:\n")
        parts.append("### Completed contents ###\nUpdated Completed contents. Don't output the purpose of any operation. Just summarize the contents that have been actually completed in the ### History operations ###.")
    else:
        # Requires at least one recorded thought and one summary.
        parts.append("### Current operation ###\n")
        parts.append("To complete the requirements of user's instruction, you have performed an operation. Your operation thought and action of this operation are as follows:\n")
        parts.append(f"Operation thought: {thought_history[-1]}\n")
        operation = summary_history[-1].split(" to ")[0].strip()
        # The "Operation action" line shows the summary's leading part, not an
        # entry of action_history.
        parts.append(f"Operation action: {operation}\n\n")

        parts.append("### Response requirements ###\n")
        parts.append("Now you need to combine all of the above to generate the \"Completed contents\".\n")
        parts.append("Completed contents is a general summary of the current contents that have been completed. You need to first focus on the requirements of user's instruction, and then summarize the contents that have been completed.\n\n")

        parts.append("### Output format ###\n")
        parts.append("Your output format is:\n")
        parts.append("### Completed contents ###\nGenerated Completed contents. Don't output the purpose of any operation. Just summarize the contents that have been actually completed in the ### Current operation ###.\n")
        # NOTE(review): placed in this branch because only the preceding line
        # here ends with a newline — confirm against the intended layout.
        parts.append("(Please use English to output)")

    return "".join(parts)