""" The terminal interface is just a view. Just handles the very top layer. If you were to build a frontend this would be a way to do it. """ try: import readline except ImportError: pass import os import platform import random import re import subprocess import time from ..core.utils.scan_code import scan_code from ..core.utils.system_debug_info import system_info from ..core.utils.truncate_output import truncate_output from .components.code_block import CodeBlock from .components.message_block import MessageBlock from .magic_commands import handle_magic_command from .utils.check_for_package import check_for_package from .utils.display_markdown_message import display_markdown_message from .utils.display_output import display_output from .utils.find_image_path import find_image_path from .utils.cli_input import cli_input # Add examples to the readline history examples = [ "How many files are on my desktop?", "What time is it in Seattle?", "Make me a simple Pomodoro app.", "Open Chrome and go to YouTube.", "Can you set my system to light mode?", ] random.shuffle(examples) try: for example in examples: readline.add_history(example) except: # If they don't have readline, that's fine pass def terminal_interface(interpreter, message): # Auto run and offline (this.. this isn't right) don't display messages. # Probably worth abstracting this to something like "debug_cli" at some point. if not interpreter.auto_run and not interpreter.offline: interpreter_intro_message = [ "**Open Interpreter** will require approval before running code." ] if interpreter.safe_mode == "ask" or interpreter.safe_mode == "auto": if not check_for_package("semgrep"): interpreter_intro_message.append( f"**Safe Mode**: {interpreter.safe_mode}\n\n>Note: **Safe Mode** requires `semgrep` (`pip install semgrep`)" ) else: interpreter_intro_message.append("Use `interpreter -y` to bypass this.") interpreter_intro_message.append("Press `CTRL-C` to exit.") display_markdown_message("\n\n".join(interpreter_intro_message) + "\n") if message: interactive = False else: interactive = True active_block = None voice_subprocess = None while True: if interactive: ### This is the primary input for Open Interpreter. message = cli_input("> ").strip() if interpreter.multi_line else input("> ").strip() try: # This lets users hit the up arrow key for past messages readline.add_history(message) except: # If the user doesn't have readline (may be the case on windows), that's fine pass if isinstance(message, str): # This is for the terminal interface being used as a CLI — messages are strings. # This won't fire if they're in the python package, display=True, and they passed in an array of messages (for example). if message == "": # Ignore empty messages when user presses enter without typing anything continue if message.startswith("%") and interactive: handle_magic_command(interpreter, message) continue # Many users do this if message.strip() == "interpreter --local": print("Please exit this conversation, then run `interpreter --local`.") continue if message.strip() == "pip install --upgrade open-interpreter": print( "Please exit this conversation, then run `pip install --upgrade open-interpreter`." ) continue if interpreter.llm.supports_vision or interpreter.llm.vision_renderer != None: # Is the input a path to an image? Like they just dragged it into the terminal? image_path = find_image_path(message) ## If we found an image, add it to the message if image_path: # Add the text interpreter's message history interpreter.messages.append( { "role": "user", "type": "message", "content": message, } ) # Pass in the image to interpreter in a moment message = { "role": "user", "type": "image", "format": "path", "content": image_path, } try: for chunk in interpreter.chat(message, display=False, stream=True): yield chunk # Is this for thine eyes? if "recipient" in chunk and chunk["recipient"] != "user": continue if interpreter.verbose: print("Chunk in `terminal_interface`:", chunk) # Comply with PyAutoGUI fail-safe for OS mode # so people can turn it off by moving their mouse to a corner if interpreter.os: if ( chunk.get("format") == "output" and "failsafeexception" in chunk["content"].lower() ): print("Fail-safe triggered (mouse in one of the four corners).") break if "end" in chunk and active_block: active_block.refresh(cursor=False) if chunk["type"] in [ "message", "console", ]: # We don't stop on code's end — code + console output are actually one block. active_block.end() active_block = None # Assistant message blocks if chunk["type"] == "message": if "start" in chunk: active_block = MessageBlock() render_cursor = True if "content" in chunk: active_block.message += chunk["content"] if "end" in chunk and interpreter.os: last_message = interpreter.messages[-1]["content"] # Remove markdown lists and the line above markdown lists lines = last_message.split("\n") i = 0 while i < len(lines): # Match markdown lists starting with hyphen, asterisk or number if re.match(r"^\s*([-*]|\d+\.)\s", lines[i]): del lines[i] if i > 0: del lines[i - 1] i -= 1 else: i += 1 message = "\n".join(lines) # Replace newlines with spaces, escape double quotes and backslashes sanitized_message = ( message.replace("\\", "\\\\") .replace("\n", " ") .replace('"', '\\"') ) # Display notification in OS mode if interpreter.os: interpreter.computer.os.notify(sanitized_message) # Speak message aloud if platform.system() == "Darwin" and interpreter.speak_messages: if voice_subprocess: voice_subprocess.terminate() voice_subprocess = subprocess.Popen( [ "osascript", "-e", f'say "{sanitized_message}" using "Fred"', ] ) else: pass # User isn't on a Mac, so we can't do this. You should tell them something about that when they first set this up. # Or use a universal TTS library. # Assistant code blocks elif chunk["role"] == "assistant" and chunk["type"] == "code": if "start" in chunk: active_block = CodeBlock() active_block.language = chunk["format"] render_cursor = True if "content" in chunk: active_block.code += chunk["content"] # Execution notice if chunk["type"] == "confirmation": if not interpreter.auto_run: # OI is about to execute code. The user wants to approve this # End the active code block so you can run input() below it if active_block: active_block.refresh(cursor=False) active_block.end() active_block = None code_to_run = chunk["content"] language = code_to_run["format"] code = code_to_run["content"] should_scan_code = False if not interpreter.safe_mode == "off": if interpreter.safe_mode == "auto": should_scan_code = True elif interpreter.safe_mode == "ask": response = input( " Would you like to scan this code? (y/n)\n\n " ) print("") # <- Aesthetic choice if response.strip().lower() == "y": should_scan_code = True if should_scan_code: scan_code(code, language, interpreter) response = input( " Would you like to run this code? (y/n)\n\n " ) print("") # <- Aesthetic choice if response.strip().lower() == "y": # Create a new, identical block where the code will actually be run # Conveniently, the chunk includes everything we need to do this: active_block = CodeBlock() active_block.margin_top = False # <- Aesthetic choice active_block.language = language active_block.code = code else: # User declined to run code. interpreter.messages.append( { "role": "user", "type": "message", "content": "I have declined to run this code.", } ) break # Computer can display visual types to user, # Which sometimes creates more computer output (e.g. HTML errors, eventually) if ( chunk["role"] == "computer" and "content" in chunk and ( chunk["type"] == "image" or ("format" in chunk and chunk["format"] == "html") or ("format" in chunk and chunk["format"] == "javascript") ) ): if interpreter.os and interpreter.verbose == False: # We don't display things to the user in OS control mode, since we use vision to communicate the screen to the LLM so much. # But if verbose is true, we do display it! continue # Display and give extra output back to the LLM extra_computer_output = display_output(chunk) # We're going to just add it to the messages directly, not changing `recipient` here. # Mind you, the way we're doing this, this would make it appear to the user if they look at their conversation history, # because we're not adding "recipient: assistant" to this block. But this is a good simple solution IMO. # we just might want to change it in the future, once we're sure that a bunch of adjacent type:console blocks will be rendered normally to text-only LLMs # and that if we made a new block here with "recipient: assistant" it wouldn't add new console outputs to that block (thus hiding them from the user) if ( interpreter.messages[-1].get("format") != "output" or interpreter.messages[-1]["role"] != "computer" or interpreter.messages[-1]["type"] != "console" ): # If the last message isn't a console output, make a new block interpreter.messages.append( { "role": "computer", "type": "console", "format": "output", "content": extra_computer_output, } ) else: # If the last message is a console output, simply append the extra output to it interpreter.messages[-1]["content"] += ( "\n" + extra_computer_output ) interpreter.messages[-1]["content"] = interpreter.messages[-1][ "content" ].strip() # Console if chunk["type"] == "console": render_cursor = False if "format" in chunk and chunk["format"] == "output": active_block.output += "\n" + chunk["content"] active_block.output = ( active_block.output.strip() ) # ^ Aesthetic choice # Truncate output active_block.output = truncate_output( active_block.output, interpreter.max_output ) if "format" in chunk and chunk["format"] == "active_line": active_block.active_line = chunk["content"] # Display action notifications if we're in OS mode if interpreter.os and active_block.active_line != None: action = "" code_lines = active_block.code.split("\n") if active_block.active_line < len(code_lines): action = code_lines[active_block.active_line].strip() if action.startswith("computer"): description = None # Extract arguments from the action start_index = action.find("(") end_index = action.rfind(")") if start_index != -1 and end_index != -1: # (If we found both) arguments = action[start_index + 1 : end_index] else: arguments = None # NOTE: Do not put the text you're clicking on screen # (unless we figure out how to do this AFTER taking the screenshot) # otherwise it will try to click this notification! if any(action.startswith(text) for text in [ "computer.screenshot", "computer.display.screenshot", "computer.display.view", "computer.view" ]): description = "Viewing screen..." elif action == "computer.mouse.click()": description = "Clicking..." elif action.startswith("computer.mouse.click("): if "icon=" in arguments: text_or_icon = "icon" else: text_or_icon = "text" description = f"Clicking {text_or_icon}..." elif action.startswith("computer.mouse.move("): if "icon=" in arguments: text_or_icon = "icon" else: text_or_icon = "text" if ( "click" in active_block.code ): # This could be better description = f"Clicking {text_or_icon}..." else: description = f"Mousing over {text_or_icon}..." elif action.startswith("computer.keyboard.write("): description = f"Typing {arguments}." elif action.startswith("computer.keyboard.hotkey("): description = f"Pressing {arguments}." elif action.startswith("computer.keyboard.press("): description = f"Pressing {arguments}." elif action == "computer.os.get_selected_text()": description = f"Getting selected text." if description: interpreter.computer.os.notify(description) if "start" in chunk: # We need to make a code block if we pushed out an HTML block first, which would have closed our code block. if not isinstance(active_block, CodeBlock): if active_block: active_block.end() active_block = CodeBlock() if active_block: active_block.refresh(cursor=render_cursor) # (Sometimes -- like if they CTRL-C quickly -- active_block is still None here) if "active_block" in locals(): if active_block: active_block.end() active_block = None time.sleep(0.1) if not interactive: # Don't loop break except KeyboardInterrupt: # Exit gracefully if "active_block" in locals() and active_block: active_block.end() active_block = None if interactive: # (this cancels LLM, returns to the interactive "> " input) continue else: break except: if interpreter.debug: system_info(interpreter) raise