diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..261eeb9e9f8b2b4b0d119366dda99c6fd7d35c64 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md index 7b95401dc46245ac339fc25059d4a56d90b4cde5..99d7cf2acb6a9b2d0f503f78941376e63c0c8467 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,276 @@ ---- -license: apache-2.0 ---- +# ISOPro: Pro Tools for Intelligent Simulation Orchestration for Large Language Models + +ISOPRO is a powerful and flexible Python package designed for creating, managing, and analyzing simulations involving Large Language Models (LLMs). It provides a comprehensive suite of tools for reinforcement learning, conversation simulations, adversarial testing, custom environment creation, and advanced orchestration of multi-agent systems. + +## Features + +- **Custom Environment Creation**: Easily create and manage custom simulation environments for LLMs +- **Conversation Simulation**: Simulate and analyze conversations with AI agents using various user personas +- **Adversarial Testing**: Conduct adversarial simulations to test the robustness of LLM-based systems +- **Reinforcement Learning**: Implement and experiment with RL algorithms in LLM contexts +- **Workflow Automation**: Learn and replicate UI workflows from video demonstrations +- **Car Environment Simulation**: Train and evaluate RL agents in driving scenarios +- **Utility Functions**: Analyze simulation results, calculate LLM metrics, and more +- **Flexible Integration**: Works with popular LLM platforms like OpenAI's GPT models, Claude (Anthropic), and Hugging Face models +- **Orchestration Simulation**: Manage and execute complex multi-agent simulations with different execution modes + +## Installation + +You can install isopro using pip: + +```bash +pip install isopro +``` + +For workflow simulation features, ensure you have the required dependencies: + +```bash +pip install opencv-python numpy torch stable-baselines3 gymnasium tqdm +``` + +If you plan to use Claude capabilities: + +```bash +export ANTHROPIC_API_KEY=your_api_key_here +``` + +## Usage + +### Adversarial Simulation + +Test the robustness of AI models against adversarial attacks. 
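+
+The Claude-backed examples below read `ANTHROPIC_API_KEY` from the environment. If you keep the key in a `.env` file, you can load it first with `python-dotenv`, the same helper the package's own entry points use (a minimal sketch; assumes `python-dotenv` is installed):
+
+```python
+# Load ANTHROPIC_API_KEY from a local .env file before running the examples.
+import os
+from dotenv import load_dotenv
+
+load_dotenv()  # reads .env from the current working directory
+assert os.getenv("ANTHROPIC_API_KEY"), "Set ANTHROPIC_API_KEY before running the simulations"
+```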
+ +```python +from isopro.adversarial_simulation import AdversarialSimulator, AdversarialEnvironment +from isopro.agents.ai_agent import AI_Agent +import anthropic + +class ClaudeAgent(AI_Agent): + def __init__(self, name): + super().__init__(name) + self.client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY")) + + def run(self, input_data): + response = self.client.messages.create( + model="claude-3-opus-20240229", + max_tokens=100, + messages=[{"role": "user", "content": input_data['text']}] + ) + return response.content[0].text + +# Create the AdversarialEnvironment +adv_env = AdversarialEnvironment( + agent_wrapper=ClaudeAgent("Claude Agent"), + num_adversarial_agents=2, + attack_types=["textbugger", "deepwordbug"], + attack_targets=["input", "output"] +) + +# Set up the adversarial simulator +simulator = AdversarialSimulator(adv_env) + +# Run the simulation +input_data = ["What is the capital of France?", "How does photosynthesis work?"] +simulation_results = simulator.run_simulation(input_data, num_steps=1) +``` + +### Conversation Simulation + +Simulate conversations between an AI assistant and various user personas. + +```python +from isopro.conversation_simulation.conversation_simulator import ConversationSimulator + +# Initialize the ConversationSimulator +simulator = ConversationSimulator( + ai_prompt="You are an AI assistant created to be helpful, harmless, and honest. You are a customer service agent for a tech company. Respond politely and professionally." +) + +# Run a simulation with a predefined persona +conversation_history = simulator.run_simulation("upset", num_turns=3) + +# Run a simulation with a custom persona +custom_persona = { + "name": "Techie Customer", + "characteristics": ["tech-savvy", "impatient", "detail-oriented"], + "message_templates": [ + "I've tried rebooting my device, but the error persists. Can you help?", + "What's the latest update on the cloud service outage?", + "I need specifics on the API rate limits for the enterprise plan." + ] +} + +custom_conversation = simulator.run_custom_simulation(**custom_persona, num_turns=3) +``` + +### Workflow Simulation + +Automate UI workflows by learning from video demonstrations. + +```python +from isopro.workflow_simulation import WorkflowAutomation, AgentConfig + +# Basic workflow automation +automation = WorkflowAutomation( + video="path/to/workflow.mp4", + config="config.json", + output="output_dir", + logs="logs_dir" +) +automation.run() + +# Advanced configuration +agent_config = AgentConfig( + learning_rate=3e-4, + pretrain_epochs=10, + use_demonstration=True, + use_reasoning=True +) + +simulator = WorkflowSimulator( + video_path="path/to/video.mp4", + agent_config=agent_config, + viz_config=visualization_config, + validation_config=validation_config, + output_dir="output" +) + +training_results = simulator.train_agents() +evaluation_results = simulator.evaluate_agents() +``` + +### Car Reinforcement Learning + +Train and evaluate RL agents in driving scenarios. 
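+
+`CarRLEnvironment` follows the standard Gymnasium API, so you can sanity-check it with random actions before adding LLM guidance (a minimal sketch based on the environment's `reset`/`step` signatures):
+
+```python
+# Quick sanity check of the raw car environment with random actions.
+# reset() returns (observation, info); step() returns a 5-tuple.
+from isopro.car_simulator import CarRLEnvironment
+
+env = CarRLEnvironment(num_cars=1, time_of_day="08:00", is_rainy=False, is_weekday=True)
+obs, info = env.reset()
+for _ in range(100):
+    action = env.action_space.sample()
+    obs, reward, terminated, truncated, info = env.step(action)
+    if terminated or truncated:
+        obs, info = env.reset()
+```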
+
+```python
+from stable_baselines3 import PPO
+from stable_baselines3.common.vec_env import DummyVecEnv
+from isopro.car_simulator import CarRLEnvironment, LLMCarRLWrapper, CarVisualization
+
+# Create the car environment with LLM integration.
+# LLMCarRLWrapper subclasses CarRLEnvironment and periodically asks Claude for
+# driving guidance, so ANTHROPIC_API_KEY must be set.
+env = DummyVecEnv([lambda: LLMCarRLWrapper(num_cars=1, time_of_day="08:00", is_rainy=False, is_weekday=True)])
+
+# Train an RL agent on the LLM-guided environment
+model = PPO("MlpPolicy", env, verbose=1)
+model.learn(total_timesteps=10_000)
+
+# Visualize the trained policy (CarVisualization renders the cars with pygame;
+# see isopro.car_simulator.carviz for the full rendering loop)
+viz = CarVisualization(env, model)
+```
+
+### Reinforcement Learning with LLM
+
+Integrate Large Language Models with reinforcement learning environments.
+
+```python
+import os
+import gymnasium as gym
+from isopro.rl.rl_agent import RLAgent
+from isopro.rl.rl_environment import LLMRLEnvironment
+from stable_baselines3 import PPO
+from isopro.rl.llm_cartpole_wrapper import LLMCartPoleWrapper
+
+agent_prompt = """You are an AI trained to play the CartPole game.
+Your goal is to balance a pole on a moving cart for as long as possible.
+You will receive observations about the cart's position, velocity, pole angle, and angular velocity.
+Based on these, you should decide whether to move the cart left or right."""
+
+env = LLMCartPoleWrapper(agent_prompt, llm_call_limit=100, api_key=os.getenv("ANTHROPIC_API_KEY"))
+rl_agent = RLAgent("LLM_CartPole_Agent", env, algorithm='PPO')
+
+# Train the model (total_timesteps is kept small because each step can trigger an LLM call)
+model = PPO("MlpPolicy", env, verbose=1)
+model.learn(total_timesteps=2)
+
+# Test the model
+obs, _ = env.reset()
+for _ in range(1000):
+    action, _ = model.predict(obs, deterministic=True)
+    obs, reward, done, _, _ = env.step(action)
+    if done:
+        obs, _ = env.reset()
+```
+
+### AI Orchestration
+
+Orchestrate multiple AI agents to work together on complex tasks.
+
+```python
+from isopro.orchestration_simulation import OrchestrationEnv
+from isopro.orchestration_simulation.components import LLaMAAgent, AnalysisAgent, WritingAgent
+from isopro.orchestration_simulation.evaluator import Evaluator
+
+# Create the orchestration environment
+env = OrchestrationEnv()
+
+# Add agents to the environment
+env.add_component(LLaMAAgent("Research", "conduct thorough research on the impact of artificial intelligence on job markets"))
+env.add_component(AnalysisAgent("Analysis"))
+env.add_component(WritingAgent("Writing"))
+
+# Define the task
+task = "Prepare a comprehensive report on the impact of artificial intelligence on job markets in the next decade."
+
+# Run simulations in different modes
+modes = ['parallel', 'sequence', 'node']
+results = {}
+
+for mode in modes:
+    result = env.run_simulation(mode=mode, input_data={'task': task, 'run_order': 'first'})
+    results[mode] = result
+
+# Evaluate the results
+evaluator = Evaluator()
+best_mode = evaluator.evaluate(results)
+print(f"The best execution mode for this task was: {best_mode}")
+```
+
+## Documentation
+
+For more detailed information on each module and its usage, please refer to the [full documentation](https://isopro.readthedocs.io).
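+
+The analysis helpers used by the package's own entry points can also be called directly. For example, results from `AdversarialSimulator.run_simulation` can be scored with the utilities that `isopro.adversarial_simulation.main` uses (a sketch; assumes `simulation_results` comes from the Adversarial Simulation example above):
+
+```python
+# Score adversarial simulation results with the packaged analysis helpers.
+from isopro.utils.analyze_adversarial_sim import (
+    analyze_adversarial_results,
+    summarize_adversarial_impact,
+)
+
+analysis = analyze_adversarial_results(simulation_results)
+print(summarize_adversarial_impact(analysis))
+```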
+
+## Examples
+
+The [isopro examples](https://github.com/iso-ai/isopro_examples) repository contains Jupyter notebooks with detailed examples:
+
+- `adversarial_example.ipynb`: Demonstrates adversarial testing of language models
+- `conversation_simulation_example.ipynb`: Shows how to simulate conversations with various user personas
+- `workflow_automation_example.ipynb`: Illustrates automated UI workflow learning
+- `car_rl_example.ipynb`: Demonstrates car environment training scenarios
+- `run_cartpole_example.ipynb`: Illustrates the integration of LLMs with reinforcement learning
+- `orchestrator_example.ipynb`: Provides a tutorial on using the AI orchestration capabilities
+
+## Contributing
+
+We welcome contributions! Please see our [Contributing Guide](CONTRIBUTING.md) for more details.
+
+## License
+
+This project is licensed under the Apache License 2.0 - see the [LICENSE](LICENSE) file for details.
+
+## Support
+
+If you encounter any problems or have any questions, please [open an issue](https://github.com/iso-ai/isopro/issues) on our GitHub repository.
+
+## Citation
+
+If you use ISOPRO in your research, please cite it as follows:
+
+```
+@software{isopro2024,
+  author = {Jazmia Henry},
+  title = {ISOPRO: Intelligent Simulation Orchestration for Large Language Models},
+  year = {2024},
+  publisher = {GitHub},
+  journal = {GitHub repository},
+  howpublished = {\url{https://github.com/iso-ai/isopro}}
+}
+```
+
+## Contact
+
+For questions or support, please open an issue on our [GitHub issue tracker](https://github.com/iso-ai/isopro/issues).
diff --git a/isopro/.DS_Store b/isopro/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..68c60c3fa4ce9bf39d0e5cf667b22f80a904f213
Binary files /dev/null and b/isopro/.DS_Store differ
diff --git a/isopro/__init__.py b/isopro/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..353b9b8db901d1a2b4c5405a3880357c3344a7ad
--- /dev/null
+++ b/isopro/__init__.py
@@ -0,0 +1,84 @@
+# isopro/__init__.py
+
+"""
+isopro: Intelligent Simulation Orchestration for LLMs
+
+This package provides tools for creating, managing, and analyzing simulations
+involving Large Language Models (LLMs), including reinforcement learning,
+conversation simulations, and adversarial testing.
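+
+Example (illustrative; mirrors the README usage):
+
+    >>> from isopro import ConversationSimulator
+    >>> simulator = ConversationSimulator(
+    ...     ai_prompt="You are a helpful customer service agent."
+    ... )
+    >>> history = simulator.run_simulation("upset", num_turns=3)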
+""" + +__version__ = "0.1.5" + +# Core components +from .environments.simulation_environment import SimulationEnvironment +from .environments.custom_environment import CustomEnvironment +from .environments.llm_orchestrator import LLMOrchestrator +from .agents.ai_agent import AI_Agent +from .base.base_component import BaseComponent +from .wrappers.simulation_wrapper import SimulationWrapper +from .rl.rl_environment import BaseRLEnvironment +from .rl.rl_agent import RLAgent +from .conversation_simulation import ConversationSimulator, ConversationEnvironment, ConversationAgent +from .adversarial_simulation import AdversarialSimulator, AdversarialEnvironment, AdversarialAgent +from .orchestration_simulation import LLaMAAgent, SubAgent, OrchestrationEnv, AI_AgentException, ComponentException, AI_Agent + +# Workflow simulation components +from .workflow_simulation import ( + WorkflowSimulator, + WorkflowEnvironment, + WorkflowState, + UIElement, + UIElementDetector, + MotionDetector, + EpisodeMetrics, + AgentConfig, + VisualizationConfig, + ValidationConfig, + WorkflowAutomation +) + +# Car RL components +from .car_simulator import CarRLEnvironment, LLMCarRLWrapper, CarVisualization + +__all__ = [ + # Core components + "LLaMAAgent", + "SubAgent", + "OrchestrationEnv", + "AI_AgentException", + "ComponentException", + "AI_Agent", + "SimulationEnvironment", + "CustomEnvironment", + "LLMOrchestrator", + "AI_Agent", + "BaseComponent", + "SimulationWrapper", + "BaseRLEnvironment", + "RLAgent", + "ConversationSimulator", + "ConversationEnvironment", + "ConversationAgent", + "AdversarialSimulator", + "AdversarialEnvironment", + "AdversarialAgent", + + # Workflow components + "WorkflowSimulator", + "WorkflowEnvironment", + "WorkflowState", + "UIElement", + "UIElementDetector", + "MotionDetector", + "EpisodeMetrics", + "AgentConfig", + "VisualizationConfig", + "ValidationConfig", + "WorkflowAutomation", + + # Car RL components + "CarRLEnvironment", + "LLMCarRLWrapper", + "CarVisualization" +] \ No newline at end of file diff --git a/isopro/__pycache__/__init__.cpython-38.pyc b/isopro/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2de508a515275dc4d85fe36d62b7430311392308 Binary files /dev/null and b/isopro/__pycache__/__init__.cpython-38.pyc differ diff --git a/isopro/adversarial_simulation/__init__.py b/isopro/adversarial_simulation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..81fe956e098985a572f141c40ef776f3578a9328 --- /dev/null +++ b/isopro/adversarial_simulation/__init__.py @@ -0,0 +1,18 @@ +""" +Adversarial Simulation Module + +This module provides tools for simulating adversarial attacks on AI models. 
+""" + +from .adversarial_environment import AdversarialEnvironment +from .adversarial_agent import AdversarialAgent +from .adversarial_simulator import AdversarialSimulator +from .attack_utils import get_available_attacks, create_attack + +__all__ = [ + "AdversarialEnvironment", + "AdversarialAgent", + "AdversarialSimulator", + "get_available_attacks", + "create_attack", +] \ No newline at end of file diff --git a/isopro/adversarial_simulation/__pycache__/__init__.cpython-38.pyc b/isopro/adversarial_simulation/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..09cdf30e4ab6c6bb637450cd855db749ad8126af Binary files /dev/null and b/isopro/adversarial_simulation/__pycache__/__init__.cpython-38.pyc differ diff --git a/isopro/adversarial_simulation/__pycache__/adversarial_agent.cpython-38.pyc b/isopro/adversarial_simulation/__pycache__/adversarial_agent.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e740f29ca300830f8b0d1d4c2e84a504036b9ce9 Binary files /dev/null and b/isopro/adversarial_simulation/__pycache__/adversarial_agent.cpython-38.pyc differ diff --git a/isopro/adversarial_simulation/__pycache__/adversarial_environment.cpython-38.pyc b/isopro/adversarial_simulation/__pycache__/adversarial_environment.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fd904886c06f5e25b00a4177217c45379e16fab7 Binary files /dev/null and b/isopro/adversarial_simulation/__pycache__/adversarial_environment.cpython-38.pyc differ diff --git a/isopro/adversarial_simulation/__pycache__/adversarial_envrionment.cpython-38.pyc b/isopro/adversarial_simulation/__pycache__/adversarial_envrionment.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..90eafd0780d1cffa7afb0f428668a2bb54c426af Binary files /dev/null and b/isopro/adversarial_simulation/__pycache__/adversarial_envrionment.cpython-38.pyc differ diff --git a/isopro/adversarial_simulation/__pycache__/adversarial_simulator.cpython-38.pyc b/isopro/adversarial_simulation/__pycache__/adversarial_simulator.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..78fb92f4f537c8e093fcf3a26f2c530bc51f75e0 Binary files /dev/null and b/isopro/adversarial_simulation/__pycache__/adversarial_simulator.cpython-38.pyc differ diff --git a/isopro/adversarial_simulation/__pycache__/attack_utils.cpython-38.pyc b/isopro/adversarial_simulation/__pycache__/attack_utils.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d7c00223061934ab0b14a45dbb20d3bc7dfae46c Binary files /dev/null and b/isopro/adversarial_simulation/__pycache__/attack_utils.cpython-38.pyc differ diff --git a/isopro/adversarial_simulation/adversarial_agent.py b/isopro/adversarial_simulation/adversarial_agent.py new file mode 100644 index 0000000000000000000000000000000000000000..f5d41b5cd1e9c2363e5e8b74ecedef3e4fc8f49d --- /dev/null +++ b/isopro/adversarial_simulation/adversarial_agent.py @@ -0,0 +1,51 @@ +""" +Adversarial Agent + +This module defines the AdversarialAgent class, which can apply various attacks to input or output text. +""" + +from typing import Dict, Any +from isopro.agents.ai_agent import AI_Agent +import logging + +logger = logging.getLogger(__name__) + +class AdversarialAgent(AI_Agent): + def __init__(self, name: str, attack, target: str = "input"): + """ + Initialize the AdversarialAgent. + + Args: + name (str): The name of the agent. + attack (callable): The attack function to apply. 
+ target (str): The target of the attack, either "input" or "output". + """ + super().__init__(name) + self.attack = attack + self.target = target + logger.info(f"Initialized AdversarialAgent '{name}' targeting {target}") + + def run(self, input_data: Dict[str, Any]) -> Dict[str, Any]: + """ + Apply the adversarial attack to the input or output data. + + Args: + input_data (Dict[str, Any]): The input data containing 'text' and 'output' keys. + + Returns: + Dict[str, Any]: The perturbed data. + """ + logger.info(f"Running adversarial agent: {self.name}") + if self.target == "input": + if input_data.get('text'): + input_data['text'] = self.attack(input_data['text']) + else: + logger.warning("Input text is empty or missing. Skipping attack.") + elif self.target == "output": + if input_data.get('output'): + input_data['output'] = self.attack(input_data['output']) + else: + logger.warning("Output text is empty or missing. Skipping attack.") + else: + raise ValueError(f"Invalid target: {self.target}") + return input_data \ No newline at end of file diff --git a/isopro/adversarial_simulation/adversarial_environment.py b/isopro/adversarial_simulation/adversarial_environment.py new file mode 100644 index 0000000000000000000000000000000000000000..f8540db3d28f76377069cfa75ebd5f155f6d6672 --- /dev/null +++ b/isopro/adversarial_simulation/adversarial_environment.py @@ -0,0 +1,81 @@ +""" +Adversarial Environment + +This module defines the AdversarialEnvironment class, which manages adversarial agents and applies attacks to the simulation state. +""" + +import random +from typing import List, Dict, Any +from isopro.environments.simulation_environment import SimulationEnvironment +from .adversarial_agent import AdversarialAgent +from .attack_utils import get_model_and_tokenizer, create_attack, get_available_attacks +import logging + +logger = logging.getLogger(__name__) + +class AdversarialEnvironment(SimulationEnvironment): + def __init__(self, agent_wrapper, num_adversarial_agents: int = 1, attack_types: List[str] = None, attack_targets: List[str] = None): + """ + Initialize the AdversarialEnvironment. + + Args: + agent_wrapper: The wrapped agent to pass the adversarially modified state to. + num_adversarial_agents (int): The number of adversarial agents to create. + attack_types (List[str], optional): The types of attacks to use. If None, all available attacks will be used. + attack_targets (List[str], optional): The targets for the attacks ("input", "output", or both). If None, both will be used. 
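+
+        Example (illustrative; mirrors the README usage)::
+
+            AdversarialEnvironment(
+                agent_wrapper=my_agent,  # any AI_Agent-style wrapper exposing step()
+                num_adversarial_agents=2,
+                attack_types=["textbugger", "deepwordbug"],
+                attack_targets=["input", "output"],
+            )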
+ """ + super().__init__() + self.agent_wrapper = agent_wrapper + self.num_adversarial_agents = num_adversarial_agents + self.attack_types = attack_types or get_available_attacks() + self.attack_targets = attack_targets or ["input", "output"] + self.model, self.tokenizer = get_model_and_tokenizer() + self._create_adversarial_agents() + logger.info(f"Initialized AdversarialEnvironment with {num_adversarial_agents} agents") + + def _create_adversarial_agents(self): + """Create adversarial agents with random attack types and targets.""" + for i in range(self.num_adversarial_agents): + attack_type = random.choice(self.attack_types) + attack_target = random.choice(self.attack_targets) + attack = create_attack(attack_type, self.model, self.tokenizer) + agent = AdversarialAgent(name=f"Adversarial Agent {i+1} ({attack_type}, {attack_target})", attack=attack, target=attack_target) + self.add_agent(agent) + logger.info(f"Created {self.num_adversarial_agents} adversarial agents") + + def step(self, sim_state: Dict[str, Any]) -> Dict[str, Any]: + """ + Apply adversarial attacks and step the environment. + + Args: + sim_state (Dict[str, Any]): The current simulation state. + + Returns: + Dict[str, Any]: The updated simulation state after applying attacks and stepping the wrapped agent. + """ + # Apply adversarial attacks + for agent in self.agents: + sim_state = agent.run(sim_state) + + # Pass the adversarially modified state to the wrapped agent + return self.agent_wrapper.step(sim_state) + + def reset(self): + """Reset the environment and recreate adversarial agents.""" + super().reset() + self._create_adversarial_agents() + logger.info("Reset AdversarialEnvironment and recreated agents") + + def get_attack_distribution(self) -> Dict[str, int]: + """ + Get the distribution of attack types and targets among the adversarial agents. + + Returns: + Dict[str, int]: A dictionary containing the count of each attack type and target. + """ + attack_counts = {f"{attack_type}_{target}": 0 for attack_type in self.attack_types for target in self.attack_targets} + for agent in self.agents: + attack_type, target = agent.name.split('(')[-1].split(')')[0].split(', ') + attack_counts[f"{attack_type}_{target}"] += 1 + logger.info(f"Current attack distribution: {attack_counts}") + return attack_counts \ No newline at end of file diff --git a/isopro/adversarial_simulation/adversarial_simulator.py b/isopro/adversarial_simulation/adversarial_simulator.py new file mode 100644 index 0000000000000000000000000000000000000000..fb9398f927100a4b05e973e076bf05d5ea13ac31 --- /dev/null +++ b/isopro/adversarial_simulation/adversarial_simulator.py @@ -0,0 +1,47 @@ +""" +Adversarial Simulator + +This module provides a high-level interface for running adversarial simulations. +""" + +from typing import List, Dict, Any +import logging + +logger = logging.getLogger(__name__) + +class AdversarialSimulator: + def __init__(self, environment): + """ + Initialize the AdversarialSimulator. + + Args: + environment: The AdversarialEnvironment to use in the simulation. + """ + self.environment = environment + logger.info("Initialized AdversarialSimulator") + + def run_simulation(self, input_data: List[str], num_steps: int = 1) -> List[Dict[str, Any]]: + """ + Run an adversarial simulation. + + Args: + input_data (List[str]): The list of input texts to use in the simulation. + num_steps (int): The number of steps to run the simulation for each input. 
+ + Returns: + List[Dict[str, Any]]: A list of simulation results, including original and perturbed inputs and outputs. + """ + results = [] + for text in input_data: + sim_state = {"text": text, "output": ""} + original_output = self.environment.agent_wrapper.run({"text": text}) + for _ in range(num_steps): + sim_state = self.environment.step(sim_state) + results.append({ + "original_input": text, + "perturbed_input": sim_state["text"], + "original_output": original_output, + "perturbed_output": sim_state["output"] + }) + logger.info(f"Completed simulation with {len(input_data)} inputs and {num_steps} steps each") + return results \ No newline at end of file diff --git a/isopro/adversarial_simulation/attack_utils.py b/isopro/adversarial_simulation/attack_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..02a8fd90b3acb4d19ef88fe59236d58b3fa3d67c --- /dev/null +++ b/isopro/adversarial_simulation/attack_utils.py @@ -0,0 +1,65 @@ +""" +Attack Utilities + +This module provides utility functions for creating and managing adversarial attacks. +""" + +import torch +from typing import Tuple, Callable +from transformers import AutoModelForSequenceClassification, AutoTokenizer +from isoadverse.attacks.text_fgsm import text_fgsm_attack +from isoadverse.attacks.text_pgd import text_pgd_attack +from isoadverse.attacks.textbugger import textbugger_attack +from isoadverse.attacks.deepwordbug import deepwordbug_attack +import logging + +logger = logging.getLogger(__name__) + +def get_model_and_tokenizer(model_name: str = 'bert-base-uncased') -> Tuple[torch.nn.Module, torch.nn.Module]: + """ + Load a pre-trained model and tokenizer. + + Args: + model_name (str): The name of the model to load. + + Returns: + Tuple[torch.nn.Module, torch.nn.Module]: The loaded model and tokenizer. + """ + model = AutoModelForSequenceClassification.from_pretrained(model_name) + tokenizer = AutoTokenizer.from_pretrained(model_name) + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + model.to(device) + logger.info(f"Loaded model {model_name} on {device}") + return model, tokenizer + +def create_attack(attack_type: str, model: torch.nn.Module, tokenizer: torch.nn.Module) -> Callable: + """ + Create an attack function based on the specified attack type. + + Args: + attack_type (str): The type of attack to create. + model (torch.nn.Module): The model to use for the attack. + tokenizer (torch.nn.Module): The tokenizer to use for the attack. + + Returns: + Callable: The attack function. + """ + if attack_type == "fgsm": + return lambda x: text_fgsm_attack(model, tokenizer, x, torch.tensor([1]), epsilon=0.3) + elif attack_type == "pgd": + return lambda x: text_pgd_attack(model, tokenizer, x, torch.tensor([1]), epsilon=0.3, alpha=0.1, num_steps=10) + elif attack_type == "textbugger": + return lambda x: textbugger_attack(x, num_bugs=5) + elif attack_type == "deepwordbug": + return lambda x: deepwordbug_attack(x, num_bugs=5) + else: + raise ValueError(f"Unknown attack type: {attack_type}") + +def get_available_attacks() -> list: + """ + Get a list of available attack types. + + Returns: + list: A list of available attack types. 
+ """ + return ["fgsm", "pgd", "textbugger", "deepwordbug"] \ No newline at end of file diff --git a/isopro/adversarial_simulation/main.py b/isopro/adversarial_simulation/main.py new file mode 100644 index 0000000000000000000000000000000000000000..b1456a3959495bfea6a76133df95427978e86297 --- /dev/null +++ b/isopro/adversarial_simulation/main.py @@ -0,0 +1,124 @@ +import logging +from typing import List +from .adversarial_simulator import AdversarialSimulator +from .adversarial_environment import AdversarialEnvironment +from isopro.utils.analyze_adversarial_sim import analyze_adversarial_results, summarize_adversarial_impact +from isopro.agents.ai_agent import AI_Agent +import anthropic +import os +from dotenv import load_dotenv +import json +from datetime import datetime +import numpy as np +import torch + +load_dotenv() + +# Set up logging +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +class ClaudeAgent(AI_Agent): + def __init__(self, name): + super().__init__(name) + self.client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY")) + + def run(self, input_data): + response = self.client.messages.create( + model="claude-3-opus-20240229", + max_tokens=100, + messages=[{"role": "user", "content": input_data['text']}] + ) + return response.content[0].text + + def step(self, sim_state): + sim_state['output'] = self.run(sim_state) + return sim_state + +class NumpyEncoder(json.JSONEncoder): + def default(self, obj): + if isinstance(obj, np.floating): + return float(obj) + if isinstance(obj, torch.Tensor): + return obj.tolist() + return super(NumpyEncoder, self).default(obj) + +def setup_logging(log_dir: str, run_id: str) -> None: + os.makedirs(log_dir, exist_ok=True) + log_file = os.path.join(log_dir, f"adv-{run_id}.log") + + logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', + handlers=[ + logging.FileHandler(log_file), + logging.StreamHandler() + ] + ) + +def save_scores(output_dir: str, run_id: str, analysis_results: dict) -> None: + os.makedirs(output_dir, exist_ok=True) + output_file = os.path.join(output_dir, f"adv-{run_id}.json") + + try: + with open(output_file, 'w') as f: + json.dump(analysis_results, f, indent=2, cls=NumpyEncoder) + + logging.info(f"Saved analysis results to {output_file}") + except Exception as e: + logging.error(f"Error saving analysis results: {str(e)}") + +def get_sample_inputs() -> List[str]: + return [ + "What is the capital of France?", + "How does photosynthesis work?", + "Explain the theory of relativity." 
+ ] + +def main(): + try: + run_id = datetime.now().strftime("%Y%m%d-%H%M%S") + + log_dir = "logs" + setup_logging(log_dir, run_id) + + logger = logging.getLogger(__name__) + logger.info(f"Starting adversarial simulation run {run_id}") + + claude_agent = ClaudeAgent("Claude Agent") + + # Create the AdversarialEnvironment + adv_env = AdversarialEnvironment( + agent_wrapper=claude_agent, + num_adversarial_agents=2, + attack_types=["textbugger", "deepwordbug"], + attack_targets=["input", "output"] + ) + + # Set up the adversarial simulator with the environment + simulator = AdversarialSimulator(adv_env) + + input_data = get_sample_inputs() + + logger.info("Starting adversarial simulation...") + simulation_results = simulator.run_simulation(input_data, num_steps=1) + + logger.info("Analyzing simulation results...") + analysis_results = analyze_adversarial_results(simulation_results) + + summary = summarize_adversarial_impact(analysis_results) + + print("\nAdversarial Simulation Summary:") + print(summary) + + output_dir = "output" + save_scores(output_dir, run_id, analysis_results) + + logger.info("Simulation complete.") + + except Exception as e: + logger.error(f"An error occurred during the simulation: {str(e)}", exc_info=True) + raise + +if __name__ == "__main__": + main() diff --git a/isopro/agents/__init__.py b/isopro/agents/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4443524a404d91dca6e334aba80ce0db27492456 --- /dev/null +++ b/isopro/agents/__init__.py @@ -0,0 +1,7 @@ +""" +Agent classes for the isopro package. +""" + +from .ai_agent import AI_Agent + +__all__ = ["AI_Agent"] \ No newline at end of file diff --git a/isopro/agents/__pycache__/__init__.cpython-38.pyc b/isopro/agents/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5957a18f33faaf7338c2ed329aa697229df2a774 Binary files /dev/null and b/isopro/agents/__pycache__/__init__.cpython-38.pyc differ diff --git a/isopro/agents/__pycache__/ai_agent.cpython-38.pyc b/isopro/agents/__pycache__/ai_agent.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0690eef2783c4f72ebaf31a961bad73f470888e4 Binary files /dev/null and b/isopro/agents/__pycache__/ai_agent.cpython-38.pyc differ diff --git a/isopro/agents/ai_agent.py b/isopro/agents/ai_agent.py new file mode 100644 index 0000000000000000000000000000000000000000..2052f9e5aa9ff8e3936b369b5aa0df8b4fd31a68 --- /dev/null +++ b/isopro/agents/ai_agent.py @@ -0,0 +1,44 @@ +"""AI Agent for Simulation Environment.""" +from ..base.base_component import BaseComponent, agent_component + +@agent_component +class AI_Agent(BaseComponent): + """AI Agent for Simulation Environment.""" + + def __init__(self, name): + """ + Initialize the AI_Agent. + + Args: + name (str): The name of the agent. + """ + super().__init__(name) + self.components = [] + + def add_component(self, component): + """ + Add a component to the agent. + + Args: + component (BaseComponent): The component to add. + """ + if getattr(component, '_is_agent_component', False): + self.components.append(component) + else: + raise ValueError(f"Component {component} is not decorated with @agent_component") + + def run(self, input_data): + """ + Run the agent's components and process input data. + + Args: + input_data (dict): The input data for the agent. + + Returns: + dict: The processed output data. 
+ """ + self.logger.info(f"Running agent: {self.name}") + output = input_data + for component in self.components: + output = component.run(output) + return output \ No newline at end of file diff --git a/isopro/base/__init__.py b/isopro/base/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..04a833d4f7ec5e2d10917eb1cdbb9a33d48534d0 --- /dev/null +++ b/isopro/base/__init__.py @@ -0,0 +1,8 @@ +""" +Base classes for the isopro package. +""" + +from .base_wrapper import BaseWrapper +from .base_component import BaseComponent + +__all__ = ["BaseWrapper", "BaseComponent"] \ No newline at end of file diff --git a/isopro/base/__pycache__/__init__.cpython-38.pyc b/isopro/base/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a431bfe9b486527c7570ca0a47127c2b2be1c87e Binary files /dev/null and b/isopro/base/__pycache__/__init__.cpython-38.pyc differ diff --git a/isopro/base/__pycache__/base_component.cpython-38.pyc b/isopro/base/__pycache__/base_component.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..586f65224b96cc14f83274845573d588bff1d429 Binary files /dev/null and b/isopro/base/__pycache__/base_component.cpython-38.pyc differ diff --git a/isopro/base/__pycache__/base_wrapper.cpython-38.pyc b/isopro/base/__pycache__/base_wrapper.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2439a893b894124e6884ff505b5b22d1d1e28d51 Binary files /dev/null and b/isopro/base/__pycache__/base_wrapper.cpython-38.pyc differ diff --git a/isopro/base/base_component.py b/isopro/base/base_component.py new file mode 100644 index 0000000000000000000000000000000000000000..dd0942a4ab50753290f5cedba40a2ba023da27b5 --- /dev/null +++ b/isopro/base/base_component.py @@ -0,0 +1,34 @@ +"""Base Component for Simulation Environment.""" +from abc import ABC, abstractmethod +from ..utils.logging_utils import setup_logger + +class BaseComponent(ABC): + """Base Component for Simulation Environment.""" + + def __init__(self, name): + """ + Initialize the BaseComponent. + + Args: + name (str): The name of the component. + """ + self.name = name + self.logger = setup_logger(f"{self.__class__.__name__}_{self.name}") + + @abstractmethod + def run(self): + """Execute the component's main functionality.""" + pass + + def __str__(self): + return f"{self.__class__.__name__}({self.name})" + +def agent_component(cls): + """ + Decorator to mark a class as an agent component. + + This decorator can be used to add metadata or perform + additional setup for agent components. + """ + cls._is_agent_component = True + return cls \ No newline at end of file diff --git a/isopro/base/base_wrapper.py b/isopro/base/base_wrapper.py new file mode 100644 index 0000000000000000000000000000000000000000..bf190508e00a14596492d15cdad10430d288a53a --- /dev/null +++ b/isopro/base/base_wrapper.py @@ -0,0 +1,82 @@ +"""Base Wrapper for Simulation Environment.""" +from abc import ABC, abstractmethod +import logging +from ..utils.logging_utils import setup_logger + +class BaseWrapper(ABC): + """Base Wrapper for Simulation Environment.""" + + def __init__(self, agent): + """ + Initialize the BaseWrapper. + + Args: + agent: The agent to be wrapped. 
+ """ + self.agent = agent + self.logger = setup_logger(self.__class__.__name__) + + @abstractmethod + def step(self): + """Execute one time step within the environment.""" + pass + + @abstractmethod + def reset(self): + """Reset the state of the environment to an initial state.""" + pass + + @abstractmethod + def render(self): + """Render the environment.""" + pass + + @abstractmethod + def close(self): + """Close the environment, clean up any resources.""" + pass + + @abstractmethod + def convert_to_agent_input(self, sim_state): + """ + Convert simulation state to agent input format. + + Args: + sim_state (dict): The current state of the simulation. + + Returns: + dict: The converted input for the agent. + """ + pass + + @abstractmethod + def convert_from_agent_output(self, agent_output): + """ + Convert agent output to simulation input format. + + Args: + agent_output (dict): The output from the agent. + + Returns: + dict: The converted input for the simulation. + """ + pass + + def __getattr__(self, name): + """ + Attempt to get an attribute from the agent if it's not found in the wrapper. + + Args: + name (str): The name of the attribute. + + Returns: + The requested attribute. + + Raises: + AttributeError: If the attribute is not found in the agent or wrapper. + """ + try: + return getattr(self.agent, name) + except AttributeError: + self.logger.warning(f"Attribute '{name}' not found in agent or wrapper") + raise \ No newline at end of file diff --git a/isopro/car_simulator/__init__.py b/isopro/car_simulator/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..2626bc61cae42fdb341285966bebfa7cd87ea030 --- /dev/null +++ b/isopro/car_simulator/__init__.py @@ -0,0 +1,12 @@ +""" +Car Reinforcement Learning Package + +This package contains modules for simulating and visualizing +reinforcement learning agents in a car driving environment. +""" + +from .car_rl_environment import CarRLEnvironment +from .car_llm_agent import LLMCarRLWrapper +from .carviz import CarVisualization + +__all__ = ['CarRLEnvironment', 'LLMCarRLWrapper', 'CarVisualization'] \ No newline at end of file diff --git a/isopro/car_simulator/car_llm_agent.py b/isopro/car_simulator/car_llm_agent.py new file mode 100644 index 0000000000000000000000000000000000000000..98658a1db7684e9af3a009f745a9b37d5f367adf --- /dev/null +++ b/isopro/car_simulator/car_llm_agent.py @@ -0,0 +1,143 @@ +import gymnasium as gym +from stable_baselines3 import PPO +from stable_baselines3.common.vec_env import DummyVecEnv +from stable_baselines3.common.evaluation import evaluate_policy +import numpy as np +import anthropic +import logging +from typing import List, Dict, Any +from .car_rl_environment import CarRLEnvironment +import os +from dotenv import load_dotenv + +# Load environment variables from .env file +load_dotenv() + +# Set up logging +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') +logger = logging.getLogger(__name__) + +class LLMCarRLWrapper(CarRLEnvironment): + def __init__(self, num_cars=1, time_of_day="12:00", is_rainy=False, is_weekday=True, + agent_prompt="You are an expert driving instructor. 
Provide concise guidance to improve the RL agent's driving performance.", + llm_call_limit=100, llm_call_frequency=100): + super().__init__(num_cars, time_of_day, is_rainy, is_weekday) + self.agent_prompt = agent_prompt + api_key = os.getenv('ANTHROPIC_API_KEY') + if not api_key: + raise ValueError("ANTHROPIC_API_KEY not found in environment variables") + self.client = anthropic.Anthropic(api_key=api_key) + self.llm_call_count = 0 + self.llm_call_limit = llm_call_limit + self.llm_call_frequency = llm_call_frequency + self.conversation_history: List[Dict[str, str]] = [] + self.step_count = 0 + self.current_guidance = {"action": "unknown"} + + def reset(self, seed=None, options=None): + self.step_count = 0 + self.current_guidance = {"action": "unknown"} + return super().reset(seed=seed) + + def step(self, action): + self.step_count += 1 + + if self.step_count % self.llm_call_frequency == 0 and self.llm_call_count < self.llm_call_limit: + observation, reward, terminated, truncated, info = super().step(action) + self.current_guidance = self._get_llm_guidance(observation, reward, terminated) + self.llm_call_count += 1 + else: + observation, reward, terminated, truncated, info = super().step(action) + + adjusted_action = self._adjust_action_based_on_guidance(action, self.current_guidance) + + return observation, reward, terminated, truncated, info + + def _get_llm_guidance(self, observation, reward, terminated): + user_message = f"Current state: {observation}, Reward: {reward}, Terminated: {terminated}. Provide brief driving advice." + + messages = self.conversation_history + [ + {"role": "user", "content": user_message}, + ] + + try: + response = self.client.messages.create( + model="claude-3-opus-20240229", + max_tokens=50, + system=self.agent_prompt, + messages=messages + ) + + ai_response = response.content[0].text + self.conversation_history.append({"role": "user", "content": user_message}) + self.conversation_history.append({"role": "assistant", "content": ai_response}) + logger.debug(f"LLM guidance: {ai_response}") + return self._parse_llm_guidance(ai_response) + except Exception as e: + logger.error(f"Error getting LLM guidance: {e}") + return {"action": "unknown"} + + def _parse_llm_guidance(self, guidance): + guidance_lower = guidance.lower() + actions = { + "increase speed": {"action": "increase_speed"}, + "decrease speed": {"action": "decrease_speed"}, + "slow down": {"action": "decrease_speed"}, + "turn left": {"action": "turn_left"}, + "turn right": {"action": "turn_right"}, + "stop": {"action": "stop"}, + "start raining": {"environment": "rain", "status": True}, + "increase traffic": {"environment": "traffic", "density": "high"} + } + + for key, value in actions.items(): + if key in guidance_lower: + return value + + return {"action": "unknown"} + + def _adjust_action_based_on_guidance(self, action, guidance): + adjustments = { + "increase_speed": (0, 0.1), + "decrease_speed": (0, -0.1), + "turn_left": (1, -0.1), + "turn_right": (1, 0.1), + } + + if guidance["action"] in adjustments: + index, adjustment = adjustments[guidance["action"]] + action[index] = np.clip(action[index] + adjustment, -1.0, 1.0) + + return action + +def make_env(llm_call_limit): + def _init(): + return LLMCarRLWrapper(num_cars=3, time_of_day="08:00", is_rainy=False, is_weekday=True, + llm_call_limit=llm_call_limit) + return _init + +def train_and_evaluate(env, total_timesteps=100000, eval_episodes=10): + model = PPO("MlpPolicy", env, verbose=1, learning_rate=0.0003, n_steps=2048, + batch_size=64, n_epochs=10, 
gamma=0.99, gae_lambda=0.95, clip_range=0.2) + + model.learn(total_timesteps=total_timesteps, progress_bar=True) + + mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=eval_episodes) + logger.info(f"Mean reward: {mean_reward:.2f} +/- {std_reward:.2f}") + + return model, mean_reward + +def main(): + llm_call_limit = int(os.getenv('LLM_CALL_LIMIT', '10')) # Default to 10 if not set + + env = DummyVecEnv([make_env(llm_call_limit)]) + + model, mean_reward = train_and_evaluate(env) + + model.save("car_rl_llm_ppo_model") + + logger.info("Training and evaluation completed.") + logger.info(f"Final mean reward: {mean_reward:.2f}") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/isopro/car_simulator/car_rl_environment.py b/isopro/car_simulator/car_rl_environment.py new file mode 100644 index 0000000000000000000000000000000000000000..d2f37df70a54dd46de8333f900a35b9c658bd7c8 --- /dev/null +++ b/isopro/car_simulator/car_rl_environment.py @@ -0,0 +1,155 @@ +import gymnasium as gym +from gymnasium import spaces +import numpy as np +import torch +import random +from typing import List, Dict, Tuple, Union + +class CarRLEnvironment(gym.Env): + def __init__(self, num_cars=1, time_of_day="12:00", is_rainy=False, is_weekday=True): + super().__init__() + self.num_cars = num_cars + self.time_of_day = self.convert_time(time_of_day) + self.is_rainy = is_rainy + self.is_weekday = is_weekday + self.friction = 0.4 if is_rainy else 0.8 + + # Define action and observation spaces + self.action_space = spaces.Box(low=-1, high=1, shape=(num_cars * 2,), dtype=np.float32) + + # Observation space: [x, y, vx, vy, angle] for each car + [time_of_day, is_rainy, is_weekday] + self.observation_space = spaces.Box( + low=-np.inf, + high=np.inf, + shape=(num_cars * 5 + 3,), + dtype=np.float32 + ) + + self.cars = self.initialize_cars() + + def convert_time(self, time_of_day: Union[str, float]) -> float: + """Convert time to a float between 0 and 24.""" + if isinstance(time_of_day, str): + try: + hours, minutes = map(int, time_of_day.split(':')) + return float(hours + minutes / 60.0) + except ValueError: + print(f"Invalid time format: {time_of_day}. Using default value of 12:00.") + return 12.0 + elif isinstance(time_of_day, (int, float)): + return float(time_of_day) % 24.0 + else: + print(f"Invalid time format: {time_of_day}. 
Using default value of 12:00.") + return 12.0 + + def initialize_cars(self) -> List[Dict[str, torch.Tensor]]: + """Initialize car parameters.""" + return [ + { + "position": torch.tensor([random.uniform(-1, 1), random.uniform(-1, 1)], dtype=torch.float32), + "velocity": torch.tensor([random.uniform(-0.5, 0.5), random.uniform(-0.5, 0.5)], dtype=torch.float32), + "angle": torch.tensor([random.uniform(-np.pi, np.pi)], dtype=torch.float32) + } for _ in range(self.num_cars) + ] + + def reset(self, seed=None) -> Tuple[np.ndarray, Dict]: + super().reset(seed=seed) + self.cars = self.initialize_cars() + return self.get_observation(), {} + + def get_observation(self) -> np.ndarray: + """Get the current observation of the environment.""" + car_obs = np.concatenate([ + np.concatenate([ + car["position"].numpy(), + car["velocity"].numpy(), + car["angle"].numpy() + ]) for car in self.cars + ]) + env_obs = np.array([ + self.time_of_day, + float(self.is_rainy), + float(self.is_weekday) + ], dtype=np.float32) + return np.concatenate([car_obs, env_obs]).astype(np.float32) + + def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, bool, Dict]: + """ + Take a step in the environment. + + Args: + action (np.ndarray): Array of actions for all cars [acceleration1, steering1, acceleration2, steering2, ...] + + Returns: + observation, reward, terminated, truncated, info + """ + # Ensure action is the correct shape + action = np.array(action).flatten() + if action.shape[0] != self.num_cars * 2: + raise ValueError(f"Action shape {action.shape} does not match expected shape ({self.num_cars * 2},)") + + for i in range(self.num_cars): + car_action = action[i*2:(i+1)*2] + self.apply_action(self.cars[i], car_action) + self.update_physics(self.cars[i]) + + observation = self.get_observation() + reward = self.calculate_reward() + terminated = self.is_terminated() + truncated = False + info = {} + + return observation, reward, terminated, truncated, info + + def apply_action(self, car: Dict[str, torch.Tensor], action: np.ndarray): + """Apply the RL agent's action to the car.""" + if len(action) != 2: + raise ValueError(f"Expected action to have 2 values, got {len(action)}") + + acceleration, steering = action + car["velocity"] += torch.tensor([acceleration, 0.0], dtype=torch.float32) * 0.1 # Scale down the acceleration + car["angle"] += torch.tensor([steering], dtype=torch.float32) * 0.1 # Scale down the steering + + def update_physics(self, car: Dict[str, torch.Tensor], dt: float = 0.1): + """Update car position and velocity using physics simulation.""" + # Update velocity (apply friction) + car["velocity"] *= (1 - self.friction * dt) + + # Update position + car["position"] += car["velocity"] * dt + + # Apply steering + angle = car["angle"].item() + rotation_matrix = torch.tensor([ + [np.cos(angle), -np.sin(angle)], + [np.sin(angle), np.cos(angle)] + ], dtype=torch.float32) + car["velocity"] = torch.matmul(rotation_matrix, car["velocity"]) + + # Bound the position to keep cars on the screen + car["position"] = torch.clamp(car["position"], -1, 1) + + def calculate_reward(self) -> float: + """Calculate the reward based on the current state.""" + reward = 0.0 + for car in self.cars: + # Reward for moving + speed = torch.norm(car["velocity"]).item() + reward += speed * 0.1 + + # Penalty for being close to the edge + distance_from_center = torch.norm(car["position"]).item() + reward -= distance_from_center * 0.1 + + return reward + + def is_terminated(self) -> bool: + """Check if the episode should be 
terminated.""" + for car in self.cars: + if torch.any(torch.abs(car["position"]) > 1): + return True + return False + + def render(self): + """Render the environment (placeholder for potential future implementation).""" + pass \ No newline at end of file diff --git a/isopro/car_simulator/car_rl_model.zip b/isopro/car_simulator/car_rl_model.zip new file mode 100644 index 0000000000000000000000000000000000000000..21efc9d3e7c117cccf3cfa74a211cbae788a9eb6 --- /dev/null +++ b/isopro/car_simulator/car_rl_model.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85303b6b7e544f04d04cb949709ee37ac956a78f098c0390e2b210448bc446bb +size 164031 diff --git a/isopro/car_simulator/car_rl_training.py b/isopro/car_simulator/car_rl_training.py new file mode 100644 index 0000000000000000000000000000000000000000..c9c65a8a6d42ea9f8f7e2c0f961c07a21e25ee40 --- /dev/null +++ b/isopro/car_simulator/car_rl_training.py @@ -0,0 +1,38 @@ +import gymnasium as gym +from stable_baselines3 import PPO +from stable_baselines3.common.vec_env import DummyVecEnv +from stable_baselines3.common.evaluation import evaluate_policy +import numpy as np +from .car_rl_environment import CarRLEnvironment + +def make_env(): + """Create and return an instance of the CarRLEnvironment.""" + return CarRLEnvironment(num_cars=3, time_of_day="08:00", is_rainy=False, is_weekday=True) + +# Create a vectorized environment +env = DummyVecEnv([make_env]) + +# Initialize the PPO agent +model = PPO("MlpPolicy", env, verbose=1, learning_rate=0.0003, n_steps=2048, batch_size=64, n_epochs=10, gamma=0.99, gae_lambda=0.95, clip_range=0.2, ent_coef=0.0) + +# Train the agent +total_timesteps = 1_000_000 +model.learn(total_timesteps=total_timesteps, progress_bar=True) + +# Evaluate the trained agent +mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=10) +print(f"Mean reward: {mean_reward:.2f} +/- {std_reward:.2f}") + +# Save the trained model +model.save("car_rl_ppo_model") + +# Test the trained agent +obs = env.reset() +for _ in range(1000): + action, _states = model.predict(obs, deterministic=True) + obs, rewards, dones, info = env.step(action) + env.render() + if dones.any(): + obs = env.reset() + +env.close() \ No newline at end of file diff --git a/isopro/car_simulator/carviz.py b/isopro/car_simulator/carviz.py new file mode 100644 index 0000000000000000000000000000000000000000..c7e43d97c817568c8870bab63b07b7f38a6c43bd --- /dev/null +++ b/isopro/car_simulator/carviz.py @@ -0,0 +1,227 @@ +import pygame +import numpy as np +from .car_rl_environment import CarRLEnvironment +from stable_baselines3 import PPO +import math +import random +from datetime import datetime, timedelta + +# Initialize Pygame +pygame.init() + +# Constants +SCREEN_WIDTH = 1000 +SCREEN_HEIGHT = 800 +ROAD_WIDTH = 800 +ROAD_HEIGHT = 600 +CAR_WIDTH = 40 +CAR_HEIGHT = 20 +INFO_BOX_WIDTH = 200 +INFO_BOX_HEIGHT = 120 +UI_PANEL_WIDTH = 200 + +# Colors +WHITE = (255, 255, 255) +BLACK = (0, 0, 0) +GRAY = (200, 200, 200) +RED = (255, 0, 0) +GREEN = (0, 255, 0) +BLUE = (0, 0, 255) +YELLOW = (255, 255, 0) + +class CarVisualization: + def __init__(self, env, model): + self.env = env + self.unwrapped_env = env.envs[0] + self.model = model + self.screen = pygame.display.set_mode((SCREEN_WIDTH, SCREEN_HEIGHT)) + pygame.display.set_caption("Enhanced Car RL Visualization") + self.clock = pygame.time.Clock() + self.font = pygame.font.Font(None, 24) + self.rain = [self.RainDrop() for _ in range(100)] + self.obstacles = [self.Obstacle() for _ in range(5)] + 
self.time_of_day = self.float_to_datetime(self.unwrapped_env.time_of_day) + + def float_to_datetime(self, time_float): + """Convert a float time (0-24) to a datetime object.""" + hours = int(time_float) + minutes = int((time_float - hours) * 60) + return datetime.min + timedelta(hours=hours, minutes=minutes) + + def datetime_to_string(self, dt): + """Convert a datetime object to a string in HH:MM format.""" + return dt.strftime("%H:%M") + + def draw_road(self): + road_rect = pygame.Rect((SCREEN_WIDTH - ROAD_WIDTH) // 2, (SCREEN_HEIGHT - ROAD_HEIGHT) // 2, ROAD_WIDTH, ROAD_HEIGHT) + road_color = self.get_road_color() + pygame.draw.rect(self.screen, road_color, road_rect) + + # Draw lane markings + for i in range(1, 3): + y = (SCREEN_HEIGHT - ROAD_HEIGHT) // 2 + i * (ROAD_HEIGHT // 3) + pygame.draw.line(self.screen, WHITE, (road_rect.left, y), (road_rect.right, y), 2) + + def get_road_color(self): + hour = self.time_of_day.hour + if 6 <= hour < 18: # Daytime + return GRAY + elif 18 <= hour < 20 or 4 <= hour < 6: # Dawn/Dusk + return (150, 150, 170) + else: # Night + return (100, 100, 120) + + def draw_car(self, position, angle, color): + x, y = position + x = (x + 1) * ROAD_WIDTH / 2 + (SCREEN_WIDTH - ROAD_WIDTH) // 2 + y = (y + 1) * ROAD_HEIGHT / 2 + (SCREEN_HEIGHT - ROAD_HEIGHT) // 2 + + car_surface = pygame.Surface((CAR_WIDTH, CAR_HEIGHT), pygame.SRCALPHA) + pygame.draw.rect(car_surface, color, (0, 0, CAR_WIDTH, CAR_HEIGHT)) + pygame.draw.polygon(car_surface, BLACK, [(0, 0), (CAR_WIDTH // 2, 0), (0, CAR_HEIGHT)]) + rotated_car = pygame.transform.rotate(car_surface, -math.degrees(angle)) + self.screen.blit(rotated_car, rotated_car.get_rect(center=(x, y))) + + def draw_info_box(self, car_index, position, action, reward): + x, y = position + x = (x + 1) * ROAD_WIDTH / 2 + (SCREEN_WIDTH - ROAD_WIDTH) // 2 + y = (y + 1) * ROAD_HEIGHT / 2 + (SCREEN_HEIGHT - ROAD_HEIGHT) // 2 + + info_box = pygame.Surface((INFO_BOX_WIDTH, INFO_BOX_HEIGHT)) + info_box.fill(WHITE) + pygame.draw.rect(info_box, BLACK, info_box.get_rect(), 2) + + texts = [ + f"Car {car_index + 1}", + f"Acceleration: {action[0]:.2f}", + f"Steering: {action[1]:.2f}", + f"Reward: {reward:.2f}", + f"Speed: {np.linalg.norm(self.unwrapped_env.cars[car_index]['velocity']):.2f}" + ] + + for i, text in enumerate(texts): + text_surface = self.font.render(text, True, BLACK) + info_box.blit(text_surface, (10, 10 + i * 25)) + + self.screen.blit(info_box, (x - INFO_BOX_WIDTH // 2, y - INFO_BOX_HEIGHT - 30)) + + + def draw_rain(self): + for drop in self.rain: + pygame.draw.line(self.screen, (200, 200, 255), (drop.x, drop.y), (drop.x, drop.y + drop.size), drop.size) + drop.fall() + + def draw_obstacles(self): + for obstacle in self.obstacles: + pygame.draw.rect(self.screen, YELLOW, ((SCREEN_WIDTH - ROAD_WIDTH) // 2 + obstacle.x, + (SCREEN_HEIGHT - ROAD_HEIGHT) // 2 + obstacle.y, + obstacle.width, obstacle.height)) + + def draw_ui_panel(self): + panel = pygame.Surface((UI_PANEL_WIDTH, SCREEN_HEIGHT)) + panel.fill(WHITE) + pygame.draw.rect(panel, BLACK, panel.get_rect(), 2) + + texts = [ + f"Time: {self.datetime_to_string(self.time_of_day)}", + f"Rainy: {'Yes' if self.unwrapped_env.is_rainy else 'No'}", + f"Weekday: {'Yes' if self.unwrapped_env.is_weekday else 'No'}", + "Press keys to change:", + "T: Time +1 hour", + "R: Toggle Rain", + "W: Toggle Weekday" + ] + + for i, text in enumerate(texts): + text_surface = self.font.render(text, True, BLACK) + panel.blit(text_surface, (10, 10 + i * 30)) + + self.screen.blit(panel, (SCREEN_WIDTH - 
UI_PANEL_WIDTH, 0)) + + def handle_events(self): + for event in pygame.event.get(): + if event.type == pygame.QUIT: + return False + elif event.type == pygame.KEYDOWN: + if event.key == pygame.K_t: + self.time_of_day += timedelta(hours=1) + self.unwrapped_env.time_of_day = (self.time_of_day.hour + self.time_of_day.minute / 60) % 24 + elif event.key == pygame.K_r: + self.unwrapped_env.is_rainy = not self.unwrapped_env.is_rainy + elif event.key == pygame.K_w: + self.unwrapped_env.is_weekday = not self.unwrapped_env.is_weekday + return True + + class RainDrop: + def __init__(self): + self.x = random.randint(0, SCREEN_WIDTH) + self.y = random.randint(0, SCREEN_HEIGHT) + self.speed = random.randint(5, 15) + self.size = random.randint(1, 3) + + def fall(self): + self.y += self.speed + if self.y > SCREEN_HEIGHT: + self.y = 0 + self.x = random.randint(0, SCREEN_WIDTH) + + class Obstacle: + def __init__(self): + self.width = random.randint(30, 60) + self.height = random.randint(30, 60) + self.x = random.randint(0, ROAD_WIDTH - self.width) + self.y = random.randint(0, ROAD_HEIGHT - self.height) + + def run_visualization(self, num_episodes=5): + for episode in range(num_episodes): + obs = self.env.reset() + done = False + total_reward = 0 + step = 0 + + while not done: + if not self.handle_events(): + return + + self.screen.fill(WHITE) + self.draw_road() + self.draw_obstacles() + if self.unwrapped_env.is_rainy: + self.draw_rain() + + action, _ = self.model.predict(obs, deterministic=True) + obs, reward, done, info = self.env.step(action) + total_reward += reward[0] + + for i, car in enumerate(self.unwrapped_env.cars): + position = car["position"].numpy() + angle = car["angle"].item() + color = (RED, GREEN, BLUE)[i % 3] # Cycle through colors for different cars + self.draw_car(position, angle, color) + self.draw_info_box(i, position, action[0][i*2:(i+1)*2], reward[0]) + + self.draw_ui_panel() + pygame.display.flip() + self.clock.tick(30) + step += 1 + + if done[0]: + break + + print(f"Episode {episode + 1} finished. 
Total reward: {total_reward:.2f}") + + pygame.quit() + + +def main(): + # Create and train the model (you might want to load a pre-trained model instead) + env = CarRLEnvironment(num_cars=3, time_of_day="08:00", is_rainy=False, is_weekday=True) + model = PPO("MlpPolicy", env, verbose=1) + model.learn(total_timesteps=10000) # Adjust as needed + + # Create and run the visualization + viz = CarVisualization(env, model) + viz.run_visualization() + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/isopro/car_simulator/llm_main.py b/isopro/car_simulator/llm_main.py new file mode 100644 index 0000000000000000000000000000000000000000..dbcfe69f1dd1cc21dff84c813872efcfb5676e58 --- /dev/null +++ b/isopro/car_simulator/llm_main.py @@ -0,0 +1,74 @@ +import argparse +import os +from stable_baselines3 import PPO +from stable_baselines3.common.vec_env import DummyVecEnv +from .car_llm_agent import LLMCarRLWrapper +from .car_rl_environment import CarRLEnvironment +from .carviz import CarVisualization +from stable_baselines3.common.evaluation import evaluate_policy +from dotenv import load_dotenv + +# Load environment variables from .env file +load_dotenv() + +def parse_arguments(): + parser = argparse.ArgumentParser(description="Car RL Simulation with LLM Integration and Visualization") + parser.add_argument("--num_cars", type=int, default=3, help="Number of cars in the simulation") + parser.add_argument("--time_of_day", type=str, default="08:00", help="Initial time of day (HH:MM format)") + parser.add_argument("--is_rainy", action="store_true", help="Set initial weather to rainy") + parser.add_argument("--is_weekday", action="store_true", help="Set initial day to weekday") + parser.add_argument("--train_steps", type=int, default=100000, help="Number of training steps") + parser.add_argument("--visualize_episodes", type=int, default=5, help="Number of episodes to visualize") + parser.add_argument("--load_model", type=str, help="Path to a pre-trained model to load") + parser.add_argument("--llm_call_limit", type=int, default=1000, help="Maximum number of LLM API calls") + parser.add_argument("--llm_call_frequency", type=int, default=100, help="Frequency of LLM calls (in steps)") + return parser.parse_args() + +def make_env(num_cars, time_of_day, is_rainy, is_weekday, llm_call_limit, llm_call_frequency): + def _init(): + return LLMCarRLWrapper(num_cars=num_cars, time_of_day=time_of_day, is_rainy=is_rainy, + is_weekday=is_weekday, llm_call_limit=llm_call_limit, + llm_call_frequency=llm_call_frequency) + return _init + +def train_and_evaluate(env, total_timesteps, eval_episodes=10): + model = PPO("MlpPolicy", env, verbose=1, learning_rate=0.0003, n_steps=2048, + batch_size=64, n_epochs=10, gamma=0.99, gae_lambda=0.95, clip_range=0.2) + + model.learn(total_timesteps=total_timesteps, progress_bar=True) + + mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=eval_episodes) + print(f"Mean reward: {mean_reward:.2f} +/- {std_reward:.2f}") + + return model, mean_reward + +def main(): + args = parse_arguments() + + # Ensure the ANTHROPIC_API_KEY is set + if not os.getenv('ANTHROPIC_API_KEY'): + raise ValueError("ANTHROPIC_API_KEY not found in environment variables") + + # Create the vectorized environment with LLM integration + env = DummyVecEnv([make_env(args.num_cars, args.time_of_day, args.is_rainy, args.is_weekday, + args.llm_call_limit, args.llm_call_frequency)]) + + # Create or load the RL agent + if args.load_model and os.path.exists(args.load_model): + 
print(f"Loading pre-trained model from {args.load_model}") + model = PPO.load(args.load_model, env=env) + else: + print("Creating and training a new model") + model, mean_reward = train_and_evaluate(env, total_timesteps=args.train_steps) + + # Save the trained model + model.save("car_rl_llm_model") + print("Model saved as car_rl_llm_model") + print(f"Final mean reward: {mean_reward:.2f}") + + # Run the visualization + viz = CarVisualization(env, model) + viz.run_visualization(num_episodes=args.visualize_episodes) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/isopro/car_simulator/main.py b/isopro/car_simulator/main.py new file mode 100644 index 0000000000000000000000000000000000000000..21621e3d2c4b5601046bf28bc51a476d284cd833 --- /dev/null +++ b/isopro/car_simulator/main.py @@ -0,0 +1,48 @@ +import argparse +import os +from stable_baselines3 import PPO +from stable_baselines3.common.vec_env import DummyVecEnv +from .car_rl_environment import CarRLEnvironment +from .carviz import CarVisualization + +def parse_arguments(): + parser = argparse.ArgumentParser(description="Car RL Simulation and Visualization") + parser.add_argument("--num_cars", type=int, default=3, help="Number of cars in the simulation") + parser.add_argument("--time_of_day", type=str, default="08:00", help="Initial time of day (HH:MM format)") + parser.add_argument("--is_rainy", action="store_true", help="Set initial weather to rainy") + parser.add_argument("--is_weekday", action="store_true", help="Set initial day to weekday") + parser.add_argument("--train_steps", type=int, default=10000, help="Number of training steps") + parser.add_argument("--visualize_episodes", type=int, default=5, help="Number of episodes to visualize") + parser.add_argument("--load_model", type=str, help="Path to a pre-trained model to load") + return parser.parse_args() + +def make_env(num_cars, time_of_day, is_rainy, is_weekday): + def _init(): + return CarRLEnvironment(num_cars=num_cars, time_of_day=time_of_day, is_rainy=is_rainy, is_weekday=is_weekday) + return _init + +def main(): + args = parse_arguments() + + # Create the vectorized environment + env = DummyVecEnv([make_env(args.num_cars, args.time_of_day, args.is_rainy, args.is_weekday)]) + + # Create or load the RL agent + if args.load_model and os.path.exists(args.load_model): + print(f"Loading pre-trained model from {args.load_model}") + model = PPO.load(args.load_model, env=env) + else: + print("Creating and training a new model") + model = PPO("MlpPolicy", env, verbose=1) + model.learn(total_timesteps=args.train_steps) + + # Save the trained model + model.save("car_rl_model") + print("Model saved as car_rl_model") + + # Run the visualization + viz = CarVisualization(env, model) + viz.run_visualization(num_episodes=args.visualize_episodes) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/isopro/conversation_simulation/README.md b/isopro/conversation_simulation/README.md new file mode 100644 index 0000000000000000000000000000000000000000..c063b1211b91cc66c6d42f8f985a9f1d9948c06b --- /dev/null +++ b/isopro/conversation_simulation/README.md @@ -0,0 +1,252 @@ +# Conversation Simulator + +This module is part of the `isopro` package and simulates conversations between an AI assistant (either Claude or GPT-4) and various user personas. It's designed to test and demonstrate how the AI handles different types of customer service scenarios. 
+ +## Project Structure + +The Conversation Simulator is located in the `conversation_simulator` folder within the `isopro` package: + +``` +isopro/ +└── conversation_simulator/ + ├── main.py + ├── conversation_simulator.ipynb + ├── conversation_agent.py + ├── conversation_environment.py + ├── custom_persona.py + └── user_personas.py +``` + +## Prerequisites + +Before you begin, ensure you have met the following requirements: + +* You have installed Python 3.7 or later. +* You have an Anthropic API key (for Claude) and/or an OpenAI API key (for GPT-4). +* You have installed the `isopro` package. +* For the Jupyter notebook, you have Jupyter Notebook or JupyterLab installed. + +## Setting up the Conversation Simulator + +1. If you haven't already, install the `isopro` package: + ``` + pip install isopro + ``` + +2. Create a `.env` file in your project root and add your API keys: + ``` + ANTHROPIC_API_KEY=your_anthropic_api_key_here + OPENAI_API_KEY=your_openai_api_key_here + ``` + +## Running the Conversation Simulator + +You can run the Conversation Simulator either as a Python script or interactively using a Jupyter notebook. + +### Using the Python Script + +1. Basic usage: + ```python + from isopro.conversation_simulator.main import main + + if __name__ == "__main__": + main() + ``` + +2. Running from the command line: + ``` + python -m isopro.conversation_simulator.main + ``` + +### Using the Jupyter Notebook + +Navigate to the `isopro/conversation_simulator/` directory and open the `conversation_simulator.ipynb` file using Jupyter Notebook or JupyterLab. Here's what you'll find in the notebook: + +```python +# Conversation Simulator Jupyter Notebook + +## Setup + +import logging +from logging.handlers import RotatingFileHandler +import os +from datetime import datetime +from dotenv import load_dotenv +from isopro.conversation_simulation.conversation_simulator import ConversationSimulator +from isopro.conversation_simulation.custom_persona import create_custom_persona + +# Load environment variables +load_dotenv() + +# Set up logging +log_directory = "logs" +os.makedirs(log_directory, exist_ok=True) +log_file = os.path.join(log_directory, "conversation_simulator.log") + +# Create a rotating file handler +file_handler = RotatingFileHandler(log_file, maxBytes=1024*1024, backupCount=5) +file_handler.setLevel(logging.DEBUG) +file_formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s') +file_handler.setFormatter(file_formatter) + +# Create a console handler +console_handler = logging.StreamHandler() +console_handler.setLevel(logging.INFO) +console_formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s') +console_handler.setFormatter(console_formatter) + +# Set up the logger +logger = logging.getLogger() +logger.setLevel(logging.DEBUG) +logger.addHandler(file_handler) +logger.addHandler(console_handler) + +print("Setup complete.") + +## Helper Functions + +def save_output(content, filename): + """Save the output content to a file.""" + with open(filename, 'w', encoding='utf-8') as f: + f.write(content) + +def get_user_choice(): + """Get user's choice of AI model.""" + while True: + choice = input("Choose AI model (claude/openai): ").lower() + if choice in ['claude', 'openai']: + return choice + print("Invalid choice. 
Please enter 'claude' or 'openai'.") + +print("Helper functions defined.") + +## Main Simulation Function + +def run_simulation(): + # Get user's choice of AI model + ai_choice = get_user_choice() + + # Set up the appropriate model and API key + if ai_choice == 'claude': + model = "claude-3-opus-20240229" + os.environ["ANTHROPIC_API_KEY"] = os.getenv("ANTHROPIC_API_KEY") + ai_name = "Claude" + else: # openai + model = "gpt-4-1106-preview" + os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY") + ai_name = "GPT-4 Turbo" + + # Initialize the ConversationSimulator + simulator = ConversationSimulator( + ai_prompt=f"You are {ai_name}, an AI assistant created to be helpful, harmless, and honest. You are a customer service agent for a tech company. Respond politely and professionally." + ) + + output_content = f"Conversation Simulator using {ai_name} model: {model}\n\n" + + # Run simulations with different personas + personas = ["upset", "human_request", "inappropriate", "incomplete_info"] + + for persona in personas: + logger.info(f"Running simulation with {persona} persona using {ai_name}") + conversation_history = simulator.run_simulation(persona, num_turns=3) + + output_content += f"\nConversation with {persona} persona:\n" + for message in conversation_history: + output_line = f"{message['role'].capitalize()}: {message['content']}\n" + output_content += output_line + logger.debug(output_line.strip()) + output_content += "\n" + "-"*50 + "\n" + + # Create and run a simulation with a custom persona + custom_persona_name = "Techie Customer" + custom_characteristics = ["tech-savvy", "impatient", "detail-oriented"] + custom_message_templates = [ + "I've tried rebooting my device, but the error persists. Can you help?", + "What's the latest update on the cloud service outage?", + "I need specifics on the API rate limits for the enterprise plan.", + "The latency on your servers is unacceptable. What's being done about it?", + "Can you explain the technical details of your encryption method?" 
+ ] + + logger.info(f"Running simulation with custom persona: {custom_persona_name} using {ai_name}") + custom_conversation = simulator.run_custom_simulation( + custom_persona_name, + custom_characteristics, + custom_message_templates, + num_turns=3 + ) + + output_content += f"\nConversation with {custom_persona_name}:\n" + for message in custom_conversation: + output_line = f"{message['role'].capitalize()}: {message['content']}\n" + output_content += output_line + logger.debug(output_line.strip()) + + # Save the output to a file + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + output_directory = "output" + os.makedirs(output_directory, exist_ok=True) + output_file = os.path.join(output_directory, f"{ai_name.lower()}_conversation_output_{timestamp}.txt") + save_output(output_content, output_file) + logger.info(f"Output saved to {output_file}") + + return output_content + +print("Main simulation function defined.") + +## Run the Simulation + +simulation_output = run_simulation() +print(simulation_output) + +## Analyze the Results + +# Example analysis: Count the number of apologies +apology_count = simulation_output.lower().count("sorry") + simulation_output.lower().count("apologi") +print(f"Number of apologies: {apology_count}") + +# Example analysis: Average length of AI responses +ai_responses = [line.split(": ", 1)[1] for line in simulation_output.split("\n") if line.startswith("Assistant: ")] +avg_response_length = sum(len(response.split()) for response in ai_responses) / len(ai_responses) +print(f"Average length of AI responses: {avg_response_length:.2f} words") + +## Conclusion + +# This notebook demonstrates how to use the Conversation Simulator from the isopro package. +# You can modify the personas, adjust the number of turns, or add your own analysis to +# further explore the capabilities of the AI models in customer service scenarios. +``` + +## Output and Logs + +- Simulation outputs are saved in the `output` directory within your current working directory. +- Logs are saved in the `logs` directory within your current working directory. + +## Customizing the Simulation + +You can customize the simulation by modifying the `main.py` file or the Jupyter notebook: + +- To change the predefined personas, modify the `personas` list. +- To adjust the custom persona, modify the `custom_persona_name`, `custom_characteristics`, and `custom_message_templates` variables. +- To change the number of turns in each conversation, modify the `num_turns` parameter in the `run_simulation` and `run_custom_simulation` method calls. + +In the Jupyter notebook, you can also add new cells for additional analysis or visualization of the results. + +## Troubleshooting + +If you encounter any issues: + +1. Make sure your API keys are correctly set in the `.env` file or environment variables. +2. Check the logs in the `logs` directory for detailed error messages. +3. Ensure you have the latest version of the `isopro` package installed. +4. For Jupyter notebook issues, make sure you have Jupyter installed and are running the notebook from the correct directory. + +If problems persist, please open an issue in the project repository. + +## Contributing + +Contributions to the Conversation Simulator are welcome. Please feel free to submit a Pull Request to the `isopro` repository. + +## License + +This project is licensed under the MIT License - see the LICENSE file in the `isopro` package for details. 
\ No newline at end of file diff --git a/isopro/conversation_simulation/__init__.py b/isopro/conversation_simulation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..830bd9bba385dde95e6cb2e0c988af8ba4bd05bb --- /dev/null +++ b/isopro/conversation_simulation/__init__.py @@ -0,0 +1,19 @@ +""" +Conversation Simulation Module + +This module provides tools for simulating conversations with AI agents. +""" + +from .conversation_environment import ConversationEnvironment +from .conversation_agent import ConversationAgent +from .user_personas import UserPersona +from .custom_persona import create_custom_persona +from .conversation_simulator import ConversationSimulator + +__all__ = [ + "ConversationEnvironment", + "ConversationAgent", + "UserPersona", + "create_custom_persona", + "ConversationSimulator", +] \ No newline at end of file diff --git a/isopro/conversation_simulation/conversation_agent.py b/isopro/conversation_simulation/conversation_agent.py new file mode 100644 index 0000000000000000000000000000000000000000..811733d7dabd4ad074087f56d6eedbc88772a428 --- /dev/null +++ b/isopro/conversation_simulation/conversation_agent.py @@ -0,0 +1,41 @@ +""" +Conversation Agent + +This module defines the AI agent used in the conversation simulation, using Anthropic's Claude API. +""" + +import anthropic +import os +import logging +from ..agents.ai_agent import AI_Agent +from dotenv import load_dotenv + +logger = logging.getLogger(__name__) + +load_dotenv() + +class ConversationAgent(AI_Agent): + def __init__(self, name, prompt, model="claude-3-opus-20240229"): + super().__init__(name) + self.prompt = prompt + self.model = model + self.client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY")) + logger.info(f"Initialized ConversationAgent '{name}' with Claude model {model}") + + def generate_response(self, conversation_history): + try: + messages = [{"role": "user" if msg["role"] != "assistant" else "assistant", "content": msg["content"]} + for msg in conversation_history] + + response = self.client.messages.create( + model=self.model, + max_tokens=1000, + system=self.prompt, + messages=messages + ) + ai_message = response.content[0].text.strip() + logger.debug(f"Generated response: {ai_message}") + return ai_message + except Exception as e: + logger.error(f"Error generating response: {e}") + return "I apologize, but I'm having trouble responding at the moment." \ No newline at end of file diff --git a/isopro/conversation_simulation/conversation_environment.py b/isopro/conversation_simulation/conversation_environment.py new file mode 100644 index 0000000000000000000000000000000000000000..2bc24625e8a624d3a19a4c315ff3b7368cd82744 --- /dev/null +++ b/isopro/conversation_simulation/conversation_environment.py @@ -0,0 +1,78 @@ +""" +Conversation Environment + +This module defines the environment for simulating conversations between a Claude-based AI agent and users with various personas. +""" + +import logging +from ..environments.simulation_environment import SimulationEnvironment +from .conversation_agent import ConversationAgent +from .user_personas import UserPersona + +logger = logging.getLogger(__name__) + +class ConversationEnvironment(SimulationEnvironment): + """ + ConversationEnvironment + + This class provides an environment for simulating conversations between Claude-based AI agents and users with various personas. + """ + + def __init__(self, ai_prompt="You are a helpful customer service agent. 
Respond politely and professionally."): + """ + Initialize the ConversationEnvironment. + + Args: + ai_prompt (str): The prompt to guide the AI agent's behavior. + """ + super().__init__() + self.ai_prompt = ai_prompt + self.ai_agent = None + self.user_persona = None + logger.info("Initialized ConversationEnvironment") + + def set_ai_agent(self, model="claude-3-opus-20240229"): + """ + Set up the Claude-based AI agent for the conversation. + + Args: + model (str): The name of the Claude model to use. + """ + self.ai_agent = ConversationAgent("Customer Service AI", self.ai_prompt, model) + logger.info(f"Set AI agent with Claude model: {model}") + def set_user_persona(self, persona_type, **kwargs): + """ + Set the user persona for the conversation. + + Args: + persona_type (str): The type of user persona to use. + **kwargs: Additional arguments for the user persona. + """ + self.user_persona = UserPersona.create(persona_type, **kwargs) + logger.info(f"Set user persona: {persona_type}") + + def run_conversation(self, num_turns=5): + """ + Run a conversation between the AI agent and the user persona. + + Args: + num_turns (int): The number of conversation turns to simulate. + + Returns: + list: A list of dictionaries containing the conversation history. + """ + if not self.ai_agent or not self.user_persona: + raise ValueError("Both AI agent and user persona must be set before running a conversation.") + + conversation_history = [] + for _ in range(num_turns): + user_message = self.user_persona.generate_message(conversation_history) + conversation_history.append({"role": "user", "content": user_message}) + logger.debug(f"User: {user_message}") + + ai_response = self.ai_agent.generate_response(conversation_history) + conversation_history.append({"role": "assistant", "content": ai_response}) + logger.debug(f"AI: {ai_response}") + + logger.info("Completed conversation simulation") + return conversation_history \ No newline at end of file diff --git a/isopro/conversation_simulation/conversation_simulator.py b/isopro/conversation_simulation/conversation_simulator.py new file mode 100644 index 0000000000000000000000000000000000000000..caae517c3f1f2667f6023947c01f2a056a043dc6 --- /dev/null +++ b/isopro/conversation_simulation/conversation_simulator.py @@ -0,0 +1,67 @@ +""" +Conversation Simulator + +This module provides a high-level interface for running conversation simulations +with different personas and analyzing the results using Anthropic's Claude API. +""" + +import logging +from .conversation_environment import ConversationEnvironment +from .custom_persona import create_custom_persona + +logger = logging.getLogger(__name__) + +class ConversationSimulator: + """ + ConversationSimulator orchestrates conversation simulations with various personas using Claude. + """ + + def __init__(self, ai_prompt="You are a helpful customer service agent. Respond politely and professionally."): + """ + Initialize the ConversationSimulator. + + Args: + ai_prompt (str): The prompt to guide the Claude-based AI agent's behavior. + """ + self.environment = ConversationEnvironment(ai_prompt) + logger.info("Initialized ConversationSimulator with Claude") + + def run_simulation(self, persona_type, num_turns=5, claude_model="claude-3-opus-20240229", **persona_kwargs): + """ + Run a conversation simulation with a specified persona using Claude. + + Args: + persona_type (str): The type of persona to use in the simulation. + num_turns (int): The number of conversation turns to simulate. 
+ claude_model (str): The specific Claude model to use for the simulation. + **persona_kwargs: Additional arguments for creating the persona. + + Returns: + list: A list of dictionaries containing the conversation history. + """ + self.environment.set_ai_agent(model=claude_model) + self.environment.set_user_persona(persona_type, **persona_kwargs) + conversation_history = self.environment.run_conversation(num_turns) + logger.info(f"Completed simulation with {persona_type} persona using Claude model {claude_model}") + return conversation_history + + def run_custom_simulation(self, name, characteristics, message_templates, num_turns=5, claude_model="claude-3-opus-20240229"): + """ + Run a conversation simulation with a custom persona using Claude. + + Args: + name (str): The name of the custom persona. + characteristics (list): A list of characteristics that define the persona. + message_templates (list): A list of message templates the persona can use. + num_turns (int): The number of conversation turns to simulate. + claude_model (str): The specific Claude model to use for the simulation. + + Returns: + list: A list of dictionaries containing the conversation history. + """ + custom_persona = create_custom_persona(name, characteristics, message_templates) + self.environment.set_ai_agent(model=claude_model) + self.environment.user_persona = custom_persona + conversation_history = self.environment.run_conversation(num_turns) + logger.info(f"Completed simulation with custom persona: {name} using Claude model {claude_model}") + return conversation_history \ No newline at end of file diff --git a/isopro/conversation_simulation/custom_persona.py b/isopro/conversation_simulation/custom_persona.py new file mode 100644 index 0000000000000000000000000000000000000000..5cb6ceb2e6f08ec8b848333fe5f31e34fc148ec2 --- /dev/null +++ b/isopro/conversation_simulation/custom_persona.py @@ -0,0 +1,58 @@ +""" +Custom Persona + +This module allows users to create custom personas for the conversation simulation. +""" + +import logging +from .user_personas import UserPersona + +logger = logging.getLogger(__name__) + +class CustomPersona(UserPersona): + """ + CustomPersona allows users to create their own persona with specific characteristics. + """ + + def __init__(self, name, characteristics, message_templates): + """ + Initialize the CustomPersona. + + Args: + name (str): The name of the custom persona. + characteristics (list): A list of characteristics that define the persona. + message_templates (list): A list of message templates the persona can use. + """ + super().__init__(name) + self.characteristics = characteristics + self.message_templates = message_templates + logger.info(f"Created CustomPersona: {name}") + + def generate_message(self, conversation_history): + """ + Generate a message based on the custom persona's characteristics and templates. + + Args: + conversation_history (list): A list of dictionaries containing the conversation history. + + Returns: + str: The generated message. + """ + import random + message = random.choice(self.message_templates) + logger.debug(f"CustomPersona '{self.name}' generated message: {message}") + return message + +def create_custom_persona(name, characteristics, message_templates): + """ + Create a custom persona with the given characteristics and message templates. + + Args: + name (str): The name of the custom persona. + characteristics (list): A list of characteristics that define the persona. 
+ message_templates (list): A list of message templates the persona can use. + + Returns: + CustomPersona: An instance of the custom persona. + """ + return CustomPersona(name, characteristics, message_templates) \ No newline at end of file diff --git a/isopro/conversation_simulation/main.py b/isopro/conversation_simulation/main.py new file mode 100644 index 0000000000000000000000000000000000000000..193e42cab39627998ac7e7c2129470a46a822073 --- /dev/null +++ b/isopro/conversation_simulation/main.py @@ -0,0 +1,117 @@ +import logging +from logging.handlers import RotatingFileHandler +import os +from datetime import datetime +from dotenv import load_dotenv +from .conversation_simulator import ConversationSimulator +from .custom_persona import create_custom_persona + +# Load environment variables +load_dotenv() + +# Set up logging +log_directory = "logs" +os.makedirs(log_directory, exist_ok=True) +log_file = os.path.join(log_directory, "conversation_simulator.log") + +# Create a rotating file handler +file_handler = RotatingFileHandler(log_file, maxBytes=1024*1024, backupCount=5) +file_handler.setLevel(logging.DEBUG) +file_formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s') +file_handler.setFormatter(file_formatter) + +# Create a console handler +console_handler = logging.StreamHandler() +console_handler.setLevel(logging.INFO) +console_formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s') +console_handler.setFormatter(console_formatter) + +# Set up the logger +logger = logging.getLogger() +logger.setLevel(logging.DEBUG) +logger.addHandler(file_handler) +logger.addHandler(console_handler) + +def save_output(content, filename): + """Save the output content to a file.""" + with open(filename, 'w', encoding='utf-8') as f: + f.write(content) + +def get_user_choice(): + """Get user's choice of AI model.""" + while True: + choice = input("Choose AI model (claude/openai): ").lower() + if choice in ['claude', 'openai']: + return choice + print("Invalid choice. Please enter 'claude' or 'openai'.") + +def main(): + # Get user's choice of AI model + ai_choice = get_user_choice() + + # Set up the appropriate model and API key + if ai_choice == 'claude': + model = "claude-3-opus-20240229" + os.environ["ANTHROPIC_API_KEY"] = os.getenv("ANTHROPIC_API_KEY") + ai_name = "Claude" + else: # openai + model = "gpt-4-1106-preview" + os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY") + ai_name = "GPT-4 Turbo" + + # Initialize the ConversationSimulator + simulator = ConversationSimulator( + ai_prompt=f"You are {ai_name}, an AI assistant created to be helpful, harmless, and honest. You are a customer service agent for a tech company. Respond politely and professionally." 
+ ) + + output_content = f"Conversation Simulator using {ai_name} model: {model}\n\n" + + # Run simulations with different personas + personas = ["upset", "human_request", "inappropriate", "incomplete_info"] + + for persona in personas: + logger.info(f"Running simulation with {persona} persona using {ai_name}") + conversation_history = simulator.run_simulation(persona, num_turns=3) + + output_content += f"\nConversation with {persona} persona:\n" + for message in conversation_history: + output_line = f"{message['role'].capitalize()}: {message['content']}\n" + output_content += output_line + logger.debug(output_line.strip()) + output_content += "\n" + "-"*50 + "\n" + + # Create and run a simulation with a custom persona + custom_persona_name = "Techie Customer" + custom_characteristics = ["tech-savvy", "impatient", "detail-oriented"] + custom_message_templates = [ + "I've tried rebooting my device, but the error persists. Can you help?", + "What's the latest update on the cloud service outage?", + "I need specifics on the API rate limits for the enterprise plan.", + "The latency on your servers is unacceptable. What's being done about it?", + "Can you explain the technical details of your encryption method?" + ] + + logger.info(f"Running simulation with custom persona: {custom_persona_name} using {ai_name}") + custom_conversation = simulator.run_custom_simulation( + custom_persona_name, + custom_characteristics, + custom_message_templates, + num_turns=3 + ) + + output_content += f"\nConversation with {custom_persona_name}:\n" + for message in custom_conversation: + output_line = f"{message['role'].capitalize()}: {message['content']}\n" + output_content += output_line + logger.debug(output_line.strip()) + + # Save the output to a file + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + output_directory = "output" + os.makedirs(output_directory, exist_ok=True) + output_file = os.path.join(output_directory, f"{ai_name.lower()}_conversation_output_{timestamp}.txt") + save_output(output_content, output_file) + logger.info(f"Output saved to {output_file}") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/isopro/conversation_simulation/user_personas.py b/isopro/conversation_simulation/user_personas.py new file mode 100644 index 0000000000000000000000000000000000000000..6c4ba6f54ddc00768857028b2b34f19158aafdb4 --- /dev/null +++ b/isopro/conversation_simulation/user_personas.py @@ -0,0 +1,112 @@ +""" +User Personas + +This module defines various user personas for the conversation simulation. +""" + +import random +import logging + +logger = logging.getLogger(__name__) + +class UserPersona: + """ + Base class for user personas in the conversation simulation. + """ + + def __init__(self, name): + self.name = name + + def generate_message(self, conversation_history): + """ + Generate a message based on the persona and conversation history. + + Args: + conversation_history (list): A list of dictionaries containing the conversation history. + + Returns: + str: The generated message. + """ + raise NotImplementedError("Subclasses must implement generate_message method") + + @staticmethod + def create(persona_type, **kwargs): + """ + Factory method to create user personas. + + Args: + persona_type (str): The type of user persona to create. + **kwargs: Additional arguments for the user persona. + + Returns: + UserPersona: An instance of the specified user persona. 
+ """ + persona_classes = { + "upset": UpsetCustomer, + "human_request": HumanRequestCustomer, + "inappropriate": InappropriateCustomer, + "incomplete_info": IncompleteInfoCustomer, + } + + if persona_type not in persona_classes: + raise ValueError(f"Unknown persona type: {persona_type}") + + return persona_classes[persona_type](**kwargs) + +class UpsetCustomer(UserPersona): + def __init__(self): + super().__init__("Upset Customer") + self.complaints = [ + "This is unacceptable!", + "I've been waiting for hours!", + "I want to speak to your manager!", + "This is the worst service I've ever experienced!", + "I'm extremely disappointed with your company!", + ] + + def generate_message(self, conversation_history): + message = random.choice(self.complaints) + logger.debug(f"UpsetCustomer generated message: {message}") + return message + +class HumanRequestCustomer(UserPersona): + def __init__(self): + super().__init__("Human Request Customer") + self.requests = [ + "Can I speak to a human representative?", + "I don't want to talk to a bot. Get me a real person.", + "Is there a way to talk to an actual employee?", + "I need to speak with a human agent, not an AI.", + "Please transfer me to a live representative.", + ] + + def generate_message(self, conversation_history): + message = random.choice(self.requests) + logger.debug(f"HumanRequestCustomer generated message: {message}") + return message + +class InappropriateCustomer(UserPersona): + def __init__(self): + super().__init__("Inappropriate Customer") + self.inappropriate_words = ["[INAPPROPRIATE1]", "[INAPPROPRIATE2]", "[INAPPROPRIATE3]"] + + def generate_message(self, conversation_history): + message = f"You're a {random.choice(self.inappropriate_words)} and this service is {random.choice(self.inappropriate_words)}!" + logger.debug(f"InappropriateCustomer generated message: {message}") + return message + +class IncompleteInfoCustomer(UserPersona): + def __init__(self): + super().__init__("Incomplete Info Customer") + self.vague_requests = [ + "I need help with my account.", + "There's a problem with my order.", + "Something's not working right.", + "I have a question about your service.", + "Can you check on the status of my thing?", + ] + + def generate_message(self, conversation_history): + message = random.choice(self.vague_requests) + logger.debug(f"IncompleteInfoCustomer generated message: {message}") + return message \ No newline at end of file diff --git a/isopro/environments/__init__.py b/isopro/environments/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..70109bcd96facd2cc2608073172df79b1ae12a77 --- /dev/null +++ b/isopro/environments/__init__.py @@ -0,0 +1,9 @@ +""" +Environment classes for the isopro package. 
+""" + +from .simulation_environment import SimulationEnvironment +from .custom_environment import CustomEnvironment +from .llm_orchestrator import LLMOrchestrator + +__all__ = ["SimulationEnvironment", "CustomEnvironment", "LLMOrchestrator"] \ No newline at end of file diff --git a/isopro/environments/__pycache__/__init__.cpython-38.pyc b/isopro/environments/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..04b7eaabc1c450c0f972cf879c6c40f53674baf6 Binary files /dev/null and b/isopro/environments/__pycache__/__init__.cpython-38.pyc differ diff --git a/isopro/environments/__pycache__/custom_environment.cpython-38.pyc b/isopro/environments/__pycache__/custom_environment.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d07231e1e148fa99ccc5b1720c1f5fe9965dcb76 Binary files /dev/null and b/isopro/environments/__pycache__/custom_environment.cpython-38.pyc differ diff --git a/isopro/environments/__pycache__/llm_orchestrator.cpython-38.pyc b/isopro/environments/__pycache__/llm_orchestrator.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6065839fdd2a83a9390dd1b19eab46362b3292a1 Binary files /dev/null and b/isopro/environments/__pycache__/llm_orchestrator.cpython-38.pyc differ diff --git a/isopro/environments/__pycache__/simulation_environment.cpython-38.pyc b/isopro/environments/__pycache__/simulation_environment.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..56b4787f1dc463aa522482a399025e1fc7c80207 Binary files /dev/null and b/isopro/environments/__pycache__/simulation_environment.cpython-38.pyc differ diff --git a/isopro/environments/custom_environment.py b/isopro/environments/custom_environment.py new file mode 100644 index 0000000000000000000000000000000000000000..23b37a47e89f9d541efb952a03197076c9b7759c --- /dev/null +++ b/isopro/environments/custom_environment.py @@ -0,0 +1,108 @@ +"""Custom Environment for creating user-defined simulation environments.""" +from ..environments.simulation_environment import SimulationEnvironment +from ..agents.ai_agent import AI_Agent +from ..base.base_component import BaseComponent, agent_component + +class CustomAgent(AI_Agent): + """ + CustomAgent + + This class defines a custom agent. Users can extend this class to implement their own agents. + """ + def __init__(self, name, custom_param): + """ + Initialize the CustomAgent. + + Args: + name (str): The name of the agent. + custom_param: A custom parameter for the agent. + """ + super().__init__(name) + self.custom_param = custom_param + + def run(self, input_data): + """ + Run the custom agent. + + Args: + input_data (dict): The input data for the agent. + + Returns: + dict: The processed output data. + """ + self.logger.info(f"Running custom agent: {self.name} with parameter: {self.custom_param}") + # Implement custom behavior here + return super().run(input_data) + +@agent_component +class CustomComponent(BaseComponent): + """ + CustomComponent + + This class defines a custom component. Users can extend this class to implement their own components. + """ + def __init__(self, name, custom_param): + """ + Initialize the CustomComponent. + + Args: + name (str): The name of the component. + custom_param: A custom parameter for the component. + """ + super().__init__(name) + self.custom_param = custom_param + + def run(self, input_data): + """ + Run the custom component. + + Args: + input_data (dict): The input data for the component. 
+ + Returns: + dict: The processed output data. + """ + self.logger.info(f"Running custom component: {self.name} with parameter: {self.custom_param}") + # Implement custom behavior here + return input_data + +class CustomEnvironment(SimulationEnvironment): + """ + CustomEnvironment + + This class provides a template for creating a custom training environment. + Users can define their own agents and components, and integrate them into the simulation environment. + """ + def __init__(self, num_agents=1, custom_param=None): + """ + Initialize the CustomEnvironment. + + Args: + num_agents (int): The number of agents to create. + custom_param: A custom parameter for the environment. + """ + super().__init__() + self.num_agents = num_agents + self.custom_param = custom_param + self._create_custom_agents() + + def _create_custom_agents(self): + """Create custom agents and add them to the environment.""" + for i in range(self.num_agents): + agent = CustomAgent(name=f"Custom Agent {i+1}", custom_param=self.custom_param) + component = CustomComponent(name=f"Custom Component {i+1}", custom_param=self.custom_param) + agent.add_component(component) + self.add_agent(agent) + + def add_custom_agent(self, agent_name, custom_param): + """ + Add a custom agent to the environment. + + Args: + agent_name (str): The name of the agent. + custom_param: A custom parameter for the agent. + """ + agent = CustomAgent(name=agent_name, custom_param=custom_param) + component = CustomComponent(name=f"Component for {agent_name}", custom_param=custom_param) + agent.add_component(component) + self.add_agent(agent) \ No newline at end of file diff --git a/isopro/environments/llm_orchestrator.py b/isopro/environments/llm_orchestrator.py new file mode 100644 index 0000000000000000000000000000000000000000..dcbc964c0b42339938d2859869a0e0adb7477567 --- /dev/null +++ b/isopro/environments/llm_orchestrator.py @@ -0,0 +1,194 @@ +""" +LLM Orchestrator for managing and executing LLM components in various modes. +""" + +import logging +import heapq +from concurrent.futures import ThreadPoolExecutor +from typing import List, Any, Optional, Callable +from ..base.base_component import BaseComponent + +logger = logging.getLogger(__name__) + +class ComponentException(Exception): + """Custom exception for component-related errors.""" + pass + +class LLMOrchestrator: + """ + LLMOrchestrator manages and executes LLM components in various modes: + sequential, parallel, or priority-based node execution. + """ + + def __init__(self): + """Initialize the LLMOrchestrator with an empty list of components.""" + self.components: List[BaseComponent] = [] + self.priority_function: Optional[Callable[[BaseComponent, Any], int]] = None + + def add_component(self, component: BaseComponent) -> None: + """ + Add a component to the orchestrator. + + Args: + component (BaseComponent): The component to be added. + + Raises: + ValueError: If the component is None or not an instance of BaseComponent. + """ + if component is None: + raise ValueError("Cannot add None as a component") + if not isinstance(component, BaseComponent): + raise ValueError(f"Only BaseComponent instances can be added, got {type(component)}") + self.components.append(component) + + def set_priority_function(self, priority_func: Callable[[BaseComponent, Any], int]) -> None: + """ + Set the priority function for node-based execution. + + Args: + priority_func (Callable[[BaseComponent, Any], int]): A function that takes a component + and input data, and returns an integer priority value. 
+ """ + self.priority_function = priority_func + + def run_orchestration(self, mode: str = 'sequence', input_data: Optional[Any] = None) -> List[Any]: + """ + Run the orchestration in the specified mode. + + Args: + mode (str): The execution mode ('sequence', 'parallel', or 'node'). + input_data (Any, optional): The initial input data for the components. + + Returns: + List[Any]: The results from all components. + + Raises: + ValueError: If an invalid execution mode is specified. + """ + if not self.components: + logger.warning("No components to run") + return [] + + if mode == 'sequence': + return self._run_in_sequence(input_data) + elif mode == 'parallel': + return self._run_in_parallel(input_data) + elif mode == 'node': + return self._run_as_node(input_data) + else: + raise ValueError("Invalid execution mode") + + def _run_in_sequence(self, input_data: Any) -> List[Any]: + """ + Run components sequentially, passing the output of each as input to the next. + + Args: + input_data (Any): The initial input data for the first component. + + Returns: + List[Any]: The results from all components. + """ + logger.info("Running in sequence mode") + results = [] + current_input = input_data + + for component in self.components: + try: + result = self._run_component(component, current_input) + results.append(result) + current_input = result # Use the output as input for the next component + except ComponentException as e: + logger.error(f"Error: {e}") + results.append(str(e)) + + return results + + def _run_in_parallel(self, input_data: Any) -> List[Any]: + """ + Run components in parallel, providing the same input to all components. + + Args: + input_data (Any): The input data for all components. + + Returns: + List[Any]: The results from all components. + """ + logger.info("Running in parallel mode") + results = [] + + with ThreadPoolExecutor() as executor: + futures = [executor.submit(self._run_component, component, input_data) + for component in self.components] + + for future in futures: + try: + result = future.result() + results.append(result) + except ComponentException as e: + logger.error(f"Error: {e}") + results.append(str(e)) + + return results + + def _run_as_node(self, input_data: Any) -> List[Any]: + """ + Run components in priority-based node mode. + + The priority is defined either by the LLM using reasoning on the best path + of solving the problem or designated by the user through the priority_function. + + Args: + input_data (Any): The input data for all components. + + Returns: + List[Any]: The results from all components, ordered by priority. + """ + logger.info("Running in node mode (priority-based)") + results = [] + + if self.priority_function is None: + logger.warning("No priority function set. 
Using default priority (0) for all components.") + priority_queue = [(0, i, component) for i, component in enumerate(self.components)] + else: + priority_queue = [(self.priority_function(component, input_data), i, component) + for i, component in enumerate(self.components)] + + heapq.heapify(priority_queue) + + while priority_queue: + priority, _, component = heapq.heappop(priority_queue) + logger.info(f"Running component {component} with priority {priority}") + try: + result = self._run_component(component, input_data) + results.append(result) + + # If the component changes the priority, we need to update the queue + if self.priority_function: + new_priority = self.priority_function(component, result) + if new_priority != priority: + heapq.heappush(priority_queue, (new_priority, len(results), component)) + logger.info(f"Updated priority for component {component}: {priority} -> {new_priority}") + + except ComponentException as e: + logger.error(f"Error: {e}") + results.append(str(e)) + + return results + + def _run_component(self, component: BaseComponent, input_data: Any) -> Any: + """ + Run a single component with the given input data. + + Args: + component (BaseComponent): The component to run. + input_data (Any): The input data for the component. + + Returns: + Any: The result of running the component. + + Raises: + ComponentException: If the component doesn't have a callable 'run' method. + """ + if not hasattr(component, 'run') or not callable(component.run): + raise ComponentException(f"Component {component} does not have a callable 'run' method") + return component.run(input_data) \ No newline at end of file diff --git a/isopro/environments/simulation_environment.py b/isopro/environments/simulation_environment.py new file mode 100644 index 0000000000000000000000000000000000000000..e0e035c376caac8df00260c66c4a4adecc213e47 --- /dev/null +++ b/isopro/environments/simulation_environment.py @@ -0,0 +1,48 @@ +"""Simulation Environment for LLM training.""" +from ..utils.logging_utils import setup_logger + +class SimulationEnvironment: + """Simulation Environment for LLM training.""" + + def __init__(self): + """Initialize the SimulationEnvironment.""" + self.agents = [] + self.logger = setup_logger(self.__class__.__name__) + + def add_agent(self, agent): + """ + Add an agent to the simulation environment. + + Args: + agent (AI_Agent): The agent to add. + """ + self.agents.append(agent) + self.logger.info(f"Added agent: {agent.name}") + + def step(self): + """ + Execute one step of the simulation for all agents. + + Returns: + list: A list of outputs from all agents. 
+ """ + outputs = [] + for agent in self.agents: + output = agent.run({}) # Placeholder input, replace with actual simulation state + outputs.append(output) + return outputs + + def reset(self): + """Reset the simulation environment.""" + # Implement reset logic here + self.logger.info("Simulation environment reset") + + def render(self): + """Render the current state of the simulation environment.""" + # Implement rendering logic here + self.logger.info("Rendering simulation environment") + + def close(self): + """Close the simulation environment and clean up resources.""" + # Implement cleanup logic here + self.logger.info("Closing simulation environment") \ No newline at end of file diff --git a/isopro/examples/__init__.py b/isopro/examples/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..d650e13612c0929f438c231111f3db4a5fea1a58 --- /dev/null +++ b/isopro/examples/__init__.py @@ -0,0 +1,34 @@ +""" +ISOPRO Examples + +This package contains example Jupyter notebooks demonstrating various features and use cases of the ISOPRO package. + +Available examples: +- custom_environment_example: Demonstrates how to create custom environments using Claude or Hugging Face models. +- conversation_simulation_example: Shows how to use the conversation simulation with a Claude agent for customer service. +- adversarial_simulation_example: Illustrates how to use the adversarial simulation and analyze its results. + +To run these examples, open the respective .ipynb files in a Jupyter notebook environment. +""" + +# Import any shared utilities or constants used across notebooks here +# For example: +# from .utils import plot_results, load_sample_data + +# List available example notebooks +AVAILABLE_EXAMPLES = [ + "custom_environment_example", + "conversation_simulation_example", + "adversarial_simulation_example" +] + +def list_examples(): + """ + Print a list of available example notebooks. + """ + print("Available ISOPRO example notebooks:") + for example in AVAILABLE_EXAMPLES: + print(f"- {example}") + print("\nTo run an example, open the corresponding .ipynb file in a Jupyter notebook environment.") + +# You can add any other shared functions or variables here that might be useful across multiple notebooks \ No newline at end of file diff --git a/isopro/examples/adversarial_example.ipynb b/isopro/examples/adversarial_example.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..2411a97a5d8acbe83e63c08810a9d9ec63f44a62 --- /dev/null +++ b/isopro/examples/adversarial_example.ipynb @@ -0,0 +1,242 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Adversarial Simulation Notebook\n", + "\n", + "This notebook demonstrates how to run an adversarial simulation against a language model (in this case, Claude) and analyze the results.\n", + "\n", + "## Setup\n", + "\n", + "First, we'll import the necessary libraries and set up our environment." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import logging\n", + "from typing import List\n", + "from isopro.adversarial_simulation import AdversarialSimulator, AdversarialEnvironment\n", + "from isopro.utils.analyze_adversarial_sim import analyze_adversarial_results, summarize_adversarial_impact\n", + "from isopro.agents.ai_agent import AI_Agent\n", + "import anthropic\n", + "import os\n", + "from dotenv import load_dotenv\n", + "import json\n", + "from datetime import datetime\n", + "import numpy as np\n", + "import torch\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "\n", + "# Load environment variables\n", + "load_dotenv()\n", + "\n", + "# Set up logging\n", + "logging.basicConfig(level=logging.INFO)\n", + "logger = logging.getLogger(__name__)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Define Helper Classes and Functions\n", + "\n", + "Now, we'll define our ClaudeAgent class and some helper functions." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class ClaudeAgent(AI_Agent):\n", + " def __init__(self, name):\n", + " super().__init__(name)\n", + " self.client = anthropic.Anthropic(api_key=os.getenv(\"ANTHROPIC_API_KEY\"))\n", + "\n", + " def run(self, input_data):\n", + " response = self.client.messages.create(\n", + " model=\"claude-3-opus-20240229\",\n", + " max_tokens=100,\n", + " messages=[{\"role\": \"user\", \"content\": input_data['text']}]\n", + " )\n", + " return response.content[0].text\n", + "\n", + " def step(self, sim_state):\n", + " sim_state['output'] = self.run(sim_state)\n", + " return sim_state\n", + "\n", + "class NumpyEncoder(json.JSONEncoder):\n", + " def default(self, obj):\n", + " if isinstance(obj, np.floating):\n", + " return float(obj)\n", + " if isinstance(obj, torch.Tensor):\n", + " return obj.tolist()\n", + " return super(NumpyEncoder, self).default(obj)\n", + "\n", + "def save_scores(output_dir: str, run_id: str, analysis_results: dict) -> None:\n", + " os.makedirs(output_dir, exist_ok=True)\n", + " output_file = os.path.join(output_dir, f\"adv-{run_id}.json\")\n", + " \n", + " try:\n", + " with open(output_file, 'w') as f:\n", + " json.dump(analysis_results, f, indent=2, cls=NumpyEncoder)\n", + " \n", + " logging.info(f\"Saved analysis results to {output_file}\")\n", + " except Exception as e:\n", + " logging.error(f\"Error saving analysis results: {str(e)}\")\n", + "\n", + "def get_sample_inputs() -> List[str]:\n", + " return [\n", + " \"What is the capital of France?\",\n", + " \"How does photosynthesis work?\",\n", + " \"Explain the theory of relativity.\"\n", + " ]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Run the Adversarial Simulation\n", + "\n", + "Now we'll set up and run our adversarial simulation." 
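The environment configured in the next cell applies character-level attacks (`textbugger`, `deepwordbug`) to the model's inputs and outputs. As a rough intuition for what such perturbations do — purely an illustrative sketch, not isopro's actual attack implementation — a simple adjacent-character swap looks like this:

```python
import random

def swap_adjacent_chars(text: str, rate: float = 0.1, seed: int = 0) -> str:
    """Illustrative character-level perturbation: occasionally swap neighbouring letters."""
    rng = random.Random(seed)
    chars = list(text)
    for i in range(len(chars) - 1):
        if chars[i].isalpha() and chars[i + 1].isalpha() and rng.random() < rate:
            chars[i], chars[i + 1] = chars[i + 1], chars[i]
    return "".join(chars)

print(swap_adjacent_chars("What is the capital of France?"))
```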
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def run_simulation():\n", + " run_id = datetime.now().strftime(\"%Y%m%d-%H%M%S\")\n", + " logger.info(f\"Starting adversarial simulation run {run_id}\")\n", + "\n", + " claude_agent = ClaudeAgent(\"Claude Agent\")\n", + "\n", + " # Create the AdversarialEnvironment\n", + " adv_env = AdversarialEnvironment(\n", + " agent_wrapper=claude_agent,\n", + " num_adversarial_agents=2,\n", + " attack_types=[\"textbugger\", \"deepwordbug\"],\n", + " attack_targets=[\"input\", \"output\"]\n", + " )\n", + "\n", + " # Set up the adversarial simulator with the environment\n", + " simulator = AdversarialSimulator(adv_env)\n", + "\n", + " input_data = get_sample_inputs()\n", + "\n", + " logger.info(\"Starting adversarial simulation...\")\n", + " simulation_results = simulator.run_simulation(input_data, num_steps=1)\n", + "\n", + " logger.info(\"Analyzing simulation results...\")\n", + " analysis_results = analyze_adversarial_results(simulation_results)\n", + "\n", + " summary = summarize_adversarial_impact(analysis_results)\n", + "\n", + " print(\"\\nAdversarial Simulation Summary:\")\n", + " print(summary)\n", + "\n", + " output_dir = \"output\"\n", + " save_scores(output_dir, run_id, analysis_results)\n", + "\n", + " logger.info(\"Simulation complete.\")\n", + " \n", + " return simulation_results, analysis_results\n", + "\n", + "# Run the simulation\n", + "simulation_results, analysis_results = run_simulation()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Analyze and Visualize Results\n", + "\n", + "Now that we have our results, let's analyze and visualize them." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def plot_metric_changes(analysis_results):\n", + " metrics = ['bleu', 'rouge-1', 'rouge-2', 'rouge-l', 'perplexity', 'coherence']\n", + " changes = [analysis_results[f'{metric}_change'] for metric in metrics]\n", + " \n", + " plt.figure(figsize=(12, 6))\n", + " sns.barplot(x=metrics, y=changes)\n", + " plt.title('Changes in Metrics After Adversarial Attacks')\n", + " plt.xlabel('Metrics')\n", + " plt.ylabel('Percentage Change')\n", + " plt.xticks(rotation=45)\n", + " plt.show()\n", + "\n", + "plot_metric_changes(analysis_results)\n", + "\n", + "# Display original and perturbed inputs and outputs\n", + "for i, result in enumerate(simulation_results):\n", + " print(f\"\\nExample {i+1}:\")\n", + " print(f\"Original Input: {result['original_input']}\")\n", + " print(f\"Perturbed Input: {result['perturbed_input']}\")\n", + " print(f\"Original Output: {result['original_output']}\")\n", + " print(f\"Perturbed Output: {result['perturbed_output']}\")\n", + " print(\"-\" * 50)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Conclusion\n", + "\n", + "This notebook demonstrates how to run an adversarial simulation against a language model and analyze the results. The simulation applies various adversarial attacks to the input or output of the model and measures the impact on different metrics.\n", + "\n", + "Key observations:\n", + "1. The changes in different metrics (BLEU, ROUGE, perplexity, coherence) show how the adversarial attacks affect the model's performance.\n", + "2. 
By comparing the original and perturbed inputs and outputs, we can see how the attacks modify the text and how the model's responses change as a result.\n", + "\n", + "This information can be used to assess the robustness of the language model against adversarial attacks and identify areas for improvement in the model's defenses." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "smooth_env", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.18" + }, + "vscode": { + "interpreter": { + "hash": "e35b4d35af899f01dc238e082b97509c22792197b4b3ae814b774a24a240ad24" + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/isopro/examples/conversation_simulation_example.ipynb b/isopro/examples/conversation_simulation_example.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..d0347311483c0d9178e84b6f565abfbe2889697b --- /dev/null +++ b/isopro/examples/conversation_simulation_example.ipynb @@ -0,0 +1,258 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Conversation Simulator\n", + "\n", + "This notebook demonstrates the usage of the Conversation Simulator from the isopro package. It simulates conversations between an AI assistant (either Claude or GPT-4) and various user personas." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup\n", + "\n", + "First, let's import the necessary modules and set up our environment." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import logging\n", + "from logging.handlers import RotatingFileHandler\n", + "import os\n", + "from datetime import datetime\n", + "from dotenv import load_dotenv\n", + "from isopro.conversation_simulation.conversation_simulator import ConversationSimulator\n", + "from isopro.conversation_simulation.custom_persona import create_custom_persona\n", + "\n", + "# Load environment variables\n", + "load_dotenv()\n", + "\n", + "# Set up logging\n", + "log_directory = \"logs\"\n", + "os.makedirs(log_directory, exist_ok=True)\n", + "log_file = os.path.join(log_directory, \"conversation_simulator.log\")\n", + "\n", + "# Create a rotating file handler\n", + "file_handler = RotatingFileHandler(log_file, maxBytes=1024*1024, backupCount=5)\n", + "file_handler.setLevel(logging.DEBUG)\n", + "file_formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')\n", + "file_handler.setFormatter(file_formatter)\n", + "\n", + "# Create a console handler\n", + "console_handler = logging.StreamHandler()\n", + "console_handler.setLevel(logging.INFO)\n", + "console_formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')\n", + "console_handler.setFormatter(console_formatter)\n", + "\n", + "# Set up the logger\n", + "logger = logging.getLogger()\n", + "logger.setLevel(logging.DEBUG)\n", + "logger.addHandler(file_handler)\n", + "logger.addHandler(console_handler)\n", + "\n", + "print(\"Setup complete.\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Helper Functions\n", + "\n", + "Next, let's define some helper functions." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def save_output(content, filename):\n", + " \"\"\"Save the output content to a file.\"\"\"\n", + " with open(filename, 'w', encoding='utf-8') as f:\n", + " f.write(content)\n", + "\n", + "def get_user_choice():\n", + " \"\"\"Get user's choice of AI model.\"\"\"\n", + " while True:\n", + " choice = input(\"Choose AI model (claude/openai): \").lower()\n", + " if choice in ['claude', 'openai']:\n", + " return choice\n", + " print(\"Invalid choice. Please enter 'claude' or 'openai'.\")\n", + "\n", + "print(\"Helper functions defined.\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Main Simulation Function\n", + "\n", + "Now, let's define our main simulation function." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def run_simulation():\n", + " # Get user's choice of AI model\n", + " ai_choice = get_user_choice()\n", + "\n", + " # Set up the appropriate model and API key\n", + " if ai_choice == 'claude':\n", + " model = \"claude-3-opus-20240229\"\n", + " os.environ[\"ANTHROPIC_API_KEY\"] = os.getenv(\"ANTHROPIC_API_KEY\")\n", + " ai_name = \"Claude\"\n", + " else: # openai\n", + " model = \"gpt-4-1106-preview\"\n", + " os.environ[\"OPENAI_API_KEY\"] = os.getenv(\"OPENAI_API_KEY\")\n", + " ai_name = \"GPT-4 Turbo\"\n", + "\n", + " # Initialize the ConversationSimulator\n", + " simulator = ConversationSimulator(\n", + " ai_prompt=f\"You are {ai_name}, an AI assistant created to be helpful, harmless, and honest. You are a customer service agent for a tech company. Respond politely and professionally.\"\n", + " )\n", + "\n", + " output_content = f\"Conversation Simulator using {ai_name} model: {model}\\n\\n\"\n", + "\n", + " # Run simulations with different personas\n", + " personas = [\"upset\", \"human_request\", \"inappropriate\", \"incomplete_info\"]\n", + " \n", + " for persona in personas:\n", + " logger.info(f\"Running simulation with {persona} persona using {ai_name}\")\n", + " conversation_history = simulator.run_simulation(persona, num_turns=3)\n", + " \n", + " output_content += f\"\\nConversation with {persona} persona:\\n\"\n", + " for message in conversation_history:\n", + " output_line = f\"{message['role'].capitalize()}: {message['content']}\\n\"\n", + " output_content += output_line\n", + " logger.debug(output_line.strip())\n", + " output_content += \"\\n\" + \"-\"*50 + \"\\n\"\n", + "\n", + " # Create and run a simulation with a custom persona\n", + " custom_persona_name = \"Techie Customer\"\n", + " custom_characteristics = [\"tech-savvy\", \"impatient\", \"detail-oriented\"]\n", + " custom_message_templates = [\n", + " \"I've tried rebooting my device, but the error persists. Can you help?\",\n", + " \"What's the latest update on the cloud service outage?\",\n", + " \"I need specifics on the API rate limits for the enterprise plan.\",\n", + " \"The latency on your servers is unacceptable. 
What's being done about it?\",\n", + " \"Can you explain the technical details of your encryption method?\"\n", + " ]\n", + "\n", + " logger.info(f\"Running simulation with custom persona: {custom_persona_name} using {ai_name}\")\n", + " custom_conversation = simulator.run_custom_simulation(\n", + " custom_persona_name,\n", + " custom_characteristics,\n", + " custom_message_templates,\n", + " num_turns=3\n", + " )\n", + "\n", + " output_content += f\"\\nConversation with {custom_persona_name}:\\n\"\n", + " for message in custom_conversation:\n", + " output_line = f\"{message['role'].capitalize()}: {message['content']}\\n\"\n", + " output_content += output_line\n", + " logger.debug(output_line.strip())\n", + "\n", + " # Save the output to a file\n", + " timestamp = datetime.now().strftime(\"%Y%m%d_%H%M%S\")\n", + " output_directory = \"output\"\n", + " os.makedirs(output_directory, exist_ok=True)\n", + " output_file = os.path.join(output_directory, f\"{ai_name.lower()}_conversation_output_{timestamp}.txt\")\n", + " save_output(output_content, output_file)\n", + " logger.info(f\"Output saved to {output_file}\")\n", + "\n", + " return output_content\n", + "\n", + "print(\"Main simulation function defined.\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Run the Simulation\n", + "\n", + "Now we're ready to run the simulation. This cell will prompt you to choose between Claude and GPT-4, then run the simulation and display the results." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "simulation_output = run_simulation()\n", + "print(simulation_output)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Analyze the Results\n", + "\n", + "After running the simulation, you can analyze the results here. For example, you might want to count the number of times certain phrases or words were used, or calculate the average length of responses." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Example analysis: Count the number of apologies\n", + "apology_count = simulation_output.lower().count(\"sorry\") + simulation_output.lower().count(\"apologi\")\n", + "print(f\"Number of apologies: {apology_count}\")\n", + "\n", + "# Example analysis: Average length of AI responses\n", + "ai_responses = [line.split(\": \", 1)[1] for line in simulation_output.split(\"\\n\") if line.startswith(\"Assistant: \")]\n", + "avg_response_length = sum(len(response.split()) for response in ai_responses) / len(ai_responses)\n", + "print(f\"Average length of AI responses: {avg_response_length:.2f} words\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Conclusion\n", + "\n", + "This notebook demonstrates how to use the Conversation Simulator from the isopro package. You can modify the personas, adjust the number of turns, or add your own analysis to further explore the capabilities of the AI models in customer service scenarios." 
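For instance, a further (purely illustrative) analysis could break the transcript down per persona, assuming the `Conversation with ... persona:` block format and the `Assistant:` line prefix produced by the cells above:

```python
# Average assistant response length per persona block in the saved transcript.
blocks = simulation_output.split("Conversation with ")[1:]
for block in blocks:
    header, _, body = block.partition(":\n")
    replies = [line.split(": ", 1)[1] for line in body.splitlines() if line.startswith("Assistant: ")]
    if replies:
        avg_words = sum(len(r.split()) for r in replies) / len(replies)
        print(f"{header}: {len(replies)} assistant turns, {avg_words:.1f} words per reply on average")
```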
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/isopro/examples/orchestrator_example.ipynb b/isopro/examples/orchestrator_example.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..7f2330b8ddda45a962812a0d81379cdeb9eb6d9b --- /dev/null +++ b/isopro/examples/orchestrator_example.ipynb @@ -0,0 +1,245 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# isopro Tutorial: Orchestrator, Evaluator, and Evaluation Modules\n", + "\n", + "This notebook will guide you through using the `isopro` package, focusing on the orchestrator, evaluator, and evaluation modules. We'll cover installation, setup, and usage examples." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Installation\n", + "\n", + "First, let's install the `isopro` package. Run the following cell to install it using pip:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install isopro" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Setup\n", + "\n", + "Now, let's import the necessary modules and set up our environment. We'll need to set our API keys for OpenAI and Anthropic. In a production environment, you should use environment variables for these keys. For this notebook, we'll set them directly (but remember not to share your notebook with these keys included)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "ename": "", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[1;31mThe Kernel crashed while executing code in the the current cell or a previous cell. Please review the code in the cell(s) to identify a possible cause of the failure. Click here for more info. View Jupyter log for further details." + ] + }, + { + "ename": "", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[1;31mCanceled future for execute_request message before replies were done" + ] + } + ], + "source": [ + "import os\n", + "from isopro.orchestration_simulation import OrchestrationEnv\n", + "from isopro.orchestration_simulation.components import LLaMAAgent, AnalysisAgent, WritingAgent\n", + "from isopro.orchestration_simulation.evaluator import Evaluator\n", + "from dotenv import load_dotenv\n", + "\n", + "# Load environment variables from .env file\n", + "load_dotenv()\n", + "\n", + "# Access API keys from environment variables\n", + "openai_api_key = os.getenv(\"OPENAI_API_KEY\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Creating the Orchestration Environment\n", + "\n", + "Let's create our orchestration environment and add our agents to it." 
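As the orchestration code earlier in this diff shows, the environment only requires each component to expose a callable `run` method (anything else raises `ComponentException`). A minimal custom component is therefore easy to sketch — the class below is hypothetical and only illustrates that contract:

```python
class EchoComponent:
    """Illustrative component: echoes the task it is given."""

    def __init__(self, name):
        self.name = name

    def run(self, input_data):
        # run_simulation passes a dict such as {'task': ..., 'run_order': ...}
        return {"echo": input_data.get("task")}

# It could be registered alongside the built-in agents, e.g.:
# env.add_component(EchoComponent("Echo"))
```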
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create the orchestration environment\n", + "env = OrchestrationEnv()\n", + "\n", + "# Add agents to the environment\n", + "env.add_component(LLaMAAgent(\"Research\", \"conduct thorough research on the impact of artificial intelligence on job markets in the next decade\"))\n", + "env.add_component(AnalysisAgent(\"Analysis\"))\n", + "env.add_component(WritingAgent(\"Writing\"))\n", + "\n", + "print(\"Orchestration environment created with agents added!\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. Defining the Task\n", + "\n", + "Now, let's define the task that our agents will work on." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "task = \"Prepare a comprehensive report on the impact of artificial intelligence on job markets in the next decade.\"\n", + "print(f\"Task defined: {task}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5. Running Simulations in Different Modes\n", + "\n", + "We'll now run our simulation in different modes: parallel, sequence, and node." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "modes = ['parallel', 'sequence', 'node']\n", + "results = {}\n", + "\n", + "for mode in modes:\n", + " print(f\"\\nRunning simulation in {mode} mode...\")\n", + " result = env.run_simulation(mode=mode, input_data={'task': task, 'run_order': 'first'})\n", + " results[mode] = result\n", + " print(f\"Simulation in {mode} mode completed.\")\n", + "\n", + "print(\"\\nAll simulations completed!\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 6. Evaluating the Results\n", + "\n", + "Now that we have our results, let's use the Evaluator to determine which mode performed best." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'Evaluator' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mevaluator\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mEvaluator\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mbest_mode\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mevaluator\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mevaluate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresults\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"\\nEvaluation complete. The best execution mode for this task was: {best_mode}\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mNameError\u001b[0m: name 'Evaluator' is not defined" + ] + } + ], + "source": [ + "evaluator = Evaluator()\n", + "best_mode = evaluator.evaluate(results)\n", + "\n", + "print(f\"\\nEvaluation complete. 
The best execution mode for this task was: {best_mode}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 7. Examining the Results\n", + "\n", + "Let's take a closer look at the results from each mode." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for mode, result in results.items():\n", + " print(f\"\\nResults for {mode} mode:\")\n", + " print(f\"Execution Time: {result.get('execution_time', 'N/A')} seconds\")\n", + " print(f\"Memory Usage: {result.get('memory_usage', 'N/A')} MB\")\n", + " print(f\"Output Sample: {result.get('output', 'N/A')[:200]}...\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 8. Conclusion\n", + "\n", + "In this tutorial, we've learned how to:\n", + "1. Set up the isopro package\n", + "2. Create an orchestration environment and add agents\n", + "3. Run simulations in different modes\n", + "4. Use the Evaluator to determine the best execution mode\n", + "5. Examine the results of our simulations\n", + "\n", + "This demonstrates the power and flexibility of the isopro package for orchestrating AI agents and evaluating their performance in different execution modes." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "smooth_env", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.18" + }, + "vscode": { + "interpreter": { + "hash": "e35b4d35af899f01dc238e082b97509c22792197b4b3ae814b774a24a240ad24" + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/isopro/examples/run_cartpole_example.ipynb b/isopro/examples/run_cartpole_example.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..bc3d608975d78665b90c301f9f0e694db87c0208 --- /dev/null +++ b/isopro/examples/run_cartpole_example.ipynb @@ -0,0 +1,403 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# LLM-based CartPole Reinforcement Learning Agent\n", + "\n", + "This notebook demonstrates how to create and train a Reinforcement Learning agent that uses a Large Language Model (LLM) to make decisions in the CartPole environment.\n", + "\n", + "## Setup\n", + "\n", + "First, let's import the necessary libraries and set up our environment." 
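Note that the training code later in this notebook imports `LLMCartPoleWrapper` from a local `llm_cartpole_wrapper` module that is not part of this change. The sketch below is a hypothetical stand-in showing only the gymnasium-compatible interface that code expects (an environment built from a prompt); the real wrapper presumably queries the LLM to turn observations into "Move left"/"Move right" decisions:

```python
import gymnasium as gym

class LLMCartPoleWrapper(gym.Wrapper):
    """Hypothetical sketch of the interface the training cell relies on."""

    def __init__(self, agent_prompt: str):
        super().__init__(gym.make("CartPole-v1"))
        self.agent_prompt = agent_prompt  # the real wrapper would send this to the LLM

    def step(self, action):
        # A real implementation could describe the observation in text, ask the LLM
        # for 'Move left' or 'Move right', and reconcile that with `action`;
        # this stand-in simply forwards the action so the example stays runnable offline.
        return self.env.step(action)
```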
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting isopro\n", + " Downloading isopro-0.1.2-py3-none-any.whl (60 kB)\n", + "...\n", + "Building wheels for collected packages: greenlet\n", + " Building wheel for greenlet (pyproject.toml) ... 
\u001b[?25ldone\n", + "\u001b[?25h Created wheel for greenlet: filename=greenlet-3.1.1-cp38-cp38-macosx_10_9_x86_64.whl size=228270 sha256=61660bb35fa5416d14ab65bd473051c7c9f723b524837a5fd0c58d21fb4818bd\n", + " Stored in directory: /Users/jazmiahenry/Library/Caches/pip/wheels/ba/f9/e2/f8e444bf385c014fea09ef24bde9b85486657505f51396875f\n", + "Successfully built greenlet\n", + "Installing collected packages: pytz, mpmath, farama-notifications, tzdata, typing-extensions, tqdm, threadpoolctl, tenacity, sympy, scipy, safetensors, rouge, regex, python-dotenv, pyparsing, pillow, packaging, orjson, networkx, kiwisolver, jsonpointer, joblib, jiter, h11, greenlet, fsspec, frozenlist, fonttools, filelock, distro, cycler, contourpy, cloudpickle, click, async-timeout, aiohappyeyeballs, torch, tiktoken, SQLAlchemy, scikit-learn, pydantic-core, pandas, nltk, multidict, matplotlib, jsonpatch, huggingface-hub, httpcore, gymnasium, annotated-types, aiosignal, yarl, tokenizers, stable-baselines3, seaborn, pydantic, httpx, transformers, openai, langsmith, anthropic, aiohttp, sentence-transformers, langchain-core, iso-adverse, langchain-text-splitters, langchain-openai, langchain, isopro\n", + " Attempting uninstall: typing-extensions\n", + " Found existing installation: typing_extensions 4.4.0\n", + " Uninstalling typing_extensions-4.4.0:\n", + " Successfully uninstalled typing_extensions-4.4.0\n", + " Attempting uninstall: packaging\n", + " Found existing installation: packaging 23.0\n", + " Uninstalling packaging-23.0:\n", + " Successfully uninstalled packaging-23.0\n", + "Successfully installed SQLAlchemy-2.0.35 aiohappyeyeballs-2.4.0 aiohttp-3.10.6 aiosignal-1.3.1 annotated-types-0.7.0 anthropic-0.34.2 async-timeout-4.0.3 click-8.1.7 cloudpickle-3.0.0 contourpy-1.1.1 cycler-0.12.1 distro-1.9.0 farama-notifications-0.0.4 filelock-3.16.1 fonttools-4.54.1 frozenlist-1.4.1 fsspec-2024.9.0 greenlet-3.1.1 gymnasium-0.29.1 h11-0.14.0 httpcore-1.0.5 httpx-0.27.2 huggingface-hub-0.25.1 iso-adverse-0.2.0 isopro-0.1.2 jiter-0.5.0 joblib-1.4.2 jsonpatch-1.33 jsonpointer-3.0.0 kiwisolver-1.4.7 langchain-0.2.16 langchain-core-0.2.41 langchain-openai-0.1.25 langchain-text-splitters-0.2.4 langsmith-0.1.128 matplotlib-3.7.5 mpmath-1.3.0 multidict-6.1.0 networkx-3.1 nltk-3.9.1 openai-1.48.0 orjson-3.10.7 packaging-24.1 pandas-2.0.3 pillow-10.4.0 pydantic-2.9.2 pydantic-core-2.23.4 pyparsing-3.1.4 python-dotenv-1.0.1 pytz-2024.2 regex-2024.9.11 rouge-1.0.1 safetensors-0.4.5 scikit-learn-1.3.2 scipy-1.10.1 seaborn-0.13.2 sentence-transformers-3.1.1 stable-baselines3-2.3.2 sympy-1.13.3 tenacity-8.5.0 threadpoolctl-3.5.0 tiktoken-0.7.0 tokenizers-0.20.0 torch-2.2.2 tqdm-4.66.5 transformers-4.45.0 typing-extensions-4.12.2 tzdata-2024.2 yarl-1.12.1\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.2\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" + ] + } + ], + "source": [ + "!pip install isopro" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "ename": "", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[1;31mThe Kernel crashed while executing code in the the current cell or a previous cell. 
Please review the code in the cell(s) to identify a possible cause of the failure. Click here for more info. View Jupyter log for further details." + ] + }, + { + "ename": "", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[1;31mCanceled future for execute_request message before replies were done" + ] + } + ], + "source": [ + "import gymnasium as gym\n", + "from isopro.rl.rl_agent import RLAgent\n", + "from isopro.rl.rl_environment import LLMRLEnvironment\n", + "from stable_baselines3 import PPO\n", + "import numpy as np\n", + "import anthropic\n", + "import os\n", + "import logging\n", + "from typing import Optional, Dict, Any\n", + "from tqdm import tqdm\n", + "import json\n", + "from datetime import datetime\n", + "from llm_cartpole_wrapper import LLMCartPoleWrapper\n", + "\n", + "# Set up logging\n", + "logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')\n", + "logger = logging.getLogger(__name__)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create and Train the RL Agent\n", + "\n", + "Now, let's create our RL agent and train it using the LLM-based CartPole environment." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "agent_prompt = \"\"\"You are an AI trained to play the CartPole game. \n", + "Your goal is to balance a pole on a moving cart for as long as possible. \n", + "You will receive observations about the cart's position, velocity, pole angle, and angular velocity. \n", + "Based on these, you should decide whether to move the cart left or right. \n", + "Respond with 'Move left' or 'Move right' for each decision.\"\"\"\n", + "\n", + "env = LLMCartPoleWrapper(agent_prompt)\n", + "model = PPO(\"MlpPolicy\", env, verbose=1)\n", + "\n", + "logger.info(\"Starting training\")\n", + "model.learn(total_timesteps=10000)\n", + "logger.info(\"Training completed\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Test the Trained Agent\n", + "\n", + "Now that we've trained our agent, let's test it for 2 episodes and see how it performs." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "test_episodes = 2\n", + "results = []\n", + "\n", + "logger.info(\"Starting test episodes\")\n", + "for episode in tqdm(range(test_episodes), desc=\"Test Episodes\"):\n", + " obs, _ = env.reset()\n", + " done = False\n", + " total_reward = 0\n", + " episode_length = 0\n", + " while not done:\n", + " action, _ = model.predict(obs, deterministic=True)\n", + " obs, reward, terminated, truncated, _ = env.step(action)\n", + " total_reward += reward\n", + " episode_length += 1\n", + " done = terminated or truncated\n", + " \n", + " logger.info(f\"Episode {episode + 1} completed. 
Total reward: {total_reward}, Length: {episode_length}\")\n", + " results.append({\"episode\": episode + 1, \"total_reward\": total_reward, \"length\": episode_length})\n", + "\n", + "# Save results to file\n", + "timestamp = datetime.now().strftime(\"%Y%m%d_%H%M%S\")\n", + "output_file = os.path.join(output_folder, f\"cartpole_results_{timestamp}.json\")\n", + "with open(output_file, 'w') as f:\n", + " json.dump(results, f, indent=2)\n", + "logger.info(f\"Results saved to {output_file}\")\n", + "\n", + "# Print summary\n", + "average_reward = sum(r['total_reward'] for r in results) / len(results)\n", + "average_length = sum(r['length'] for r in results) / len(results)\n", + "logger.info(f\"Test completed. Average reward: {average_reward:.2f}, Average length: {average_length:.2f}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Conclusion\n", + "\n", + "In this notebook, we've demonstrated how to:\n", + "\n", + "1. Set up an LLM-based wrapper for the CartPole environment\n", + "2. Train a reinforcement learning agent using this environment\n", + "3. Test the trained agent and collect performance metrics\n", + "\n", + "This approach combines the decision-making capabilities of a large language model with the learning process of reinforcement learning, potentially leading to interesting and novel solutions to the CartPole problem.\n", + "\n", + "Feel free to experiment with different prompts, training parameters, or even different environments to see how this approach can be applied in various scenarios!" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.6" + }, + "vscode": { + "interpreter": { + "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6" + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/isopro/examples/workflow_example.ipynb b/isopro/examples/workflow_example.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..8e0cf24dc854aada6501d7414329bb6b5ff73fbc --- /dev/null +++ b/isopro/examples/workflow_example.ipynb @@ -0,0 +1,316 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Automating a Meme Generator with Workflow Simulation\n", + "This notebook demonstrates how to use isopro.workflow_simulation to automate a meme generation workflow. We'll train an agent to:\n", + "\n", + "1. Navigate a meme generator website\n", + "2. Upload images\n", + "3. Add captions\n", + "4. Generate and download memes\n", + "\n", + "And do it all automatically!" 
+ ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup\n", + "First, let's import our required libraries and set up our environment:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from pathlib import Path\n", + "from isopro.workflow_simulation import (\n", + " WorkflowSimulator,\n", + " AgentConfig,\n", + " VisualizationConfig,\n", + " ValidationConfig\n", + ")\n", + "import matplotlib.pyplot as plt\n", + "from IPython.display import Image, HTML" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Configuration\n", + "Let's create a fun configuration for our meme generator automation:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create output directory for our memes\n", + "output_dir = Path(\"meme_generator.mp4\")\n", + "output_dir.mkdir(exist_ok=True)\n", + "\n", + "# Configure our agent with some fun parameters\n", + "agent_config = AgentConfig(\n", + " learning_rate=3e-4, # Not too fast, not too slow - just right for meme making\n", + " pretrain_epochs=10, # Give it some time to learn the art of memes\n", + " use_demonstration=True, # Learn from the meme masters\n", + " use_reasoning=True, # Think before you meme\n", + " reward_threshold=0.8 # High standards for our memes!\n", + ")\n", + "\n", + "# Set up visualization so we can watch the magic happen\n", + "viz_config = VisualizationConfig(\n", + " show_ui_elements=True, # See what the agent sees\n", + " show_cursor=True, # Watch the cursor dance\n", + " show_actions=True, # Understand what's happening\n", + " save_frames=True, # Save the best moments\n", + " real_time_display=True # Watch it live!\n", + ")\n", + "\n", + "# Define what makes a successful meme\n", + "validation_config = ValidationConfig.from_dict({\n", + " \"success_criteria\": [\n", + " \"image_uploaded\",\n", + " \"captions_added\",\n", + " \"meme_generated\",\n", + " \"meme_downloaded\"\n", + " ],\n", + " \"error_tolerance\": 0.1 # Some memes are meant to be a little off...\n", + "})" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Recording a Demonstration\n", + "Before we can train our agent, we need to show it how to make memes. 
Here's how we record a demonstration:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Initialize our simulator\n", + "simulator = WorkflowSimulator(\n", + " video_path=\"meme_tutorial.mp4\", # Your recorded workflow video\n", + " agent_config=agent_config,\n", + " viz_config=viz_config,\n", + " validation_config=validation_config,\n", + " output_dir=str(output_dir)\n", + ")\n", + "\n", + "# Let's see what our demonstration video looks like\n", + "display(HTML(f\"\"\"\n", + "\n", + "\"\"\"))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Training Our Meme Master\n", + "Now that we have our demonstration, let's train our agent to become a meme master:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Time to learn!\n", + "print(\"🎓 Training our agent to become a meme master...\")\n", + "training_results = simulator.train_agents()\n", + "\n", + "# Show the learning progress\n", + "plt.figure(figsize=(10, 5))\n", + "plt.plot(training_results['episode_rewards'])\n", + "plt.title(\"Learning to Meme\")\n", + "plt.xlabel(\"Episode\")\n", + "plt.ylabel(\"Reward\")\n", + "plt.show()\n", + "\n", + "# Print some fun stats\n", + "print(\"\\n🎯 Training Results:\")\n", + "print(f\"Average Reward: {training_results['mean_reward']:.2f}\")\n", + "print(f\"Success Rate: {training_results['success_rate']*100:.1f}%\")\n", + "print(f\"Best Episode Reward: {max(training_results['episode_rewards']):.2f}\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Unleashing the Meme Generator\n", + "Let's use our trained agent to generate some memes!" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Prepare some fun meme templates and captions\n", + "meme_tasks = [\n", + " {\n", + " \"template\": \"distracted_boyfriend.jpg\",\n", + " \"captions\": [\n", + " \"Python\",\n", + " \"Me\",\n", + " \"JavaScript\"\n", + " ]\n", + " },\n", + " {\n", + " \"template\": \"drake.jpg\",\n", + " \"captions\": [\n", + " \"Writing code without comments\",\n", + " \"Writing comments without code\"\n", + " ]\n", + " },\n", + " {\n", + " \"template\": \"expanding_brain.jpg\",\n", + " \"captions\": [\n", + " \"print('debug')\",\n", + " \"console.log('debug')\",\n", + " \"Using a debugger\",\n", + " \"Adding random print statements and hoping for the best\"\n", + " ]\n", + " }\n", + "]\n", + "\n", + "# Generate memes!\n", + "print(\"🎨 Generating memes...\")\n", + "for i, task in enumerate(meme_tasks):\n", + " print(f\"\\n✨ Creating meme {i+1}/{len(meme_tasks)}\")\n", + " \n", + " # Let our agent work its magic\n", + " observation = simulator.reset()\n", + " done = False\n", + " \n", + " while not done:\n", + " action, _ = simulator.predict(observation)\n", + " observation, reward, done, info = simulator.step(action)\n", + " \n", + " if info.get('meme_generated'):\n", + " print(\"🎉 Meme created successfully!\")\n", + " \n", + " # Display the generated meme\n", + " meme_path = output_dir / f\"meme_{i+1}.png\"\n", + " display(Image(filename=str(meme_path)))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Analyzing Our Meme Factory\n", + "Let's look at some fun statistics about our meme generation:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Get evaluation results\n", + "eval_results = simulator.evaluate_agents()\n", + "\n", + "# Create a fun visualization of our meme factory stats\n", + "stats = {\n", + " \"Memes Generated\": len(meme_tasks),\n", + " \"Success Rate\": f\"{eval_results['success_rate']*100:.1f}%\",\n", + " \"Average Generation Time\": f\"{eval_results['mean_length']:.1f}s\",\n", + " \"Quality Score\": f\"{eval_results['mean_reward']:.2f}/1.0\"\n", + "}\n", + "\n", + "print(\"📊 Meme Factory Statistics:\")\n", + "for stat, value in stats.items():\n", + " print(f\"{stat}: {value}\")\n", + "\n", + "# Plot a fun pie chart of time spent on each step\n", + "steps = [\n", + " \"Finding Templates\",\n", + " \"Adding Captions\",\n", + " \"Adjusting Layout\",\n", + " \"Generating Meme\",\n", + " \"Saving Masterpiece\"\n", + "]\n", + "times = [15, 30, 25, 20, 10] # Example percentages\n", + "\n", + "plt.figure(figsize=(10, 8))\n", + "plt.pie(times, labels=steps, autopct='%1.1f%%', \n", + " colors=['#FF9999', '#66B2FF', '#99FF99', '#FFCC99', '#FF99CC'])\n", + "plt.title(\"Time Spent Making Memes\")\n", + "plt.show()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Conclusion\n", + "Congratulations! You've successfully created an automated meme factory! 🎉\n", + "Some fun things we learned:\n", + "\n", + "- Our agent can learn to navigate UI elements and create memes\n", + "- The power of combining computer vision with reinforcement learning\n", + "- How to make our code more entertaining with emojis 😄\n", + "\n", + "## Next Steps\n", + "Want to make your meme factory even better? 
Here are some fun ideas:\n", + "\n", + "- Train on different meme templates\n", + "- Add text effects and styling\n", + "- Create a meme recommendation system\n", + "- Build a Discord bot using this automation\n", + "- Generate captions using Claude" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.12.7" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "7500c3e1c7c786e4ba1e4b4eb7588219b4e35d5153674f92eb3a82672b534f6e" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/isopro/orchestration_simulation/__init__.py b/isopro/orchestration_simulation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..61d119f52858e05603ee6f3c90f2d108bc1d04aa --- /dev/null +++ b/isopro/orchestration_simulation/__init__.py @@ -0,0 +1,14 @@ +from .agent import AI_Agent +from .orchestration_env import OrchestrationEnv +from .components.subagent import SubAgent +from .components.llama_agent import LLaMAAgent +from .components.base_component import BaseComponent +from .exceptions import AI_AgentException, ComponentException + +__all__ = ['AI_Agent', + 'BaseComponent', + 'OrchestrationEnv', + 'SubAgent', + 'LLaMAAgent', + 'AI_AgentException', + 'ComponentException'] \ No newline at end of file diff --git a/isopro/orchestration_simulation/agent.py b/isopro/orchestration_simulation/agent.py new file mode 100644 index 0000000000000000000000000000000000000000..9f4bcf1892a9ab3fc51236e19566fdc8616c3617 --- /dev/null +++ b/isopro/orchestration_simulation/agent.py @@ -0,0 +1,32 @@ +from .exceptions import AI_AgentException, ComponentException + +class AI_Agent: + def __init__(self, name): + self.name = name + self.components = [] + + def add_component(self, component): + self.components.append(component) + + def run(self, agent_input): + print(f"Running agent: {self.name}") + agent_output = {} + for component in self.components: + try: + component_output = component.run(agent_input) + agent_output.update(component_output) + except ComponentException as e: + raise AI_AgentException(f"Exception in agent {self.name}: {e}") + return agent_output + + def reset(self): + print(f"Resetting agent: {self.name}") + for component in self.components: + if hasattr(component, 'reset'): + component.reset() + + def close(self): + print(f"Closing agent: {self.name}") + for component in self.components: + if hasattr(component, 'close'): + component.close() \ No newline at end of file diff --git a/isopro/orchestration_simulation/components/__init__.py b/isopro/orchestration_simulation/components/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..d031b8d8689dbc82e0055481be79fa288a74af2a --- /dev/null +++ b/isopro/orchestration_simulation/components/__init__.py @@ -0,0 +1,11 @@ +from .base_component import BaseComponent +from .subagent import SubAgent +from .llama_agent import LLaMAAgent +from .writing_agent import WritingAgent +from .analysis_agent import AnalysisAgent + +__all__ = ['BaseComponent', + 'SubAgent', + 'LLaMAAgent', + 'WritingAgent', + 'AnalysisAgent'] \ No newline at end of file diff --git a/isopro/orchestration_simulation/components/analysis_agent.py b/isopro/orchestration_simulation/components/analysis_agent.py new file mode 100644 index 0000000000000000000000000000000000000000..446547d383b0b6d3d3d5198438d6989a17ad1642 --- /dev/null +++ b/isopro/orchestration_simulation/components/analysis_agent.py @@ 
-0,0 +1,83 @@ +# components/analysis_agent.py + +from .subagent import SubAgent +import logging +import os +from dotenv import load_dotenv +from isozero.reason_sim import OpenAIAgent, ClaudeAgent, ReasonSimulation, ReasonSimulationWrapper +from typing import Dict, Any + +# Load environment variables +load_dotenv() + +logger = logging.getLogger(__name__) + +class AnalysisAgent(SubAgent): + def __init__(self, name, priority=0): + super().__init__(name, self.analyze, priority) + self.model_type = self.determine_model_type() + self.agent = self.initialize_agent() + + def determine_model_type(self): + task = "Determine the best AI model for text analysis considering model capability, ease of use, and computational requirements. Options are OpenAI and Claude. Respond with only the name of the chosen model." + simulation = ReasonSimulation(task, max_steps=5) + wrapper = ReasonSimulationWrapper(OpenAIAgent(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4"), simulation) + + logger.info("Starting model determination simulation") + + for step in range(5): + state = wrapper.step() + logger.info(f"Model determination - Step {step + 1} completed") + + final_state = wrapper.render() + logger.info("Model determination simulation completed") + + wrapper.close() + + # Parse the string output to determine the chosen model + chosen_model = final_state.strip().lower() + if 'openai' in chosen_model: + return "OpenAI" + elif 'claude' in chosen_model: + return "Claude" + else: + logger.warning(f"Unclear model choice: {chosen_model}. Defaulting to OpenAI.") + return "OpenAI" + + def initialize_agent(self): + if self.model_type == "OpenAI": + return OpenAIAgent(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4") + elif self.model_type == "Claude": + return ClaudeAgent(api_key=os.getenv("ANTHROPIC_API_KEY")) + else: + raise ValueError(f"Unknown model type: {self.model_type}") + + def run_simulation(self, task: str, max_steps: int = 5) -> Dict[str, Any]: + simulation = ReasonSimulation(task, max_steps=max_steps) + wrapper = ReasonSimulationWrapper(self.agent, simulation) + + logger.info(f"Starting analysis simulation with {self.model_type}") + + for step in range(max_steps): + state = wrapper.step() + logger.info(f"{self.model_type} Analysis - Step {step + 1} completed") + + final_state = wrapper.render() + logger.info(f"Analysis simulation with {self.model_type} completed") + + wrapper.close() + return {'output': final_state} + + def analyze(self, input_data): + try: + task = f"Analyze the following text:\n\n{input_data}\n\nProvide a detailed analysis." 
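+ # The reasoning simulation below runs for up to max_steps; its final rendered state (returned under 'output') is used as the analysis text.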
+ result = self.run_simulation(task) + + # Extract the final analysis from the simulation result + analysis = result.get('output', 'No analysis produced') + + logger.info(f"Analysis Agent output:\n{analysis}") + return {"result": analysis} + except Exception as e: + logger.error(f"Error in Analysis Agent: {e}") + return {"result": f"Error in Analysis Agent: {str(e)}"} \ No newline at end of file diff --git a/isopro/orchestration_simulation/components/base_component.py b/isopro/orchestration_simulation/components/base_component.py new file mode 100644 index 0000000000000000000000000000000000000000..ad19ddca811c12af366c7fc930863f93e308a713 --- /dev/null +++ b/isopro/orchestration_simulation/components/base_component.py @@ -0,0 +1,7 @@ +class BaseComponent: + def __init__(self, name, priority=0): + self.name = name + self.priority = priority + + def run(self, input_data): + raise NotImplementedError("Subclasses must implement the 'run' method") diff --git a/isopro/orchestration_simulation/components/llama_agent.py b/isopro/orchestration_simulation/components/llama_agent.py new file mode 100644 index 0000000000000000000000000000000000000000..92b16232c2a36a73af47a1b8bf5c48c92128e78f --- /dev/null +++ b/isopro/orchestration_simulation/components/llama_agent.py @@ -0,0 +1,71 @@ +# components/llama_agent.py + +import torch +from transformers import AutoTokenizer, AutoModelForCausalLM +from .subagent import SubAgent +import logging + +logger = logging.getLogger(__name__) + +class LLaMAAgent(SubAgent): + def __init__(self, name, task, model_name="facebook/opt-350m", priority=0): + super().__init__(name, self.llama_behavior, priority) + self.model_name = model_name + self.tokenizer = None + self.model = None + self.task = task + self.initialize_model() + + def initialize_model(self): + try: + self.tokenizer = AutoTokenizer.from_pretrained(self.model_name) + self.model = AutoModelForCausalLM.from_pretrained(self.model_name) + except Exception as e: + logger.error(f"Error initializing LLaMA model: {e}") + raise + + def llama_behavior(self, input_data): + if not self.tokenizer or not self.model: + logger.error("LLaMA model not properly initialized") + return {"result": "Error: Model not initialized"} + + run_order = input_data.get('run_order', 'unknown') + previous_output = input_data.get('previous_output', '') + + prompt = f""" + Task: {self.task} + + Your current position in the run order: {run_order} + + Previous output (if any): + {previous_output} + + Based on your position in the run order, provide a concise and informative response. 
+ Response:""" + + try: + inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512) + + with torch.no_grad(): + outputs = self.model.generate( + **inputs, + max_length=300, + num_return_sequences=1, + temperature=0.7, + top_k=50, + top_p=0.95, + do_sample=True + ) + + response = self.tokenizer.decode(outputs[0], skip_special_tokens=True) + answer = response.split("Response:")[-1].strip() + + if not answer: + logger.warning(f"LLaMA Agent {self.name} generated an empty response") + return {"result": "Error: Empty response generated"} + + logger.info(f"LLaMA Agent {self.name} response:\n{answer}") + return {"result": answer} + except Exception as e: + logger.error(f"Error in LLaMA Agent {self.name}: {e}") + return {"result": f"Error in LLaMA Agent: {str(e)}"} \ No newline at end of file diff --git a/isopro/orchestration_simulation/components/subagent.py b/isopro/orchestration_simulation/components/subagent.py new file mode 100644 index 0000000000000000000000000000000000000000..421f44d2fa6e06678c50aa688fd69bd441507cf1 --- /dev/null +++ b/isopro/orchestration_simulation/components/subagent.py @@ -0,0 +1,23 @@ +# components/subagent.py + +from .base_component import BaseComponent +from ..exceptions import ComponentException +import logging + +logger = logging.getLogger(__name__) + +class SubAgent(BaseComponent): + def __init__(self, name, behavior, priority=0): + super().__init__(name, priority) + self.behavior = behavior + + def run(self, input_data=None): + try: + logger.info(f"Running subagent: {self.name}") + result = self.behavior(input_data) + if not result: + raise ValueError("Empty result from subagent") + return result + except Exception as e: + logger.error(f"Exception in subagent {self.name}: {e}") + raise ComponentException(f"Exception in subagent {self.name}: {e}") \ No newline at end of file diff --git a/isopro/orchestration_simulation/components/writing_agent.py b/isopro/orchestration_simulation/components/writing_agent.py new file mode 100644 index 0000000000000000000000000000000000000000..2ca74cab1a2f9d1882e225adf973f02420687ce6 --- /dev/null +++ b/isopro/orchestration_simulation/components/writing_agent.py @@ -0,0 +1,83 @@ +# components/writing_agent.py + +from .subagent import SubAgent +import logging +import os +from dotenv import load_dotenv +from isozero.reason_sim import OpenAIAgent, ClaudeAgent, ReasonSimulation, ReasonSimulationWrapper +from typing import Dict, Any + +# Load environment variables +load_dotenv() + +logger = logging.getLogger(__name__) + +class WritingAgent(SubAgent): + def __init__(self, name, priority=0): + super().__init__(name, self.write, priority) + self.model_type = self.determine_model_type() + self.agent = self.initialize_agent() + + def determine_model_type(self): + task = "Determine the best AI model for report writing considering writing quality, coherence, and adherence to format. Options are OpenAI and Claude. Respond with only the name of the chosen model." 
+ simulation = ReasonSimulation(task, max_steps=5) + wrapper = ReasonSimulationWrapper(OpenAIAgent(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4"), simulation) + + logger.info("Starting model determination simulation") + + for step in range(5): + state = wrapper.step() + logger.info(f"Model determination - Step {step + 1} completed") + + final_state = wrapper.render() + logger.info("Model determination simulation completed") + + wrapper.close() + + # Parse the string output to determine the chosen model + chosen_model = final_state.strip().lower() + if 'openai' in chosen_model: + return "OpenAI" + elif 'claude' in chosen_model: + return "Claude" + else: + logger.warning(f"Unclear model choice: {chosen_model}. Defaulting to OpenAI.") + return "OpenAI" + + def initialize_agent(self): + if self.model_type == "OpenAI": + return OpenAIAgent(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4") + elif self.model_type == "Claude": + return ClaudeAgent(api_key=os.getenv("ANTHROPIC_API_KEY")) + else: + raise ValueError(f"Unknown model type: {self.model_type}") + + def run_simulation(self, task: str, max_steps: int = 5) -> Dict[str, Any]: + simulation = ReasonSimulation(task, max_steps=max_steps) + wrapper = ReasonSimulationWrapper(self.agent, simulation) + + logger.info(f"Starting writing simulation with {self.model_type}") + + for step in range(max_steps): + state = wrapper.step() + logger.info(f"{self.model_type} Writing - Step {step + 1} completed") + + final_state = wrapper.render() + logger.info(f"Writing simulation with {self.model_type} completed") + + wrapper.close() + return {'output': final_state} + + def write(self, input_data): + try: + task = f"Write a comprehensive report:\n\n{input_data}\n\nProvide a well-structured report." + result = self.run_simulation(task) + + # Extract the final report from the simulation result + report = result.get('output', 'No report produced') + + logger.info(f"Writing Agent output:\n{report}") + return {"result": report} + except Exception as e: + logger.error(f"Error in Writing Agent: {e}") + return {"result": f"Error in Writing Agent: {str(e)}"} \ No newline at end of file diff --git a/isopro/orchestration_simulation/evaluation.py b/isopro/orchestration_simulation/evaluation.py new file mode 100644 index 0000000000000000000000000000000000000000..c5c613443173ac6909e2a32a2899a4dd55807499 --- /dev/null +++ b/isopro/orchestration_simulation/evaluation.py @@ -0,0 +1,65 @@ +# evaluation.py + +import nltk +from sklearn.feature_extraction.text import TfidfVectorizer +from sklearn.metrics.pairwise import cosine_similarity +import logging + +logger = logging.getLogger(__name__) + +def measure_coherence(texts): + try: + # Ensure texts is a list of strings + if not all(isinstance(text, str) for text in texts): + raise ValueError("All texts must be strings") + + # Tokenize the texts into sentences + sentences = [sent for text in texts for sent in nltk.sent_tokenize(text)] + + # Create TF-IDF vectors + vectorizer = TfidfVectorizer().fit_transform(sentences) + + # Calculate cosine similarity + similarity_matrix = cosine_similarity(vectorizer) + + # Calculate average similarity + coherence = similarity_matrix.mean() + + return coherence + except Exception as e: + logger.error(f"Error measuring coherence: {e}") + return 0.0 + +def evaluate_results(evaluations): + try: + # Normalize scores + normalized_scores = {} + for metric in ['execution_time', 'memory_usage', 'coherence']: + min_val = min(eval[metric] for eval in evaluations.values()) + max_val = max(eval[metric] 
for eval in evaluations.values()) + for mode in evaluations: + if mode not in normalized_scores: + normalized_scores[mode] = {} + if max_val - min_val == 0: + normalized_scores[mode][metric] = 1 # Avoid division by zero + else: + if metric in ['execution_time', 'memory_usage']: + normalized_scores[mode][metric] = 1 - (evaluations[mode][metric] - min_val) / (max_val - min_val) + else: + normalized_scores[mode][metric] = (evaluations[mode][metric] - min_val) / (max_val - min_val) + + # Calculate total scores + total_scores = {mode: sum(scores.values()) for mode, scores in normalized_scores.items()} + + best_mode = max(total_scores, key=total_scores.get) + + logger.info(f"\nBest mode for this task: {best_mode}") + logger.info("Reasoning:") + logger.info(f" Execution Time: {evaluations[best_mode]['execution_time']:.2f} seconds") + logger.info(f" Memory Usage: {evaluations[best_mode]['memory_usage']:.2f} MB") + logger.info(f" Coherence: {evaluations[best_mode]['coherence']:.2f}") + + return best_mode + except Exception as e: + logger.error(f"Error in evaluate_results: {e}") + return "Error in evaluation" \ No newline at end of file diff --git a/isopro/orchestration_simulation/evaluator.py b/isopro/orchestration_simulation/evaluator.py new file mode 100644 index 0000000000000000000000000000000000000000..f8b8461d975456b68dba05a77891050dcc8218b9 --- /dev/null +++ b/isopro/orchestration_simulation/evaluator.py @@ -0,0 +1,105 @@ +# isopro/orchestration_simulation/evaluator.py + +import logging +from typing import Dict, Any +import numpy as np +from sklearn.feature_extraction.text import TfidfVectorizer +from sklearn.metrics.pairwise import cosine_similarity + +logger = logging.getLogger(__name__) + +class Evaluator: + def __init__(self): + self.metrics = ['execution_time', 'memory_usage', 'coherence'] + + def evaluate(self, results: Dict[str, Dict[str, Any]]) -> str: + """ + Evaluate the results from different execution modes and determine the best mode. + + :param results: A dictionary with execution modes as keys and their results as values + :return: The name of the best execution mode + """ + evaluations = {} + for mode, result in results.items(): + evaluations[mode] = self._evaluate_mode(result) + + normalized_scores = self._normalize_scores(evaluations) + total_scores = {mode: sum(scores.values()) for mode, scores in normalized_scores.items()} + + best_mode = max(total_scores, key=total_scores.get) + + self._log_evaluation_results(evaluations, best_mode) + + return best_mode + + def _evaluate_mode(self, result: Dict[str, Any]) -> Dict[str, float]: + """ + Evaluate a single execution mode based on the metrics. + + :param result: The result dictionary for a single execution mode + :return: A dictionary of evaluation scores for each metric + """ + evaluation = { + 'execution_time': result.get('execution_time', 0), + 'memory_usage': result.get('memory_usage', 0), + 'coherence': self._calculate_coherence(result.get('output', '')) + } + return evaluation + + def _normalize_scores(self, evaluations: Dict[str, Dict[str, float]]) -> Dict[str, Dict[str, float]]: + """ + Normalize the evaluation scores across all modes. 
+ + :param evaluations: A dictionary of evaluation scores for each mode + :return: A dictionary of normalized scores + """ + normalized = {} + for metric in self.metrics: + min_val = min(eval[metric] for eval in evaluations.values()) + max_val = max(eval[metric] for eval in evaluations.values()) + for mode in evaluations: + if mode not in normalized: + normalized[mode] = {} + if max_val - min_val == 0: + normalized[mode][metric] = 1 # Avoid division by zero + else: + if metric in ['execution_time', 'memory_usage']: + normalized[mode][metric] = 1 - (evaluations[mode][metric] - min_val) / (max_val - min_val) + else: + normalized[mode][metric] = (evaluations[mode][metric] - min_val) / (max_val - min_val) + return normalized + + def _calculate_coherence(self, text: str) -> float: + """ + Calculate the coherence of the output text. + + :param text: The output text to evaluate + :return: A coherence score + """ + sentences = text.split('.') + if len(sentences) < 2: + return 0.0 + + vectorizer = TfidfVectorizer().fit_transform(sentences) + similarity_matrix = cosine_similarity(vectorizer) + + return np.mean(similarity_matrix) + + def _log_evaluation_results(self, evaluations: Dict[str, Dict[str, float]], best_mode: str): + """ + Log the evaluation results and the best mode. + + :param evaluations: A dictionary of evaluation scores for each mode + :param best_mode: The name of the best execution mode + """ + logger.info("Evaluation Results:") + for mode, scores in evaluations.items(): + logger.info(f"{mode} mode:") + for metric, score in scores.items(): + logger.info(f" {metric}: {score:.2f}") + + logger.info(f"\nBest mode: {best_mode}") + logger.info("Reasoning:") + logger.info(f" Execution Time: {evaluations[best_mode]['execution_time']:.2f} seconds") + logger.info(f" Memory Usage: {evaluations[best_mode]['memory_usage']:.2f} MB") + logger.info(f" Coherence: {evaluations[best_mode]['coherence']:.2f}") \ No newline at end of file diff --git a/isopro/orchestration_simulation/exceptions.py b/isopro/orchestration_simulation/exceptions.py new file mode 100644 index 0000000000000000000000000000000000000000..e2096dbb435a70a5ad38f47d242886b7c3612a52 --- /dev/null +++ b/isopro/orchestration_simulation/exceptions.py @@ -0,0 +1,5 @@ +class AI_AgentException(Exception): + pass + +class ComponentException(Exception): + pass \ No newline at end of file diff --git a/isopro/orchestration_simulation/execution_modes.py b/isopro/orchestration_simulation/execution_modes.py new file mode 100644 index 0000000000000000000000000000000000000000..ca44a5e3a7b5c81646d7a8c2cc5c5f0023c11fef --- /dev/null +++ b/isopro/orchestration_simulation/execution_modes.py @@ -0,0 +1,14 @@ +from concurrent.futures import ThreadPoolExecutor + +def run_in_sequence(components): + for component in components: + component.run() + +def run_in_parallel(components): + with ThreadPoolExecutor() as executor: + executor.map(lambda component: component.run(), components) + +def run_as_node(components): + # Implement custom node-based execution logic here + for component in components: + component.run() \ No newline at end of file diff --git a/isopro/orchestration_simulation/main.py b/isopro/orchestration_simulation/main.py new file mode 100644 index 0000000000000000000000000000000000000000..ba0474a27f4988ad4f7d2ccbfeb08d2884ed3931 --- /dev/null +++ b/isopro/orchestration_simulation/main.py @@ -0,0 +1,162 @@ +import os +import json +from datetime import datetime +from dotenv import load_dotenv +import logging +import traceback +from langchain.agents 
import Tool, create_react_agent +from langchain_openai import OpenAI +from langchain.prompts import PromptTemplate +from langchain.agents import AgentExecutor +from langchain.schema import AgentAction, AgentFinish +import json +from isopro.orchestration_simulation.orchestration_env import OrchestrationEnv +from isopro.orchestration_simulation.utils import setup_logging +from isopro.orchestration_simulation.components.base_component import BaseComponent + +# Load environment variables from .env file +load_dotenv() + +# Access API keys from environment variables +openai_api_key = os.getenv("OPENAI_API_KEY") + +if not openai_api_key: + raise ValueError("OpenAI API key not found. Please check your .env file.") + +# Set up logging +log_dir = os.path.join(os.getcwd(), "logs") +os.makedirs(log_dir, exist_ok=True) +log_file = os.path.join(log_dir, f"renewable_energy_simulation_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log") +logger = setup_logging(log_file=log_file) + +class ToolComponent(BaseComponent): + def __init__(self, name, func, description): + super().__init__(name) + self.func = func + self.description = description + + def run(self, input_data): + logger.info(f"Running {self.name} with input: {input_data}") + result = self.func(input_data) + logger.info(f"{self.name} result: {result}") + return result + +# Create tool components +tech_research = ToolComponent("TechResearch", lambda x: "Latest advancements include improved efficiency in solar panels, advanced wind turbine designs, and breakthroughs in energy storage technologies.", "Research technological advancements") +economic_analysis = ToolComponent("EconomicAnalysis", lambda x: "Economic factors include decreasing costs of renewable technologies, increasing investment in clean energy, and the implementation of carbon pricing mechanisms.", "Analyze economic factors") +policy_review = ToolComponent("PolicyReview", lambda x: "Policy initiatives include renewable energy targets, feed-in tariffs, tax incentives for clean energy adoption, and stricter regulations on fossil fuel emissions.", "Review policy initiatives") + +tools = [ + Tool(name=tech_research.name, func=tech_research.run, description=tech_research.description), + Tool(name=economic_analysis.name, func=economic_analysis.run, description=economic_analysis.description), + Tool(name=policy_review.name, func=policy_review.run, description=policy_review.description), +] + +# Create agent +llm = OpenAI(temperature=0.7) + +# Create a prompt template +prompt = PromptTemplate( + input_variables=["input", "agent_scratchpad"], + template="""You are an expert in renewable energy analysis. Use the following tools to answer the question: + +{tools} + +Use the following format: + +Question: the input question you must answer +Thought: you should always think about what to do +Action: the action to take, should be one of [{tool_names}] +Action Input: the input to the action +Observation: the result of the action +... 
(this Thought/Action/Action Input/Observation can repeat N times) +Thought: I now know the final answer +Final Answer: the final answer to the original input question + +Question: {input} +{agent_scratchpad} + +Answer: """ +) + +# Prepare the tool_names string +tool_names = ", ".join([tool.name for tool in tools]) + +# Create the agent +agent = create_react_agent(llm, tools, prompt) + +class AgentComponent(BaseComponent): + def __init__(self, agent, tools, max_iterations=5): + super().__init__("AgentComponent") + self.agent = agent + self.tools = tools + self.max_iterations = max_iterations + self.agent_executor = AgentExecutor.from_agent_and_tools( + agent=self.agent, + tools=self.tools, + verbose=True, + handle_parsing_errors=True + ) + + def run(self, input_data): + logger.info(f"AgentComponent running with input: {input_data}") + try: + result = self.agent_executor.invoke( + {"input": input_data}, + {"max_iterations": self.max_iterations} + ) + logger.info(f"AgentComponent raw result: {json.dumps(result, indent=2)}") + + final_result = { + "result": result.get("output", "No output generated"), + "intermediate_steps": [ + { + "action": str(step[0]), + "observation": str(step[1]) + } for step in result.get("intermediate_steps", []) + ] + } + + logger.info(f"AgentComponent processed result: {json.dumps(final_result, indent=2)}") + return final_result + except Exception as e: + logger.error(f"Error in AgentComponent: {str(e)}") + logger.error(traceback.format_exc()) + return {"error": str(e)} + + +def save_output(result, mode): + output_dir = os.path.join(os.getcwd(), "output") + os.makedirs(output_dir, exist_ok=True) + filename = os.path.join(output_dir, f"result_{mode}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json") + with open(filename, 'w') as f: + json.dump(result, f, indent=2) + logger.info(f"Saved {mode} result to {filename}") + +def main(): + # Create simulation environment + sim_env = OrchestrationEnv() + + # Add agent component + agent_component = AgentComponent(agent, tools) + sim_env.add_component(agent_component) + + # Question + question = "Analyze the current state of renewable energy adoption worldwide. Consider technological advancements, economic factors, and policy initiatives in your analysis. Then, based on this analysis, predict the most promising renewable energy source for widespread adoption in the next decade." + + # Run simulation in agent mode + logger.info("Running agent simulation") + agent_results = sim_env.run_simulation(mode='agent', input_data=question) + logger.info(f"Agent results: {json.dumps(agent_results, indent=2)}") + + # Save output + if agent_results: + save_output(agent_results[0], "agent") + print("\nSimulation Result:") + print(json.dumps(agent_results[0], indent=2)) + else: + logger.error("No results returned from simulation") + print("Error: No results returned from simulation. 
Check the logs for details.") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/isopro/orchestration_simulation/orchestration_agent_example.py b/isopro/orchestration_simulation/orchestration_agent_example.py new file mode 100644 index 0000000000000000000000000000000000000000..f88f7c91415acf648bd097f67c018a33f6a987e0 --- /dev/null +++ b/isopro/orchestration_simulation/orchestration_agent_example.py @@ -0,0 +1,78 @@ +# main.py + +import os +import logging +import time +import psutil +from .orchestration_env import OrchestrationEnv +from .components.llama_agent import LLaMAAgent +from .components.analysis_agent import AnalysisAgent +from .components.writing_agent import WritingAgent +from .evaluation import evaluate_results, measure_coherence + +# Set up logging +logging.basicConfig(filename='logs/orchestration.log', level=logging.INFO, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') +logger = logging.getLogger(__name__) + +# Ensure necessary directories exist +os.makedirs('logs', exist_ok=True) +os.makedirs('output', exist_ok=True) + +def run_simulation(env, mode, task): + logger.info(f"Running in {mode} mode") + + start_time = time.time() + process = psutil.Process(os.getpid()) + initial_memory = process.memory_info().rss / 1024 / 1024 # in MB + + result = env.run_simulation(mode=mode, input_data={'task': task, 'run_order': 'first'}) + + final_memory = process.memory_info().rss / 1024 / 1024 # in MB + memory_usage = final_memory - initial_memory + execution_time = time.time() - start_time + + coherence = measure_coherence([r['result'] for r in result if 'result' in r]) + + return result, execution_time, memory_usage, coherence + +def save_output(mode, result): + with open(f'output/{mode}_output.txt', 'w') as f: + f.write(f"Mode: {mode}\n\n") + for i, r in enumerate(result): + f.write(f"Agent {i+1} output:\n") + f.write(str(r.get('result', 'No result')) + '\n\n') + +def main(): + env = OrchestrationEnv() + + # Add agents + env.add_component(LLaMAAgent("Research", "conduct thorough research on the impact of artificial intelligence on job markets in the next decade")) + env.add_component(AnalysisAgent("Analysis")) + env.add_component(WritingAgent("Writing")) + + task = "Prepare a comprehensive report on the impact of artificial intelligence on job markets in the next decade." 
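+ # The same task is run in parallel, sequence, and node modes so evaluate_results can compare execution time, memory usage, and output coherence across modes.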
+ + modes = ['parallel', 'sequence', 'node'] + results = {} + evaluations = {} + + for mode in modes: + result, execution_time, memory_usage, coherence = run_simulation(env, mode, task) + results[mode] = result + evaluations[mode] = { + 'execution_time': execution_time, + 'memory_usage': memory_usage, + 'coherence': coherence + } + save_output(mode, result) + + logger.info(f"{mode.capitalize()} mode results:") + for i, r in enumerate(result): + logger.info(f"Agent {i+1} output:\n{r.get('result', 'No result')}\n") + + best_mode = evaluate_results(evaluations) + logger.info(f"\nBest mode for this task: {best_mode}") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/isopro/orchestration_simulation/orchestration_env.py b/isopro/orchestration_simulation/orchestration_env.py new file mode 100644 index 0000000000000000000000000000000000000000..5ae900819efc1c89f3ff234eb6ad683488dd905d --- /dev/null +++ b/isopro/orchestration_simulation/orchestration_env.py @@ -0,0 +1,112 @@ +import logging +from concurrent.futures import ThreadPoolExecutor +import heapq +from typing import List +from tqdm import tqdm +from isopro.orchestration_simulation.components.base_component import BaseComponent + +logger = logging.getLogger(__name__) + +class OrchestrationEnv: + def __init__(self): + self.components: List[BaseComponent] = [] + + def add_component(self, component: BaseComponent): + if not isinstance(component, BaseComponent): + raise ValueError(f"Only BaseComponent instances can be added, got {type(component)}") + self.components.append(component) + logger.info(f"Added component: {component.name}") + + def run_simulation(self, mode='agent', input_data=None): + if not self.components: + logger.warning("No components to run") + return + + logger.info(f"Starting simulation in {mode} mode") + if mode == 'agent': + return self.run_agent_mode(input_data) + elif mode == 'sequence': + return self.run_in_sequence(input_data) + elif mode == 'parallel': + return self.run_in_parallel(input_data) + elif mode == 'node': + return self.run_as_node(input_data) + else: + raise ValueError("Invalid execution mode") + + def run_agent_mode(self, input_data): + logger.info("Running in agent mode") + agent_component = next((c for c in self.components if c.name == "AgentComponent"), None) + if not agent_component: + raise ValueError("AgentComponent not found") + + with tqdm(total=1, desc="Agent Progress") as pbar: + result = agent_component.run(input_data) + pbar.update(1) + + logger.info("Agent mode completed") + return [result] # Wrap the result in a list for consistency with other modes + + def run_in_sequence(self, input_data): + logger.info("Running in sequence mode") + results = [] + current_input = input_data + with tqdm(total=len(self.components), desc="Sequence Progress") as pbar: + for component in self.components: + try: + logger.info(f"Running component: {component.name}") + result = component.run(current_input) + results.append(result) + if 'result' in result: + current_input = result['result'] + else: + logger.warning(f"Component {component.name} did not return a 'result'. 
Using original input for next component.") + except Exception as e: + logger.error(f"Error in component {component.name}: {e}") + results.append({"error": str(e)}) + finally: + pbar.update(1) + + logger.info("Sequence mode completed") + return results + + def run_in_parallel(self, input_data): + logger.info("Running in parallel mode") + results = [] + with ThreadPoolExecutor() as executor: + futures = [executor.submit(component.run, input_data) for component in self.components] + with tqdm(total=len(futures), desc="Parallel Progress") as pbar: + for future in futures: + try: + result = future.result() + results.append(result) + except Exception as e: + logger.error(f"Error: {e}") + results.append({"error": str(e)}) + finally: + pbar.update(1) + + logger.info("Parallel mode completed") + return results + + def run_as_node(self, input_data): + logger.info("Running in node mode (priority-based)") + results = [] + priority_queue = [(i, component) for i, component in enumerate(self.components)] + heapq.heapify(priority_queue) + + with tqdm(total=len(self.components), desc="Node Progress") as pbar: + while priority_queue: + _, component = heapq.heappop(priority_queue) + try: + logger.info(f"Running component: {component.name}") + result = component.run(input_data) + results.append(result) + except Exception as e: + logger.error(f"Error in component {component.name}: {e}") + results.append({"error": str(e)}) + finally: + pbar.update(1) + + logger.info("Node mode completed") + return results \ No newline at end of file diff --git a/isopro/orchestration_simulation/run_orchestration.py b/isopro/orchestration_simulation/run_orchestration.py new file mode 100644 index 0000000000000000000000000000000000000000..84ab3bfc1c425fa6c88aadcad56522290b9f6c08 --- /dev/null +++ b/isopro/orchestration_simulation/run_orchestration.py @@ -0,0 +1,4 @@ +from isopro.orchestration_simulation.main import main + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/isopro/orchestration_simulation/utils.py b/isopro/orchestration_simulation/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..8ad5aabc652628998ee5a1374afbad77f2009be7 --- /dev/null +++ b/isopro/orchestration_simulation/utils.py @@ -0,0 +1,23 @@ +import logging +import os + +def setup_logging(log_file=None): + logger = logging.getLogger(__name__) + logger.setLevel(logging.INFO) + + formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') + + # Console handler + console_handler = logging.StreamHandler() + console_handler.setFormatter(formatter) + logger.addHandler(console_handler) + + if log_file: + # File handler + log_dir = os.path.join(os.getcwd(), "logs") + os.makedirs(log_dir, exist_ok=True) + file_handler = logging.FileHandler(os.path.join(log_dir, log_file)) + file_handler.setFormatter(formatter) + logger.addHandler(file_handler) + + return logger \ No newline at end of file diff --git a/isopro/rl/__init__.py b/isopro/rl/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a55431416aeec1e00c50fe0a9bf1821a5671a064 --- /dev/null +++ b/isopro/rl/__init__.py @@ -0,0 +1,10 @@ +""" +Reinforcement Learning module for the isopro package. 
+""" + +from .rl_environment import BaseRLEnvironment, GymRLEnvironment, LLMRLEnvironment +from .rl_agent import RLAgent +from .rl_utils import calculate_discounted_rewards, update_q_table +from .llm_cartpole_wrapper import LLMCartPoleWrapper + +__all__ = ["BaseRLEnvironment", "LLMRLEnvironment", "GymRLEnvironment", "LLMCartPoleWrapper", "RLEnvironment", "RLAgent", "calculate_discounted_rewards", "update_q_table"] \ No newline at end of file diff --git a/isopro/rl/__pycache__/__init__.cpython-38.pyc b/isopro/rl/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..07afa9ef45e1ffc29e79e15a83c6d1c124f24b0c Binary files /dev/null and b/isopro/rl/__pycache__/__init__.cpython-38.pyc differ diff --git a/isopro/rl/__pycache__/rl_agent.cpython-38.pyc b/isopro/rl/__pycache__/rl_agent.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a9882da9b4929a9184aab2f54f97af9fbf91cf25 Binary files /dev/null and b/isopro/rl/__pycache__/rl_agent.cpython-38.pyc differ diff --git a/isopro/rl/__pycache__/rl_environment.cpython-38.pyc b/isopro/rl/__pycache__/rl_environment.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ad4fe4cc7f706b859b1503cd40ed1e5aaf1b3c99 Binary files /dev/null and b/isopro/rl/__pycache__/rl_environment.cpython-38.pyc differ diff --git a/isopro/rl/llm_cartpole_wrapper.py b/isopro/rl/llm_cartpole_wrapper.py new file mode 100644 index 0000000000000000000000000000000000000000..cec3ccf5dca14eedb97ebf5a3d047f742f6ddb3b --- /dev/null +++ b/isopro/rl/llm_cartpole_wrapper.py @@ -0,0 +1,62 @@ +import gymnasium as gym +from isopro.rl.rl_environment import LLMRLEnvironment +import numpy as np +import anthropic +import logging + +# Set up logging +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') +logger = logging.getLogger(__name__) + +class LLMCartPoleWrapper(LLMRLEnvironment): + def __init__(self, agent_prompt, llm_call_limit: int, api_key: str): + super().__init__(agent_prompt, None) + self.cartpole_env = gym.make('CartPole-v1') + self.action_space = self.cartpole_env.action_space + self.observation_space = self.cartpole_env.observation_space + self.client = anthropic.Anthropic(api_key=api_key) + self.llm_call_count = 0 + self.llm_call_limit = llm_call_limit # Set the maximum number of LLM calls allowed + + def reset(self, **kwargs): + # Reset the environment and the LLM call count + self.llm_call_count = 0 + return self.cartpole_env.reset(**kwargs) + + def step(self, action): + if self.llm_call_count >= self.llm_call_limit: + # If the LLM call limit is reached, take a default action (e.g., action = 0) + logging.warning("LLM call limit reached, default action taken") + return self.cartpole_env.step(0) # Default action can be customized + + # Otherwise, proceed with the LLM call and increment the counter + self.llm_call_count += 1 + return self.cartpole_env.step(action) + + + def _llm_decision_to_cartpole_action(self, llm_decision): + if isinstance(llm_decision, (int, np.integer)): + return llm_decision + elif isinstance(llm_decision, str): + return 0 if "left" in llm_decision.lower() else 1 + else: + raise ValueError(f"Unexpected action type: {type(llm_decision)}") + + def _update_llm(self, observation, reward, done): + user_message = f"Observation: {observation}, Reward: {reward}, Done: {done}. What action should we take next?" 
+ + messages = self.conversation_history + [ + {"role": "user", "content": user_message}, + ] + + response = self.client.messages.create( + model="claude-3-opus-20240229", + max_tokens=150, + system=self.agent_prompt, + messages=messages + ) + + ai_response = response.content[0].text + self.conversation_history.append({"role": "user", "content": user_message}) + self.conversation_history.append({"role": "assistant", "content": ai_response}) + logger.debug(f"LLM updated. AI response: {ai_response}") \ No newline at end of file diff --git a/isopro/rl/main.py b/isopro/rl/main.py new file mode 100644 index 0000000000000000000000000000000000000000..15e183ac2d84ac53c154eb26a1a981ecd161eab1 --- /dev/null +++ b/isopro/rl/main.py @@ -0,0 +1,74 @@ +import gymnasium as gym +from .rl_agent import RLAgent +from .rl_environment import LLMRLEnvironment +from stable_baselines3 import PPO +import numpy as np +import anthropic +import os +import logging +from typing import Optional, Dict, Any +from tqdm import tqdm +import json +from datetime import datetime +from .llm_cartpole_wrapper import LLMCartPoleWrapper +from dotenv import load_dotenv + +load_dotenv() + +# Set up logging +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') +logger = logging.getLogger(__name__) + +def main(): + # Create output folder + output_folder = "output" + os.makedirs(output_folder, exist_ok=True) + + # Create a unique filename for this run + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + output_file = os.path.join(output_folder, f"cartpole_results_{timestamp}.json") + + agent_prompt = """You are an AI trained to play the CartPole game. + Your goal is to balance a pole on a moving cart for as long as possible. + You will receive observations about the cart's position, velocity, pole angle, and angular velocity. + Based on these, you should decide whether to move the cart left or right. + Respond with 'Move left' or 'Move right' for each decision.""" + + env = LLMCartPoleWrapper(agent_prompt, llm_call_limit=100, api_key=os.getenv("ANTHROPIC_API_KEY")) + rl_agent = RLAgent("LLM_CartPole_Agent", env, algorithm='PPO') + + logger.info("Starting training") + rl_agent.train(total_timesteps=1) + logger.info("Training completed") + + test_episodes = 1 + results = [] + + logger.info("Starting test episodes") + for episode in tqdm(range(test_episodes), desc="Test Episodes"): + obs, _ = env.reset() + done = False + total_reward = 0 + episode_length = 0 + while not done: + action, _ = rl_agent.model.predict(obs, deterministic=True) + obs, reward, terminated, truncated, _ = env.step(action) + total_reward += reward + episode_length += 1 + done = terminated or truncated + + logger.info(f"Episode {episode + 1} completed. Total reward: {total_reward}, Length: {episode_length}") + results.append({"episode": episode + 1, "total_reward": total_reward, "length": episode_length}) + + # Save results to file + with open(output_file, 'w') as f: + json.dump(results, f, indent=2) + logger.info(f"Results saved to {output_file}") + + # Print summary + average_reward = sum(r['total_reward'] for r in results) / len(results) + average_length = sum(r['length'] for r in results) / len(results) + logger.info(f"Test completed. 
Average reward: {average_reward:.2f}, Average length: {average_length:.2f}") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/isopro/rl/rl_agent.py b/isopro/rl/rl_agent.py new file mode 100644 index 0000000000000000000000000000000000000000..41f0f9b064faf282b977c7fe64b39478cca262f9 --- /dev/null +++ b/isopro/rl/rl_agent.py @@ -0,0 +1,93 @@ +""" +This module contains the RLAgent class, which implements a flexible +Reinforcement Learning agent capable of using different algorithms. +""" + +from stable_baselines3 import PPO, DQN, A2C +from ..agents.ai_agent import AI_Agent +import logging + +# Set up logging +logger = logging.getLogger(__name__) + +class RLAgent(AI_Agent): + """ + A flexible Reinforcement Learning agent that can use different algorithms. + """ + + def __init__(self, name, env, algorithm='PPO', policy='MlpPolicy', **kwargs): + """ + Initialize the RL agent. + + Args: + name (str): The name of the agent. + env: The environment the agent will interact with. + algorithm (str): The RL algorithm to use ('PPO', 'DQN', or 'A2C'). + policy (str): The policy network architecture to use. + **kwargs: Additional arguments to pass to the RL algorithm. + """ + super().__init__(name) + self.env = env + self.algorithm = algorithm + self.policy = policy + self.model = self._create_model(**kwargs) + logger.info(f"Initialized RLAgent '{name}' with {algorithm} algorithm") + + def _create_model(self, **kwargs): + """ + Create the RL model based on the specified algorithm. + + Args: + **kwargs: Additional arguments to pass to the RL algorithm. + + Returns: + The created RL model. + + Raises: + ValueError: If an unsupported algorithm is specified. + """ + if self.algorithm == 'PPO': + return PPO(self.policy, self.env, verbose=1, **kwargs) + elif self.algorithm == 'DQN': + return DQN(self.policy, self.env, verbose=1, **kwargs) + elif self.algorithm == 'A2C': + return A2C(self.policy, self.env, verbose=1, **kwargs) + else: + raise ValueError(f"Unsupported algorithm: {self.algorithm}") + + def train(self, total_timesteps=10000): + """ + Train the RL agent. + + Args: + total_timesteps (int): The total number of timesteps to train for. + """ + logger.info(f"Starting training of RLAgent '{self.name}' for {total_timesteps} timesteps") + self.model.learn(total_timesteps=total_timesteps) + logger.info(f"Completed training of RLAgent '{self.name}'") + + def run(self, episodes=1): + """ + Run the trained RL agent for a specified number of episodes. + + Args: + episodes (int): The number of episodes to run. + + Returns: + float: The average reward per episode. 
+ """ + total_reward = 0 + logger.info(f"Running RLAgent '{self.name}' for {episodes} episodes") + for episode in range(episodes): + obs = self.env.reset() + episode_reward = 0 + done = False + while not done: + action, _states = self.model.predict(obs, deterministic=True) + obs, reward, done, info = self.env.step(action) + episode_reward += reward + total_reward += episode_reward + logger.debug(f"Episode {episode + 1} reward: {episode_reward}") + average_reward = total_reward / episodes + logger.info(f"RLAgent '{self.name}' average reward over {episodes} episodes: {average_reward}") + return average_reward \ No newline at end of file diff --git a/isopro/rl/rl_environment.py b/isopro/rl/rl_environment.py new file mode 100644 index 0000000000000000000000000000000000000000..5a01995eebb86936ae4c3b0e59e2d8ed861ca99b --- /dev/null +++ b/isopro/rl/rl_environment.py @@ -0,0 +1,236 @@ +""" +This module contains the base and specific implementations of RL environments. +It includes a base class for RL environments and two subclasses: +one for LLM-based environments and another for traditional gym environments. +""" + +import gymnasium as gym +from gymnasium import spaces +import numpy as np +import anthropic +import os +import logging + +# Set up logging +logger = logging.getLogger(__name__) + +class BaseRLEnvironment(gym.Env): + """ + Base class for RL environments in the isopro package. + """ + + def __init__(self): + """Initialize the base RL environment.""" + super().__init__() + self.action_space = None + self.observation_space = None + logger.info("Initialized BaseRLEnvironment") + + def reset(self): + """Reset the environment to its initial state.""" + raise NotImplementedError("Subclasses must implement reset method") + + def step(self, action): + """ + Take a step in the environment. + + Args: + action: The action to take in the environment. + + Returns: + A tuple containing the next observation, reward, done flag, and info dictionary. + """ + raise NotImplementedError("Subclasses must implement step method") + +class LLMRLEnvironment(BaseRLEnvironment): + """ + RL environment that uses a Language Model for interactions. + """ + + def __init__(self, agent_prompt, ai_agent): + """ + Initialize the LLM-based RL environment. + + Args: + agent_prompt (str): The prompt to guide the AI agent's behavior. + ai_agent: The AI agent to interact with. + """ + super().__init__() + self.agent_prompt = agent_prompt + self.ai_agent = ai_agent + self.action_space = spaces.Discrete(5) # Define the action space + self.observation_space = spaces.Box(low=0, high=1, shape=(10,), dtype=np.float32) + self.current_step = 0 + self.conversation_history = [] + self.client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY")) + logger.info("Initialized LLMRLEnvironment") + + def reset(self): + """ + Reset the LLM environment to its initial state. + + Returns: + numpy.ndarray: The initial observation. + """ + self.current_step = 0 + self.conversation_history = [] + logger.info("Reset LLMRLEnvironment") + return np.random.random(10) # Initial observation + + def step(self, action): + """ + Take a step in the LLM environment. + + Args: + action: The action to take in the environment. 
+ + Returns: + tuple: (observation, reward, done, info) + """ + self.current_step += 1 + done = self.current_step >= 10 + + # Prepare the message for the AI model + messages = [ + {"role": "system", "content": self.agent_prompt}, + {"role": "user", "content": f"Action: {action}"}, + ] + self.conversation_history + + # Get response from the AI model + response = self.client.messages.create( + model="claude-3-opus-20240229", + max_tokens=1000, + messages=messages + ) + + ai_response = response.content[0].text + self.conversation_history.append({"role": "assistant", "content": ai_response}) + + reward = self.calculate_reward(ai_response) + observation = self.update_observation(ai_response) + + logger.info(f"Step {self.current_step}: Action={action}, Reward={reward}, Done={done}") + return observation, reward, done, {} + + def calculate_reward(self, response): + """ + Calculate the reward based on the AI's response. + + Args: + response (str): The AI's response. + + Returns: + float: The calculated reward. + """ + adherence_score = self.evaluate_persona_adherence(response) + human_feedback = self.get_human_feedback() + total_reward = adherence_score + human_feedback + logger.debug(f"Calculated reward: {total_reward}") + return total_reward + + def evaluate_persona_adherence(self, response): + """ + Evaluate how well the AI's response adheres to the given persona. + + Args: + response (str): The AI's response. + + Returns: + float: The adherence score. + """ + evaluation = self.client.messages.create( + model="claude-3-opus-20240229", + max_tokens=100, + messages=[ + {"role": "system", "content": "Evaluate how well the following response adheres to the given persona. Return a score between 0 and 1."}, + {"role": "user", "content": f"Persona: {self.agent_prompt}\nResponse: {response}"} + ] + ) + adherence_score = float(evaluation.content[0].text) + logger.debug(f"Persona adherence score: {adherence_score}") + return adherence_score + + def get_human_feedback(self): + """ + Simulate human feedback. + + Returns: + float: A random value between -0.5 and 0.5 to simulate human feedback. + """ + feedback = np.random.uniform(-0.5, 0.5) + logger.debug(f"Simulated human feedback: {feedback}") + return feedback + + def update_observation(self, response): + """ + Update the observation based on the AI's response. + + Args: + response (str): The AI's response. + + Returns: + numpy.ndarray: The updated observation. + """ + # TODO: Implement actual feature extraction from the response + observation = np.random.random(10) + logger.debug(f"Updated observation: {observation}") + return observation + +class GymRLEnvironment(BaseRLEnvironment): + """ + Wrapper for standard gym environments to be used in the isopro framework. + """ + + def __init__(self, env_name): + """ + Initialize the gym environment wrapper. + + Args: + env_name (str): The name of the gym environment to create. + """ + super().__init__() + self.env = gym.make(env_name) + self.action_space = self.env.action_space + self.observation_space = self.env.observation_space + logger.info(f"Initialized GymRLEnvironment with {env_name}") + + def reset(self): + """ + Reset the gym environment. + + Returns: + The initial observation from the gym environment. + """ + logger.info("Reset GymRLEnvironment") + return self.env.reset() + + def step(self, action): + """ + Take a step in the gym environment. + + Args: + action: The action to take in the environment. + + Returns: + tuple: (observation, reward, done, info) as returned by the gym environment. 
+ """ + result = self.env.step(action) + logger.debug(f"GymRLEnvironment step: action={action}, result={result}") + return result + + def render(self, mode='human'): + """ + Render the gym environment. + + Args: + mode (str): The mode to render the environment in. + + Returns: + The rendering of the environment. + """ + return self.env.render(mode) + + def close(self): + """Close the gym environment.""" + logger.info("Closing GymRLEnvironment") + return self.env.close() \ No newline at end of file diff --git a/isopro/rl/rl_utils.py b/isopro/rl/rl_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..2290ea81102e87c755d29f036f0ceac796a20e39 --- /dev/null +++ b/isopro/rl/rl_utils.py @@ -0,0 +1,171 @@ +""" +Reinforcement Learning Utilities + +This module provides utility functions for Reinforcement Learning tasks, +including reward calculation, Q-table updates, action selection, and more. +""" + +import numpy as np +import random +from typing import List, Tuple, Dict, Any +import logging + +logger = logging.getLogger(__name__) + +def update_q_table(q_table: np.ndarray, state: int, action: int, reward: float, next_state: int, alpha: float, gamma: float) -> np.ndarray: + """ + Update a Q-table using the Q-learning algorithm. + + Args: + q_table (np.ndarray): The current Q-table. + state (int): The current state. + action (int): The action taken. + reward (float): The reward received. + next_state (int): The next state. + alpha (float): The learning rate. + gamma (float): The discount factor. + + Returns: + np.ndarray: The updated Q-table. + """ + current_q = q_table[state, action] + max_future_q = np.max(q_table[next_state, :]) + new_q = (1 - alpha) * current_q + alpha * (reward + gamma * max_future_q) + q_table[state, action] = new_q + return q_table + +def epsilon_greedy_action(q_table: np.ndarray, state: int, epsilon: float) -> int: + """ + Select an action using an epsilon-greedy policy. + + Args: + q_table (np.ndarray): The current Q-table. + state (int): The current state. + epsilon (float): The exploration rate. + + Returns: + int: The selected action. + """ + if random.uniform(0, 1) < epsilon: + return random.randint(0, q_table.shape[1] - 1) + else: + return np.argmax(q_table[state, :]) + +def calculate_discounted_rewards(rewards: List[float], gamma: float) -> np.ndarray: + """ + Calculate discounted rewards for a list of rewards. + + Args: + rewards (List[float]): The list of rewards. + gamma (float): The discount factor. + + Returns: + np.ndarray: The array of discounted rewards. + """ + discounted_rewards = np.zeros_like(rewards, dtype=float) + running_sum = 0 + for t in reversed(range(len(rewards))): + running_sum = running_sum * gamma + rewards[t] + discounted_rewards[t] = running_sum + return discounted_rewards + +def normalize_rewards(rewards: np.ndarray) -> np.ndarray: + """ + Normalize rewards to have zero mean and unit variance. + + Args: + rewards (np.ndarray): The array of rewards. + + Returns: + np.ndarray: The normalized rewards. + """ + return (rewards - np.mean(rewards)) / (np.std(rewards) + 1e-8) + +def create_epsilon_decay_schedule(start_epsilon: float, end_epsilon: float, decay_steps: int) -> callable: + """ + Create an epsilon decay schedule function. + + Args: + start_epsilon (float): The starting epsilon value. + end_epsilon (float): The final epsilon value. + decay_steps (int): The number of steps over which to decay epsilon. + + Returns: + callable: A function that takes the current step and returns the current epsilon value. 
+ """ + def epsilon_decay(step: int) -> float: + return max(end_epsilon, start_epsilon - (start_epsilon - end_epsilon) * (step / decay_steps)) + return epsilon_decay + +class ExperienceReplayBuffer: + """A simple experience replay buffer for storing and sampling transitions.""" + + def __init__(self, capacity: int): + """ + Initialize the experience replay buffer. + + Args: + capacity (int): The maximum number of transitions to store. + """ + self.capacity = capacity + self.buffer: List[Tuple[int, int, float, int, bool]] = [] + self.position = 0 + + def push(self, state: int, action: int, reward: float, next_state: int, done: bool): + """ + Add a transition to the buffer. + + Args: + state (int): The current state. + action (int): The action taken. + reward (float): The reward received. + next_state (int): The next state. + done (bool): Whether the episode is done. + """ + if len(self.buffer) < self.capacity: + self.buffer.append(None) + self.buffer[self.position] = (state, action, reward, next_state, done) + self.position = (self.position + 1) % self.capacity + + def sample(self, batch_size: int) -> List[Tuple[int, int, float, int, bool]]: + """ + Sample a batch of transitions from the buffer. + + Args: + batch_size (int): The number of transitions to sample. + + Returns: + List[Tuple[int, int, float, int, bool]]: A list of sampled transitions. + """ + return random.sample(self.buffer, min(batch_size, len(self.buffer))) + + def __len__(self) -> int: + """Return the current size of the buffer.""" + return len(self.buffer) + +def soft_update(target_network: Any, source_network: Any, tau: float): + """ + Perform a soft update of the target network parameters. + + Args: + target_network (Any): The target network to be updated. + source_network (Any): The source network to update from. + tau (float): The soft update coefficient (0 < tau < 1). + """ + for target_param, source_param in zip(target_network.parameters(), source_network.parameters()): + target_param.data.copy_(tau * source_param.data + (1.0 - tau) * target_param.data) + +def huber_loss(x: np.ndarray, delta: float = 1.0) -> np.ndarray: + """ + Compute the Huber loss. + + Args: + x (np.ndarray): The input array. + delta (float): The Huber loss parameter. + + Returns: + np.ndarray: The Huber loss values. + """ + return np.where(np.abs(x) < delta, 0.5 * x**2, delta * (np.abs(x) - 0.5 * delta)) + +logger.info("RL utilities module loaded") \ No newline at end of file diff --git a/isopro/rl/rl_wrapper.py b/isopro/rl/rl_wrapper.py new file mode 100644 index 0000000000000000000000000000000000000000..d705cac811a0ec4e491a425fc8b1ef2081509f3d --- /dev/null +++ b/isopro/rl/rl_wrapper.py @@ -0,0 +1,43 @@ +""" +This module contains the RLEnvironmentWrapper class, which provides a wrapper +for creating and managing RL environments and agents within the isopro framework. +""" + +from ..environments.simulation_environment import SimulationEnvironment +from .rl_environment import LLMRLEnvironment, GymRLEnvironment +from .rl_agent import RLAgent +from ..agents.ai_agent import AI_Agent +import logging + +# Set up logging +logger = logging.getLogger(__name__) + +class RLEnvironmentWrapper(SimulationEnvironment): + """ + A wrapper class for creating and managing RL environments and agents. + """ + + def __init__(self, env_type='llm', num_agents=1, agent_prompt=None, gym_env_name=None): + """ + Initialize the RLEnvironmentWrapper. + + Args: + env_type (str): The type of environment to create ('llm' or 'gym'). 
+ num_agents (int): The number of agents to create. + agent_prompt (str, optional): The prompt to use for LLM-based environments. + gym_env_name (str, optional): The name of the gym environment to create. + """ + super().__init__() + self.num_agents = num_agents + self.agent_prompt = agent_prompt + self.gym_env_name = gym_env_name + self.env_type = env_type + self._create_rl_agents() + logger.info(f"Initialized RLEnvironmentWrapper with {env_type} environment and {num_agents} agents") + + def _create_rl_agents(self): + """ + Create RL agents based on the specified environment type and number of agents. + """ + for i in range(self.num_agents): + if self. \ No newline at end of file diff --git a/isopro/utils/__init__.py b/isopro/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b7c3883fad958129fe172ae0f89ec137b51cb242 --- /dev/null +++ b/isopro/utils/__init__.py @@ -0,0 +1,7 @@ +""" +Utility functions and classes for the isopro package. +""" + +from .logging_utils import setup_logger + +__all__ = ["setup_logger"] \ No newline at end of file diff --git a/isopro/utils/__pycache__/__init__.cpython-38.pyc b/isopro/utils/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0b68859ea00dd4cc0994401884aff3f45853a4c2 Binary files /dev/null and b/isopro/utils/__pycache__/__init__.cpython-38.pyc differ diff --git a/isopro/utils/__pycache__/analyze_adversarial_sim.cpython-38.pyc b/isopro/utils/__pycache__/analyze_adversarial_sim.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..747572565c584c65c4bb51846b779fe6cc991a9c Binary files /dev/null and b/isopro/utils/__pycache__/analyze_adversarial_sim.cpython-38.pyc differ diff --git a/isopro/utils/__pycache__/llm_metrics.cpython-38.pyc b/isopro/utils/__pycache__/llm_metrics.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..999258bbb8812e0a9fe0c480bb4d993b67b020a2 Binary files /dev/null and b/isopro/utils/__pycache__/llm_metrics.cpython-38.pyc differ diff --git a/isopro/utils/__pycache__/logging_utils.cpython-38.pyc b/isopro/utils/__pycache__/logging_utils.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..20846f95ba8cbaa9a387c54522e9cdfbb72d3e98 Binary files /dev/null and b/isopro/utils/__pycache__/logging_utils.cpython-38.pyc differ diff --git a/isopro/utils/analyze_adversarial_sim.py b/isopro/utils/analyze_adversarial_sim.py new file mode 100644 index 0000000000000000000000000000000000000000..aa79b377ed79b937944a572e5c90d86646a063a0 --- /dev/null +++ b/isopro/utils/analyze_adversarial_sim.py @@ -0,0 +1,98 @@ +""" +Analyze Adversarial Simulation + +This module provides functions for analyzing the results of adversarial simulations. +""" + +from typing import List, Dict, Any +from .llm_metrics import evaluate_llm_metrics +import numpy as np +from sklearn.metrics.pairwise import cosine_similarity +from sentence_transformers import SentenceTransformer +import logging + +logger = logging.getLogger(__name__) + +def calculate_text_similarity(text1: str, text2: str) -> float: + """ + Calculate the cosine similarity between two texts using sentence embeddings. + + Args: + text1 (str): The first text. + text2 (str): The second text. + + Returns: + float: The cosine similarity between the two texts. 
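+
+    Note:
+        The SentenceTransformer model is instantiated on every call; callers
+        comparing many pairs may want to load it once and reuse it.
+
+    Example (illustrative):
+        sim = calculate_text_similarity("The cat sat.", "A cat was sitting.")
+        # sim is a cosine similarity score, close to 1.0 for near-paraphrases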
+ """ + model = SentenceTransformer('all-MiniLM-L6-v2') + embeddings = model.encode([text1, text2]) + similarity = cosine_similarity([embeddings[0]], [embeddings[1]])[0][0] + return similarity + +def analyze_adversarial_results(results: List[Dict[str, Any]]) -> Dict[str, Any]: + """ + Analyze the results of an adversarial simulation. + + Args: + results (List[Dict[str, Any]]): The list of simulation results to analyze. + + Returns: + Dict[str, Any]: A dictionary containing various analysis metrics. + """ + original_inputs = [r["original_input"] for r in results] + perturbed_inputs = [r["perturbed_input"] for r in results] + original_outputs = [r["original_output"] for r in results] + perturbed_outputs = [r["perturbed_output"] for r in results] + + # Calculate input perturbation metrics + input_similarities = [calculate_text_similarity(orig, pert) for orig, pert in zip(original_inputs, perturbed_inputs)] + avg_input_similarity = np.mean(input_similarities) + + # Calculate output perturbation metrics + output_similarities = [calculate_text_similarity(orig, pert) for orig, pert in zip(original_outputs, perturbed_outputs)] + avg_output_similarity = np.mean(output_similarities) + + # Calculate LLM metrics for original and perturbed outputs + original_metrics = evaluate_llm_metrics(original_inputs, original_outputs) + perturbed_metrics = evaluate_llm_metrics(original_inputs, perturbed_outputs) + + # Calculate relative changes in LLM metrics + metric_changes = { + f"{metric}_change": (perturbed_metrics[metric] - original_metrics[metric]) / original_metrics[metric] + for metric in original_metrics.keys() + } + + analysis_results = { + "avg_input_similarity": avg_input_similarity, + "avg_output_similarity": avg_output_similarity, + "original_metrics": original_metrics, + "perturbed_metrics": perturbed_metrics, + "metric_changes": metric_changes + } + + logger.info("Completed analysis of adversarial simulation results") + return analysis_results + +def summarize_adversarial_impact(analysis_results: Dict[str, Any]) -> str: + """ + Generate a summary of the impact of adversarial attacks based on the analysis results. + + Args: + analysis_results (Dict[str, Any]): The results of the adversarial analysis. + + Returns: + str: A summary of the adversarial impact. 
+ """ + summary = [] + summary.append(f"Input Perturbation: The average similarity between original and perturbed inputs is {analysis_results['avg_input_similarity']:.2f}") + summary.append(f"Output Perturbation: The average similarity between original and perturbed outputs is {analysis_results['avg_output_similarity']:.2f}") + + for metric, change in analysis_results['metric_changes'].items(): + impact = "increased" if change > 0 else "decreased" + summary.append(f"{metric.capitalize()}: {impact} by {abs(change)*100:.2f}%") + + most_affected_metric = max(analysis_results['metric_changes'], key=lambda k: abs(analysis_results['metric_changes'][k])) + summary.append(f"The most affected metric was {most_affected_metric}, with a change of {abs(analysis_results['metric_changes'][most_affected_metric])*100:.2f}%") + + logger.info("Generated summary of adversarial impact") + return "\n".join(summary) \ No newline at end of file diff --git a/isopro/utils/conversation_analysis.py b/isopro/utils/conversation_analysis.py new file mode 100644 index 0000000000000000000000000000000000000000..ee19e19d6e7864788356176fd66bd7c881193f14 --- /dev/null +++ b/isopro/utils/conversation_analysis.py @@ -0,0 +1,106 @@ +""" +Conversation Analysis Utilities + +This module provides functions for analyzing conversation simulations, +including sentiment analysis, response time analysis, and context adaptation analysis. +""" + +import logging +from textblob import TextBlob +import numpy as np +from sklearn.feature_extraction.text import TfidfVectorizer +from sklearn.metrics.pairwise import cosine_similarity + +logger = logging.getLogger(__name__) + +def analyze_sentiment(text): + """ + Perform sentiment analysis on the given text. + + Args: + text (str): The text to analyze. + + Returns: + float: The sentiment polarity (-1 to 1, where -1 is very negative and 1 is very positive). + """ + blob = TextBlob(text) + return blob.sentiment.polarity + +def analyze_response_time(conversation_history): + """ + Analyze the response time in a conversation. + + Args: + conversation_history (list): A list of dictionaries containing the conversation history. + + Returns: + dict: A dictionary containing average response times for the AI and the user. + """ + ai_response_times = [] + user_response_times = [] + + for i in range(1, len(conversation_history)): + time_diff = conversation_history[i]['timestamp'] - conversation_history[i-1]['timestamp'] + if conversation_history[i]['role'] == 'assistant': + ai_response_times.append(time_diff) + else: + user_response_times.append(time_diff) + + return { + 'avg_ai_response_time': np.mean(ai_response_times), + 'avg_user_response_time': np.mean(user_response_times) + } + +def analyze_context_adaptation(conversation_history): + """ + Analyze how well the AI adapts to the conversational context. + + Args: + conversation_history (list): A list of dictionaries containing the conversation history. + + Returns: + float: A score representing the AI's context adaptation (0 to 1, where 1 is perfect adaptation). 
+ """ + ai_responses = [msg['content'] for msg in conversation_history if msg['role'] == 'assistant'] + user_messages = [msg['content'] for msg in conversation_history if msg['role'] == 'user'] + + if len(ai_responses) < 2 or len(user_messages) < 2: + return 0.0 + + vectorizer = TfidfVectorizer() + user_vectors = vectorizer.fit_transform(user_messages) + ai_vectors = vectorizer.transform(ai_responses) + + context_scores = [] + for i in range(1, len(ai_responses)): + user_context = user_vectors[i-1:i+1] + ai_response = ai_vectors[i] + similarity = cosine_similarity(user_context, ai_response) + context_scores.append(np.mean(similarity)) + + return np.mean(context_scores) + +def analyze_conversation(conversation_history): + """ + Perform a comprehensive analysis of the conversation. + + Args: + conversation_history (list): A list of dictionaries containing the conversation history. + + Returns: + dict: A dictionary containing various analysis results. + """ + sentiment_scores = [analyze_sentiment(msg['content']) for msg in conversation_history] + response_times = analyze_response_time(conversation_history) + context_adaptation = analyze_context_adaptation(conversation_history) + + analysis_results = { + 'overall_sentiment': np.mean(sentiment_scores), + 'sentiment_trend': np.polyfit(range(len(sentiment_scores)), sentiment_scores, 1)[0], + 'avg_ai_response_time': response_times['avg_ai_response_time'], + 'avg_user_response_time': response_times['avg_user_response_time'], + 'context_adaptation_score': context_adaptation + } + + logger.info("Completed conversation analysis") + return analysis_results \ No newline at end of file diff --git a/isopro/utils/llm_metrics.py b/isopro/utils/llm_metrics.py new file mode 100644 index 0000000000000000000000000000000000000000..295ece0e282fc9478253949c0ab39fc25fc718dd --- /dev/null +++ b/isopro/utils/llm_metrics.py @@ -0,0 +1,176 @@ +""" +LLM Metrics Utilities + +This module provides functions for calculating traditional LLM metrics +such as BLEU, ROUGE, Perplexity, Coherence, and others, with the ability +to choose custom Hugging Face transformers for certain metrics. +""" + +import logging +import numpy as np +import torch +from nltk.translate.bleu_score import sentence_bleu +from rouge import Rouge +from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModel +from sentence_transformers import SentenceTransformer +from sklearn.metrics import f1_score, precision_score, recall_score +from sklearn.metrics.pairwise import cosine_similarity + +logger = logging.getLogger(__name__) + +# Default models +DEFAULT_PERPLEXITY_MODEL = "gpt2" +DEFAULT_COHERENCE_MODEL = "sentence-transformers/all-MiniLM-L6-v2" + +def load_model(model_name, model_class): + """ + Load a model from Hugging Face's model hub. + + Args: + model_name (str): The name of the model to load. + model_class: The class of the model (e.g., AutoModelForCausalLM, AutoModel). + + Returns: + tuple: The loaded model and tokenizer. + """ + try: + model = model_class.from_pretrained(model_name) + tokenizer = AutoTokenizer.from_pretrained(model_name) + return model, tokenizer + except Exception as e: + logger.error(f"Error loading model {model_name}: {e}") + raise + +def calculate_bleu(reference, candidate): + """ + Calculate the BLEU score for a given reference and candidate. + + Args: + reference (list): A list of reference sentences. + candidate (str): The candidate sentence to evaluate. + + Returns: + float: The BLEU score. 
+ """ + return sentence_bleu([reference.split()], candidate.split()) + +def calculate_rouge(reference, candidate): + """ + Calculate the ROUGE score for a given reference and candidate. + + Args: + reference (str): The reference sentence. + candidate (str): The candidate sentence to evaluate. + + Returns: + dict: A dictionary containing ROUGE-1, ROUGE-2, and ROUGE-L scores. + """ + rouge = Rouge() + scores = rouge.get_scores(candidate, reference) + return { + 'rouge-1': scores[0]['rouge-1']['f'], + 'rouge-2': scores[0]['rouge-2']['f'], + 'rouge-l': scores[0]['rouge-l']['f'] + } + +def calculate_perplexity(text, model_name=None): + """ + Calculate the perplexity of a given text using a specified or default language model. + + Args: + text (str): The text to evaluate. + model_name (str, optional): The name of the Hugging Face model to use for perplexity calculation. + + Returns: + float: The perplexity score. + """ + model_name = model_name or DEFAULT_PERPLEXITY_MODEL + model, tokenizer = load_model(model_name, AutoModelForCausalLM) + + inputs = tokenizer(text, return_tensors='pt') + with torch.no_grad(): + outputs = model(**inputs, labels=inputs.input_ids) + + return np.exp(outputs.loss.item()) + +def calculate_coherence(text, model_name=None): + """ + Calculate the coherence of a given text using sentence embeddings. + + Args: + text (str): The text to evaluate. + model_name (str, optional): The name of the Sentence Transformer model to use for coherence calculation. + + Returns: + float: The coherence score. + """ + sentences = text.split('.') + if len(sentences) < 2: + return 1.0 + + model_name = model_name or DEFAULT_COHERENCE_MODEL + model = SentenceTransformer(model_name) + embeddings = model.encode(sentences) + + coherence_scores = [] + for i in range(len(embeddings) - 1): + coherence_scores.append(cosine_similarity([embeddings[i]], [embeddings[i+1]])[0][0]) + + return np.mean(coherence_scores) + +def calculate_f1_precision_recall(true_labels, predicted_labels): + """ + Calculate F1 score, precision, and recall. + + Args: + true_labels (list): The true labels. + predicted_labels (list): The predicted labels. + + Returns: + dict: A dictionary containing F1 score, precision, and recall. + """ + return { + 'f1_score': f1_score(true_labels, predicted_labels, average='weighted'), + 'precision': precision_score(true_labels, predicted_labels, average='weighted'), + 'recall': recall_score(true_labels, predicted_labels, average='weighted') + } + +def evaluate_llm_metrics(reference_texts, generated_texts, true_labels=None, predicted_labels=None, perplexity_model=None, coherence_model=None): + """ + Evaluate various LLM metrics for given reference and generated texts. + + Args: + reference_texts (list): A list of reference texts. + generated_texts (list): A list of generated texts to evaluate. + true_labels (list, optional): True labels for classification metrics. + predicted_labels (list, optional): Predicted labels for classification metrics. + perplexity_model (str, optional): The name of the model to use for perplexity calculation. + coherence_model (str, optional): The name of the model to use for coherence calculation. + + Returns: + dict: A dictionary containing various LLM metrics. 
+ """ + metrics = {} + + # BLEU + metrics['bleu'] = np.mean([calculate_bleu(ref, gen) for ref, gen in zip(reference_texts, generated_texts)]) + + # ROUGE + rouge_scores = [calculate_rouge(ref, gen) for ref, gen in zip(reference_texts, generated_texts)] + metrics['rouge-1'] = np.mean([score['rouge-1'] for score in rouge_scores]) + metrics['rouge-2'] = np.mean([score['rouge-2'] for score in rouge_scores]) + metrics['rouge-l'] = np.mean([score['rouge-l'] for score in rouge_scores]) + + # Perplexity + metrics['perplexity'] = np.mean([calculate_perplexity(text, perplexity_model) for text in generated_texts]) + + # Coherence + metrics['coherence'] = np.mean([calculate_coherence(text, coherence_model) for text in generated_texts]) + + # F1, Precision, Recall (if labels are provided) + if true_labels and predicted_labels: + classification_metrics = calculate_f1_precision_recall(true_labels, predicted_labels) + metrics.update(classification_metrics) + + logger.info("Completed LLM metrics evaluation") + return metrics \ No newline at end of file diff --git a/isopro/utils/logging_utils.py b/isopro/utils/logging_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..73e3d799450f2eba6783e5f0cce58e7de6fdc2aa --- /dev/null +++ b/isopro/utils/logging_utils.py @@ -0,0 +1,32 @@ +"""Logging utilities for the isopro package.""" + +import logging + +def setup_logger(name, level=logging.INFO): + """ + Set up a logger with the given name and level. + + Args: + name (str): The name of the logger. + level (int): The logging level (default: logging.INFO). + + Returns: + logging.Logger: The configured logger. + """ + logger = logging.getLogger(name) + logger.setLevel(level) + + # Check if the logger already has handlers to avoid duplicate logs + if not logger.handlers: + # Create a console handler + console_handler = logging.StreamHandler() + console_handler.setLevel(level) + + # Create a formatter and set it for the handler + formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') + console_handler.setFormatter(formatter) + + # Add the handler to the logger + logger.addHandler(console_handler) + + return logger diff --git a/isopro/workflow_simulation/__init__.py b/isopro/workflow_simulation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..65850e7ead832fd8ad7a465b34ac68532a69cb2a --- /dev/null +++ b/isopro/workflow_simulation/__init__.py @@ -0,0 +1,49 @@ +""" +Workflow Simulator Package + +A package for automating and learning UI workflows from video demonstrations. +Provides tools for training agents, validating workflows, and visualizing results. 
+""" + +# Core components +from .workflow_simulator import WorkflowSimulator, EpisodeMetrics +from .workflow_environment import ( + WorkflowEnvironment, + WorkflowState, + UIElement, + UIElementDetector, + MotionDetector +) + +# Configuration classes +from .agent_config import AgentConfig +from .workflow_visualizer import VisualizationConfig +from .workflow_validator import ValidationConfig + +# Main automation +from .main import WorkflowAutomation + +__version__ = "0.1.0" + +__all__ = [ + # Core simulator and environment + "WorkflowSimulator", + "WorkflowEnvironment", + + # Environment components + "WorkflowState", + "UIElement", + "UIElementDetector", + "MotionDetector", + + # Metrics and tracking + "EpisodeMetrics", + + # Configuration + "AgentConfig", + "VisualizationConfig", + "ValidationConfig", + + # Main automation + "WorkflowAutomation" +] diff --git a/isopro/workflow_simulation/agent_config.py b/isopro/workflow_simulation/agent_config.py new file mode 100644 index 0000000000000000000000000000000000000000..51342a7f6f0696040a841cf4121c204431c56065 --- /dev/null +++ b/isopro/workflow_simulation/agent_config.py @@ -0,0 +1,62 @@ +""" +Configuration module for workflow automation agents. + +Defines the configuration parameters and validation for training and evaluating +workflow automation agents. +""" + +from dataclasses import dataclass +from typing import Optional, Dict, Any + +@dataclass +class AgentConfig: + """Configuration for workflow automation agents.""" + + learning_rate: float = 3e-4 + pretrain_epochs: int = 10 + use_demonstration: bool = True + use_reasoning: bool = True + reward_threshold: float = 0.8 + batch_size: int = 64 + max_gradient_norm: float = 1.0 + update_frequency: int = 1 + buffer_size: int = 10000 + + @classmethod + def from_dict(cls, config_dict: Dict[str, Any]) -> 'AgentConfig': + """Create configuration from dictionary. 
+ + Args: + config_dict: Dictionary containing configuration parameters + + Returns: + AgentConfig instance with specified parameters + """ + return cls( + learning_rate=config_dict.get('learning_rate', 3e-4), + pretrain_epochs=config_dict.get('pretrain_epochs', 10), + use_demonstration=config_dict.get('use_demonstration', True), + use_reasoning=config_dict.get('use_reasoning', True), + reward_threshold=config_dict.get('reward_threshold', 0.8), + batch_size=config_dict.get('batch_size', 64), + max_gradient_norm=config_dict.get('max_gradient_norm', 1.0), + update_frequency=config_dict.get('update_frequency', 1), + buffer_size=config_dict.get('buffer_size', 10000) + ) + + def validate(self) -> None: + """Validate configuration parameters.""" + if self.learning_rate <= 0: + raise ValueError("Learning rate must be positive") + if self.pretrain_epochs < 0: + raise ValueError("Pretrain epochs must be non-negative") + if not 0 <= self.reward_threshold <= 1: + raise ValueError("Reward threshold must be between 0 and 1") + if self.batch_size <= 0: + raise ValueError("Batch size must be positive") + if self.max_gradient_norm <= 0: + raise ValueError("Max gradient norm must be positive") + if self.update_frequency <= 0: + raise ValueError("Update frequency must be positive") + if self.buffer_size <= 0: + raise ValueError("Buffer size must be positive") \ No newline at end of file diff --git a/isopro/workflow_simulation/main.py b/isopro/workflow_simulation/main.py new file mode 100644 index 0000000000000000000000000000000000000000..d8f37eba0751c83879c5c750d124e0d019e9f36c --- /dev/null +++ b/isopro/workflow_simulation/main.py @@ -0,0 +1,176 @@ +""" +Main Module for Workflow Automation + +A streamlined entry point for the workflow automation system that captures, learns, +and replicates UI workflows from video demonstrations. 
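+
+Example invocation (illustrative; paths are placeholders):
+
+    python -m isopro.workflow_simulation.main --video demo.mp4 --config config.json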
+""" + +import argparse +import logging +from pathlib import Path +import json +from datetime import datetime +from typing import Dict + +from .workflow_simulator import WorkflowSimulator +from .workflow_agent import AgentConfig +from .workflow_visualizer import VisualizationConfig +from .workflow_validator import ValidationConfig +from .agent_config import AgentConfig + +class WorkflowAutomation: + """Main class for handling workflow automation setup and execution.""" + + def __init__(self, args: argparse.Namespace): + self.video_path = args.video + self.config_path = args.config + self.output_dir = Path(args.output) + self.log_dir = Path(args.logs) + + # Create necessary directories + self.output_dir.mkdir(parents=True, exist_ok=True) + self.log_dir.mkdir(parents=True, exist_ok=True) + + # Setup logging + self._setup_logging() + self.logger = logging.getLogger(__name__) + + # Load configuration + self.config = self._load_config() + + def _setup_logging(self): + """Configure logging with file and console output.""" + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + log_path = self.log_dir / f"workflow_{timestamp}.log" + + logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', + handlers=[ + logging.FileHandler(log_path), + logging.StreamHandler() + ] + ) + + def _load_config(self) -> Dict: + """Load and validate configuration file.""" + try: + with open(self.config_path) as f: + return json.load(f) + except Exception as e: + self.logger.error(f"Failed to load config file: {e}") + raise + + def _create_configs(self): + """Create configuration objects from loaded config.""" + # Agent configuration + self.agent_config = AgentConfig( + learning_rate=self.config.get('learning_rate', 3e-4), + pretrain_epochs=self.config.get('pretrain_epochs', 10), + use_demonstration=True, + use_reasoning=True, + reward_threshold=self.config.get('reward_threshold', 0.8) # Added this line + ) + + # Visualization configuration + self.viz_config = VisualizationConfig( + show_ui_elements=True, + show_cursor=True, + show_actions=True, + save_frames=True, + real_time_display=self.config.get('show_visualization', True) + ) + + # Validation configuration + self.validation_config = ValidationConfig.from_dict( + self.config.get('validation', {}) + ) + + def run(self): + """Execute the workflow automation process.""" + self.logger.info("Starting workflow automation") + + try: + # Create configurations + self._create_configs() + + # Initialize simulator + simulator = WorkflowSimulator( + video_path=self.video_path, + anthropic_api_key=self.config.get('anthropic_api_key'), + agent_config=self.agent_config, + viz_config=self.viz_config, + validation_config=self.validation_config, + output_dir=str(self.output_dir) + ) + + # Train + self.logger.info("Starting training") + simulator.train_agents() + + # Evaluate + self.logger.info("Starting evaluation") + results = simulator.evaluate_agents() + + # Save results + self._save_results(results) + + self.logger.info("Workflow automation completed successfully") + return results + + except Exception as e: + self.logger.error(f"Workflow automation failed: {e}", exc_info=True) + raise + + def _save_results(self, results: Dict): + """Save evaluation results to file.""" + results_path = self.output_dir / "results.json" + try: + with open(results_path, 'w') as f: + json.dump(results, f, indent=2) + self.logger.info(f"Results saved to {results_path}") + except Exception as e: + self.logger.error(f"Failed to save results: {e}") + 
raise + +def parse_arguments(): + """Parse command line arguments.""" + parser = argparse.ArgumentParser( + description="Workflow Automation System", + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + + parser.add_argument( + "--video", + required=True, + help="Path to workflow video" + ) + + parser.add_argument( + "--config", + default="config.json", + help="Path to config file" + ) + + parser.add_argument( + "--output", + default="output", + help="Output directory for results and artifacts" + ) + + parser.add_argument( + "--logs", + default="logs", + help="Directory for log files" + ) + + return parser.parse_args() + +def main(): + """Main entry point for the workflow automation system.""" + args = parse_arguments() + automation = WorkflowAutomation(args) + automation.run() + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/isopro/workflow_simulation/requirements.txt b/isopro/workflow_simulation/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..6f85c8a6c826e4cf4d799b0dba19daf7a6322fc7 --- /dev/null +++ b/isopro/workflow_simulation/requirements.txt @@ -0,0 +1,35 @@ +# Core Dependencies +numpy>=1.21.0 +pandas>=1.3.0 +opencv-python>=4.5.0 +torch>=2.0.0 +gymnasium>=0.29.0 + +# Computer Vision and Detection +ultralytics>=8.0.0 # For YOLO +pillow>=9.0.0 # Image processing + +# Machine Learning +stable-baselines3>=2.0.0 +scikit-learn>=1.0.0 + +# UI Analysis +isozero>=0.1.0 # For reasoning capabilities +anthropic>=0.3.0 # For Claude API + +# Visualization +matplotlib>=3.5.0 +seaborn>=0.11.0 +tqdm>=4.65.0 # Progress bars + +# Logging and Configuration +python-dotenv>=0.19.0 +pyyaml>=6.0.0 +rich>=13.0.0 # Enhanced terminal output + +# Testing and Development (Optional) +pytest>=7.0.0 +pytest-cov>=4.0.0 +black>=22.0.0 # Code formatting +isort>=5.10.0 # Import sorting +flake8>=4.0.0 # Linting \ No newline at end of file diff --git a/isopro/workflow_simulation/workflow_agent.py b/isopro/workflow_simulation/workflow_agent.py new file mode 100644 index 0000000000000000000000000000000000000000..fe2e66991cc37250006a2dd08c146f058941ee06 --- /dev/null +++ b/isopro/workflow_simulation/workflow_agent.py @@ -0,0 +1,441 @@ +""" +Workflow Agent Module + +This module implements the WorkflowAgent class for learning and replicating UI workflows +using reinforcement learning and IsoZero reasoning capabilities. 
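+
+Example (illustrative; assumes a workflow environment has already been built):
+
+    agent = WorkflowAgent(name="workflow-agent", env=env, config=AgentConfig())
+    agent.train(total_timesteps=10000)
+    action, state = agent.predict(observation)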
+""" + +import torch +import torch.nn as nn +from stable_baselines3 import PPO +from stable_baselines3.common.callbacks import BaseCallback +from ..agents.ai_agent import AI_Agent +import numpy as np +from typing import Dict, Any, List, Tuple, Optional +import logging +from tqdm import tqdm +from dataclasses import dataclass +import gymnasium as gym +from isozero import ClaudeAgent, QuestionAnswerer + +logger = logging.getLogger(__name__) + +@dataclass +class AgentConfig: + """Configuration for WorkflowAgent.""" + learning_rate: float = 3e-4 + n_steps: int = 2048 + batch_size: int = 64 + n_epochs: int = 10 + gamma: float = 0.99 + gae_lambda: float = 0.95 + clip_range: float = 0.2 + max_grad_norm: float = 0.5 + vf_coef: float = 0.5 + ent_coef: float = 0.01 + pretrain_epochs: int = 10 + use_demonstration: bool = True + use_reasoning: bool = True + +class WorkflowPolicy(nn.Module): + """Custom policy network for learning UI workflows.""" + + def __init__(self, observation_space: gym.Space, action_space: gym.Space): + super().__init__() + + # Input dimensions + self.ui_element_dim = 128 + self.cursor_dim = 32 + self.context_dim = 64 + + # UI element encoder + self.ui_encoder = nn.Sequential( + nn.Linear(4, 64), # bbox features + nn.ReLU(), + nn.Linear(64, self.ui_element_dim), + nn.ReLU() + ) + + # Cursor position encoder + self.cursor_encoder = nn.Sequential( + nn.Linear(2, self.cursor_dim), + nn.ReLU() + ) + + # Context encoder for workflow state + self.context_encoder = nn.Sequential( + nn.Linear(observation_space.shape[0] - 6, self.context_dim), # remaining features + nn.ReLU() + ) + + # Attention mechanism for UI elements + self.attention = nn.MultiheadAttention( + embed_dim=self.ui_element_dim, + num_heads=4, + batch_first=True + ) + + # Action predictor + combined_dim = self.ui_element_dim + self.cursor_dim + self.context_dim + self.action_predictor = nn.Sequential( + nn.Linear(combined_dim, 256), + nn.ReLU(), + nn.Linear(256, 256), + nn.ReLU(), + nn.Linear(256, action_space.shape[0]) + ) + + # Value function + self.value_predictor = nn.Sequential( + nn.Linear(combined_dim, 256), + nn.ReLU(), + nn.Linear(256, 1) + ) + + def forward( + self, + ui_elements: torch.Tensor, + cursor_pos: torch.Tensor, + context: torch.Tensor + ) -> Tuple[torch.Tensor, torch.Tensor]: + """Forward pass through the policy network.""" + # Encode UI elements + ui_features = self.ui_encoder(ui_elements) + + # Apply attention to UI elements + ui_features, _ = self.attention(ui_features, ui_features, ui_features) + ui_features = torch.max(ui_features, dim=1)[0] # max pooling + + # Encode cursor and context + cursor_features = self.cursor_encoder(cursor_pos) + context_features = self.context_encoder(context) + + # Combine features + combined = torch.cat([ui_features, cursor_features, context_features], dim=1) + + # Predict actions and value + action_logits = self.action_predictor(combined) + value = self.value_predictor(combined) + + return action_logits, value + +class WorkflowProgressCallback(BaseCallback): + """Callback for tracking training progress.""" + + def __init__(self, verbose: int = 0): + super().__init__(verbose) + self.pbar = None + + def _on_training_start(self): + """Initialize progress bar.""" + self.pbar = tqdm(total=self.locals['total_timesteps'], + desc="Training workflow agent") + + def _on_step(self) -> bool: + """Update progress bar.""" + if self.pbar: + self.pbar.update(1) + return True + + def _on_training_end(self): + """Close progress bar.""" + if self.pbar: + self.pbar.close() + self.pbar 
= None + +class WorkflowAgent(AI_Agent): + """Agent for learning and replicating UI workflows.""" + + def __init__( + self, + name: str, + env: Any, + config: AgentConfig = None, + anthropic_api_key: Optional[str] = None + ): + """Initialize the workflow agent. + + Args: + name: Agent identifier + env: The workflow environment + config: Agent configuration + anthropic_api_key: Optional API key for Claude agent + """ + super().__init__(name) + self.env = env + self.config = config or AgentConfig() + + # Initialize policy + policy_kwargs = { + 'policy_class': WorkflowPolicy, + 'features_extractor_class': None + } + + self.model = PPO( + "MlpPolicy", + env, + learning_rate=self.config.learning_rate, + n_steps=self.config.n_steps, + batch_size=self.config.batch_size, + n_epochs=self.config.n_epochs, + gamma=self.config.gamma, + gae_lambda=self.config.gae_lambda, + clip_range=self.config.clip_range, + max_grad_norm=self.config.max_grad_norm, + vf_coef=self.config.vf_coef, + ent_coef=self.config.ent_coef, + policy_kwargs=policy_kwargs, + verbose=1 + ) + + # Initialize IsoZero components if reasoning is enabled + if self.config.use_reasoning and anthropic_api_key: + self.claude_agent = ClaudeAgent(api_key=anthropic_api_key) + self.qa_system = QuestionAnswerer(self.claude_agent) + logger.info("Initialized IsoZero reasoning system") + + # Storage for demonstrations and reasoning + self.demonstration_data = [] + self.reasoning_cache = {} + + logger.info(f"Initialized WorkflowAgent: {name}") + + def store_demonstration( + self, + states: List[Dict[str, Any]], + actions: List[Dict[str, Any]] + ): + """Store demonstration data for imitation learning.""" + self.demonstration_data.extend(zip(states, actions)) + logger.info(f"Stored {len(states)} demonstration steps") + + def train( + self, + total_timesteps: int = 10000, + callback: Optional[BaseCallback] = None + ): + """Train the agent using both imitation learning and RL. 
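+
+        When use_demonstration is enabled and demonstration data has been
+        stored, the policy is first pretrained on those demonstrations before
+        PPO training starts.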
+ + Args: + total_timesteps: Number of environment steps for training + callback: Optional callback for tracking progress + """ + # First, pretrain on demonstrations if available + if self.config.use_demonstration and self.demonstration_data: + self._pretrain_on_demonstrations() + + # Setup training progress callback + callbacks = [WorkflowProgressCallback()] + if callback: + callbacks.append(callback) + + # Train with RL + logger.info(f"Training {self.name} with RL for {total_timesteps} steps") + self.model.learn( + total_timesteps=total_timesteps, + callback=callbacks + ) + + def _pretrain_on_demonstrations(self): + """Pretrain the policy using demonstration data.""" + logger.info("Starting pretraining on demonstrations") + + pbar = tqdm( + total=self.config.pretrain_epochs, + desc="Pretraining on demonstrations" + ) + + for epoch in range(self.config.pretrain_epochs): + total_loss = 0 + + for state, action in self.demonstration_data: + # Get policy prediction + obs_tensor = self._preprocess_observation(state) + action_tensor = torch.FloatTensor(action) + + policy_output = self.model.policy(obs_tensor) + + # Calculate loss + action_loss = self._calculate_action_loss( + policy_output.actions, + action_tensor + ) + + total_loss += action_loss.item() + + avg_loss = total_loss / len(self.demonstration_data) + logger.debug(f"Pretrain Epoch {epoch + 1}, Avg Loss: {avg_loss:.4f}") + pbar.update(1) + + pbar.close() + logger.info("Completed pretraining on demonstrations") + + def _preprocess_observation( + self, + observation: Dict[str, Any] + ) -> torch.Tensor: + """Preprocess observation for policy network.""" + # Extract and normalize UI elements + ui_elements = torch.FloatTensor([ + elem['bbox'] for elem in observation['ui_elements'] + ]) + + # Extract and normalize cursor position + cursor_pos = torch.FloatTensor(observation['cursor_pos']) + + # Extract additional context features + context = torch.FloatTensor([ + observation['sequence_progress'] + ]) + + return { + 'ui_elements': ui_elements, + 'cursor_pos': cursor_pos, + 'context': context + } + + def _calculate_action_loss( + self, + predicted_action: torch.Tensor, + target_action: torch.Tensor + ) -> torch.Tensor: + """Calculate loss between predicted and target actions.""" + # MSE loss for continuous actions + return torch.nn.functional.mse_loss(predicted_action, target_action) + + def predict( + self, + observation: Dict[str, Any], + state: Optional[Any] = None, + deterministic: bool = False + ) -> Tuple[np.ndarray, Optional[Any]]: + """Predict next action based on observation.""" + # Apply reasoning if enabled + if self.config.use_reasoning: + observation = self._enhance_observation_with_reasoning(observation) + + # Get action from policy + action, state = self.model.predict( + observation, + state=state, + deterministic=deterministic + ) + + return action, state + + def _enhance_observation_with_reasoning( + self, + observation: Dict[str, Any] + ) -> Dict[str, Any]: + """Enhance observation with IsoZero reasoning for behavior replication.""" + if not hasattr(self, 'qa_system'): + return observation + + # Create cache key from observation + cache_key = str(observation) + + if cache_key in self.reasoning_cache: + return {**observation, 'reasoning': self.reasoning_cache[cache_key]} + + # More specific questions for workflow understanding + questions = [ + # Understand UI element relationships + f"Given the UI elements {observation['ui_elements']}, what is their functional relationship to each other?", + + # Understand previous action 
context + f"Based on the last action '{observation['last_action']}', what was likely the user's intention?", + + # Identify action prerequisites + "What conditions need to be true before performing the next action?", + + # Predict next logical step + f"Given the workflow progress is {observation['sequence_progress']}, what would be the next logical step in this workflow?", + + # Understand element states + "Which UI elements are interactive in the current state and what interactions are they designed for?", + + # Validate action sequence + "Does this sequence of actions align with common UI patterns and best practices?", + + # Identify potential errors + "Are there any potential errors or invalid states that should be avoided in the next action?" + ] + + # Create detailed context document for reasoning + context = f""" + Current UI State Analysis: + 1. Active UI Elements: {observation['ui_elements']} + 2. Current Cursor Position: {observation['cursor_pos']} + 3. Most Recent Action: {observation['last_action']} + 4. Workflow Progress: {observation['sequence_progress']} + 5. Element States: {self._get_element_states(observation)} + 6. Action History: {self._get_action_history()} + 7. Known Workflow Patterns: {self._get_workflow_patterns()} + """ + + # Get reasoning from IsoZero + doc_pairs = [(q, context) for q in questions] + reasoning = self.qa_system.answer_questions(doc_pairs) + + # Extract actionable insights from reasoning + enhanced_reasoning = { + 'element_relationships': reasoning[questions[0]]['solution'], + 'user_intention': reasoning[questions[1]]['solution'], + 'prerequisites': reasoning[questions[2]]['solution'], + 'next_step': reasoning[questions[3]]['solution'], + 'interactive_elements': reasoning[questions[4]]['solution'], + 'pattern_validation': reasoning[questions[5]]['solution'], + 'potential_errors': reasoning[questions[6]]['solution'], + 'reasoning_steps': [ + step for q in questions + for step in reasoning[q]['reasoning'] + ] + } + + # Cache the enhanced reasoning + self.reasoning_cache[cache_key] = enhanced_reasoning + + # Add reasoning to observation + enhanced_obs = { + **observation, + 'reasoning': enhanced_reasoning, + 'suggested_action': self._extract_suggested_action(enhanced_reasoning) + } + + return enhanced_obs + + def _get_element_states(self, observation: Dict[str, Any]) -> Dict[str, Any]: + """Get detailed states of UI elements.""" + element_states = {} + for element in observation['ui_elements']: + element_states[element['id']] = { + 'type': element['type'], + 'interactive': element.get('interactive', True), + 'state': element.get('state', 'default'), + 'enabled': element.get('enabled', True), + 'visible': element.get('visible', True) + } + return element_states + + def _get_action_history(self) -> List[str]: + """Get recent action history for context.""" + return self.env.get_recent_actions() if hasattr(self.env, 'get_recent_actions') else [] + + def _get_workflow_patterns(self) -> List[str]: + """Get known workflow patterns for this type of interface.""" + return [ + "Form submission patterns", + "Navigation patterns", + "Selection patterns", + "Confirmation patterns", + "Error handling patterns" + ] + + def _extract_suggested_action(self, reasoning: Dict[str, Any]) -> Dict[str, Any]: + """Extract concrete action suggestion from reasoning.""" + next_step = reasoning['next_step'] + return { + 'action_type': self._parse_action_type(next_step), + 'target_element': self._parse_target_element(next_step), + 'confidence': 
self._calculate_action_confidence(reasoning) + } \ No newline at end of file diff --git a/isopro/workflow_simulation/workflow_environment.py b/isopro/workflow_simulation/workflow_environment.py new file mode 100644 index 0000000000000000000000000000000000000000..2bbfec1778db351591fd1b8c1c4c5667224d388f --- /dev/null +++ b/isopro/workflow_simulation/workflow_environment.py @@ -0,0 +1,365 @@ +""" +WorkflowEnvironment Module + +Provides a gymnasium-compatible environment for learning and replicating UI workflows +from video demonstrations. Handles video analysis, UI element detection, and state management. +""" + +import gymnasium as gym +from gymnasium import spaces +import numpy as np +import cv2 +from dataclasses import dataclass +from pathlib import Path +from typing import Dict, List, Optional, Tuple +import json +import logging +from datetime import datetime +import os + +logger = logging.getLogger(__name__) + +@dataclass +class UIElement: + """Represents a detected UI element with its properties.""" + id: str + type: str + bbox: List[float] + confidence: float + state: str = 'default' + enabled: bool = True + visible: bool = True + +@dataclass +class WorkflowState: + """Represents the complete state of a workflow.""" + ui_elements: List[UIElement] + cursor_position: Tuple[float, float] + timestamp: float + last_action: Optional[str] = None + last_element_interacted: Optional[UIElement] = None + sequence_position: int = 0 + +class UIElementDetector: + """Handles detection of UI elements in video frames.""" + + def __init__(self, model_path: Optional[str] = None): + """Initialize the detector with optional custom model.""" + self.model_path = model_path + # Placeholder for actual model initialization + + def detect_elements(self, frame: np.ndarray) -> List[UIElement]: + """Detect UI elements in a single frame.""" + # Placeholder implementation - replace with actual detection logic + height, width = frame.shape[:2] + + # Example element for testing + element = UIElement( + id="test_element", + type="button", + bbox=[0, 0, width/4, height/4], + confidence=0.95 + ) + + return [element] + +class MotionDetector: + """Detects cursor motion between frames.""" + + def __init__(self, min_area: int = 500): + self.min_area = min_area + self.prev_frame = None + + def detect_motion(self, frame: np.ndarray) -> Optional[Tuple[float, float]]: + """Detect motion and return cursor position if found.""" + gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) + gray = cv2.GaussianBlur(gray, (21, 21), 0) + + if self.prev_frame is None: + self.prev_frame = gray + return None + + # Calculate frame difference + frame_diff = cv2.absdiff(self.prev_frame, gray) + thresh = cv2.threshold(frame_diff, 25, 255, cv2.THRESH_BINARY)[1] + thresh = cv2.dilate(thresh, None, iterations=2) + + # Find motion areas + contours, _ = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, + cv2.CHAIN_APPROX_SIMPLE) + + # Get largest motion area + if contours: + largest_contour = max(contours, key=cv2.contourArea) + if cv2.contourArea(largest_contour) > self.min_area: + M = cv2.moments(largest_contour) + if M["m00"] > 0: + cx = M["m10"] / M["m00"] + cy = M["m01"] / M["m00"] + self.prev_frame = gray + return (cx, cy) + + self.prev_frame = gray + return None + +class WorkflowEnvironment(gym.Env): + """Environment for learning and replicating UI workflows.""" + + def __init__( + self, + video_path: str, + output_dir: str = "output", + anthropic_api_key: Optional[str] = None, + model_path: Optional[str] = None, + viz_enabled: bool = False + ): + 
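+        """Set up video capture, UI/motion detectors, and the gym spaces for the workflow video."""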
super().__init__() + + # Initialize paths - remove quotes if present and resolve path + self.video_path = Path(video_path.strip('"')).resolve() + self.output_dir = Path(output_dir) + self.output_dir.mkdir(parents=True, exist_ok=True) + + # Check if video file exists + if not self.video_path.exists(): + raise ValueError( + f"Video file not found at: {self.video_path}\n" + f"Current working directory: {os.getcwd()}" + ) + + # Initialize video capture + self.cap = cv2.VideoCapture(str(self.video_path)) + if not self.cap.isOpened(): + raise ValueError(f"Failed to open video file: {self.video_path}") + + # Initialize components + self.ui_detector = UIElementDetector(model_path) + self.motion_detector = MotionDetector() + self.anthropic_api_key = anthropic_api_key + self.viz_enabled = viz_enabled + + # Setup spaces + self._setup_spaces() + + # Initialize state + self.current_frame = None + self.current_step = 0 + self.total_frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT)) + + logger.info(f"Initialized WorkflowEnvironment with video: {self.video_path}") + + def _detect_ui_elements(self) -> List[Dict]: + """Detect UI elements in current frame.""" + if self.current_frame is None: + return [] + + elements = self.ui_detector.detect_elements(self.current_frame) + return [ + { + 'type': elem.type, + 'bbox': elem.bbox, + 'state': elem.state + } + for elem in elements + ] + + def _create_initial_state(self) -> 'WorkflowState': + """Create initial workflow state.""" + ui_elements = self.ui_detector.detect_elements(self.current_frame) + height, width = self.current_frame.shape[:2] + + return WorkflowState( + ui_elements=ui_elements, + cursor_position=(width/2, height/2), + timestamp=0.0, + sequence_position=0 + ) + + def _setup_spaces(self): + """Setup action and observation spaces.""" + self.action_space = spaces.Dict({ + 'action_type': spaces.Discrete(4), # click, double_click, drag, type + 'target_element': spaces.Box( + low=0, + high=1, + shape=(4,), + dtype=np.float32 + ), + 'parameters': spaces.Dict({ + 'text_input': spaces.Text(max_length=100), + 'drag_end': spaces.Box( + low=0, + high=1, + shape=(2,), + dtype=np.float32 + ) + }) + }) + + self.observation_space = spaces.Dict({ + 'ui_elements': spaces.Sequence( + spaces.Dict({ + 'type': spaces.Text(max_length=20), + 'bbox': spaces.Box(low=0, high=1, shape=(4,)), + 'state': spaces.Text(max_length=20) + }) + ), + 'cursor_pos': spaces.Box(low=0, high=1, shape=(2,)), + 'last_action': spaces.Text(max_length=50), + 'progress': spaces.Box(low=0, high=1, shape=(1,)) + }) + + def reset( + self, + seed: Optional[int] = None, + options: Optional[Dict] = None + ) -> Tuple[Dict, Dict]: + """Reset environment to initial state.""" + super().reset(seed=seed) + + # Reset video capture + self.cap.set(cv2.CAP_PROP_POS_FRAMES, 0) + + # Get first frame + success, self.current_frame = self.cap.read() + if not success: + raise RuntimeError("Failed to read first frame from video") + + # Initialize state + self.current_state = self._create_initial_state() + + return self._get_observation(), {} + + def step(self, action: Dict) -> Tuple[Dict, float, bool, bool, Dict]: + """Execute action and return next state.""" + if self.current_frame is None: + raise RuntimeError("Environment needs to be reset") + + # Read next frame + success, self.current_frame = self.cap.read() + if not success: + return self._get_observation(), 0.0, True, False, {} + + # Process action and update state + reward = self._process_action(action) + self.current_state = self._update_state(action) + + # 
Check if episode is done + frame_position = int(self.cap.get(cv2.CAP_PROP_POS_FRAMES)) + done = frame_position >= self.total_frames + + return self._get_observation(), reward, done, False, self._get_info() + + def render(self): + """Render current environment state.""" + if not self.viz_enabled or self.current_frame is None: + return + + frame = self.current_frame.copy() + + # Draw UI elements + for element in self.current_state.ui_elements: + x1, y1, x2, y2 = map(int, element.bbox) + cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2) + cv2.putText(frame, element.type, (x1, y1-5), + cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2) + + # Draw cursor + cx, cy = map(int, self.current_state.cursor_position) + cv2.circle(frame, (cx, cy), 5, (255, 0, 0), -1) + + cv2.imshow('WorkflowEnvironment', frame) + cv2.waitKey(1) + + def close(self): + """Clean up resources.""" + if self.cap is not None: + self.cap.release() + cv2.destroyAllWindows() + + def _update_state(self, action: Dict) -> WorkflowState: + """Update workflow state based on action and new frame.""" + # Detect UI elements in new frame + ui_elements = self.ui_detector.detect_elements(self.current_frame) + + # Detect cursor motion + cursor_pos = self.motion_detector.detect_motion(self.current_frame) + if cursor_pos is None: + cursor_pos = self.current_state.cursor_position + + # Find interacted element + target_element = None + if 'target_element' in action: + target_bbox = action['target_element'] + for element in ui_elements: + if self._check_overlap(target_bbox, element.bbox): + target_element = element + break + + return WorkflowState( + ui_elements=ui_elements, + cursor_position=cursor_pos, + timestamp=self.cap.get(cv2.CAP_PROP_POS_MSEC) / 1000.0, + last_action=self._get_action_type(action), + last_element_interacted=target_element, + sequence_position=int(self.cap.get(cv2.CAP_PROP_POS_FRAMES)) + ) + + def _get_observation(self) -> Dict: + """Get current observation.""" + if self.current_state is None: + return self._get_empty_observation() + + return { + 'ui_elements': [ + { + 'type': elem.type, + 'bbox': elem.bbox, + 'state': elem.state + } + for elem in self.current_state.ui_elements + ], + 'cursor_pos': self.current_state.cursor_position, + 'last_action': self.current_state.last_action or '', + 'progress': [self.current_state.sequence_position / self.total_frames] + } + + def _get_empty_observation(self) -> Dict: + """Return empty observation with correct structure.""" + return { + 'ui_elements': [], + 'cursor_pos': (0.0, 0.0), + 'last_action': '', + 'progress': [0.0] + } + + def _get_info(self) -> Dict: + """Get additional information about current state.""" + return { + 'frame_position': self.current_state.sequence_position, + 'timestamp': self.current_state.timestamp, + 'total_frames': self.total_frames + } + + def _process_action(self, action: Dict) -> float: + """Process action and calculate reward.""" + # Simple reward implementation - can be enhanced based on needs + if self.current_state.last_element_interacted: + return 1.0 + return 0.0 + + @staticmethod + def _check_overlap(bbox1: List[float], bbox2: List[float]) -> bool: + """Check if two bounding boxes overlap.""" + x1_min, y1_min, x1_max, y1_max = bbox1 + x2_min, y2_min, x2_max, y2_max = bbox2 + + return not (x1_max < x2_min or x1_min > x2_max or + y1_max < y2_min or y1_min > y2_max) + + @staticmethod + def _get_action_type(action: Dict) -> str: + """Convert action type from index to string.""" + action_types = ['click', 'double_click', 'drag', 'type'] + 
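+        # Fall back to index 0 ('click') when the action dict has no explicit type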
action_idx = action.get('action_type', 0) + return action_types[action_idx] \ No newline at end of file diff --git a/isopro/workflow_simulation/workflow_simulator.py b/isopro/workflow_simulation/workflow_simulator.py new file mode 100644 index 0000000000000000000000000000000000000000..d159fb437852437e757f3f99817156218defd512 --- /dev/null +++ b/isopro/workflow_simulation/workflow_simulator.py @@ -0,0 +1,276 @@ +""" +WorkflowSimulator Module + +Provides a simulator for workflow automation that handles training, evaluation, +and interaction between agents and the workflow environment. +""" + +import logging +from pathlib import Path +from typing import Dict, Any, Tuple, List, Optional +import numpy as np +import json +from datetime import datetime + +from gymnasium import Env +from .workflow_agent import AgentConfig +from .workflow_visualizer import VisualizationConfig +from .workflow_validator import ValidationConfig +from .workflow_environment import WorkflowEnvironment +from .agent_config import AgentConfig + +logger = logging.getLogger(__name__) + +class EpisodeMetrics: + """Tracks and analyzes episode metrics.""" + + def __init__(self): + self.rewards = [] + self.lengths = [] + self.success_rate = 0.0 + + def add_episode(self, rewards: List[float], length: int, success: bool = False): + """Add episode metrics.""" + self.rewards.append(sum(rewards)) + self.lengths.append(length) + self.success_rate = (self.success_rate * len(self.rewards) + float(success)) / (len(self.rewards) + 1) + + def get_summary(self) -> Dict[str, float]: + """Get summary statistics of episodes.""" + return { + 'mean_reward': float(np.mean(self.rewards)), + 'std_reward': float(np.std(self.rewards)), + 'mean_length': float(np.mean(self.lengths)), + 'success_rate': self.success_rate, + 'total_episodes': len(self.rewards) + } + +class WorkflowSimulator(Env): + """Simulator for workflow automation training and evaluation.""" + + def __init__( + self, + video_path: str, + agent_config: AgentConfig, + viz_config: VisualizationConfig, + validation_config: ValidationConfig, + output_dir: str, + anthropic_api_key: Optional[str] = None, + max_episodes: int = 1000, + eval_episodes: int = 10 + ): + super().__init__() + + # Store configurations + self.agent_config = agent_config + self.viz_config = viz_config + self.validation_config = validation_config + self.output_dir = Path(output_dir) + self.max_episodes = max_episodes + self.eval_episodes = eval_episodes + + # Create output directory + self.output_dir.mkdir(parents=True, exist_ok=True) + + # Initialize environment + self.env = WorkflowEnvironment( + video_path=video_path, + output_dir=str(self.output_dir / "env"), + anthropic_api_key=anthropic_api_key, + viz_enabled=viz_config.real_time_display + ) + + # Set spaces to match environment + self.action_space = self.env.action_space + self.observation_space = self.env.observation_space + + # Initialize tracking + self.current_episode = 0 + self.training_metrics = EpisodeMetrics() + self.eval_metrics = EpisodeMetrics() + self.best_reward = float('-inf') + + logger.info(f"Initialized WorkflowSimulator with output dir: {output_dir}") + + def reset( + self, + seed: Optional[int] = None, + options: Optional[Dict] = None + ) -> Tuple[Dict, Dict]: + """Reset simulator and environment.""" + self.current_episode += 1 + observation, info = self.env.reset(seed=seed) + return self.convert_to_agent_input(observation), info + + def step(self, action: Dict) -> Tuple[Dict, float, bool, bool, Dict]: + """Execute action in environment.""" + 
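+        # Translate the agent-format action into the environment's native action dict,
+        # step the wrapped environment, then convert the observation back for the agent.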
env_action = self.convert_from_agent_output(action) + observation, reward, terminated, truncated, info = self.env.step(env_action) + + agent_obs = self.convert_to_agent_input(observation) + return agent_obs, reward, terminated, truncated, info + + def render(self): + """Render current state.""" + return self.env.render() + + def close(self): + """Clean up resources.""" + if self.env is not None: + self.env.close() + + def convert_to_agent_input(self, observation: Dict) -> Dict: + """Convert environment observation to agent format.""" + return { + 'ui_elements': observation.get('ui_elements', []), + 'cursor_position': observation.get('cursor_pos', (0.0, 0.0)), + 'last_action': observation.get('last_action', ''), + 'progress': float(observation.get('progress', [0.0])[0]) + } + + def convert_from_agent_output(self, action: Dict) -> Dict: + """Convert agent action to environment format.""" + return { + 'action_type': int(action.get('action_type', 0)), + 'target_element': np.array(action.get('target_element', [0, 0, 0, 0])), + 'parameters': { + 'text_input': str(action.get('text_input', '')), + 'drag_end': np.array(action.get('drag_end', [0, 0])) + } + } + + def train_agents(self) -> Dict[str, Any]: + """Train agents on workflow demonstration.""" + logger.info("Starting agent training") + + episode_rewards = [] + best_episode_reward = float('-inf') + + for episode in range(self.max_episodes): + # Run training episode + episode_metrics = self._run_episode(training=True) + episode_rewards.extend(episode_metrics['rewards']) + + # Track best performance + episode_reward = sum(episode_metrics['rewards']) + if episode_reward > best_episode_reward: + best_episode_reward = episode_reward + self._save_checkpoint('best_model') + + # Log progress + if (episode + 1) % 10 == 0: + self._log_training_progress(episode + 1, episode_rewards[-10:]) + + # Early stopping check + if self._check_early_stopping(episode_rewards): + logger.info("Early stopping criteria met") + break + + # Save final model and results + self._save_checkpoint('final_model') + return self.training_metrics.get_summary() + + def evaluate_agents(self) -> Dict[str, Any]: + """Evaluate trained agents.""" + logger.info("Starting agent evaluation") + + for episode in range(self.eval_episodes): + # Run evaluation episode + episode_metrics = self._run_episode(training=False) + self.eval_metrics.add_episode( + rewards=episode_metrics['rewards'], + length=episode_metrics['length'], + success=episode_metrics['success'] + ) + + logger.info(f"Evaluation episode {episode + 1}/{self.eval_episodes} completed") + + # Save evaluation results + results = self.eval_metrics.get_summary() + self._save_results(results) + + return results + + def _run_episode(self, training: bool = True) -> Dict[str, Any]: + """Run a single episode and return metrics.""" + observation, _ = self.reset() + episode_rewards = [] + step_count = 0 + done = False + + while not done: + # Get action (random for now - replace with actual policy) + action = self.action_space.sample() + + # Take step in environment + observation, reward, terminated, truncated, _ = self.step(action) + done = terminated or truncated + + episode_rewards.append(reward) + step_count += 1 + + metrics = { + 'rewards': episode_rewards, + 'length': step_count, + 'success': sum(episode_rewards) > self.agent_config.reward_threshold + } + + if training: + self.training_metrics.add_episode(**metrics) + + return metrics + + def _check_early_stopping(self, rewards: List[float], window: int = 100) -> bool: + """Check if 
training should stop early.""" + if len(rewards) < window: + return False + + recent_mean = np.mean(rewards[-window:]) + return recent_mean >= self.agent_config.reward_threshold + + def _log_training_progress(self, episode: int, recent_rewards: List[float]): + """Log training progress.""" + mean_reward = np.mean(recent_rewards) + logger.info( + f"Episode {episode}/{self.max_episodes}, " + f"Average Reward: {mean_reward:.2f}" + ) + + def _save_checkpoint(self, name: str): + """Save training checkpoint.""" + checkpoint_dir = self.output_dir / "checkpoints" + checkpoint_dir.mkdir(exist_ok=True) + + checkpoint = { + 'episode': self.current_episode, + 'training_metrics': self.training_metrics.get_summary(), + 'timestamp': datetime.now().isoformat() + } + + checkpoint_path = checkpoint_dir / f"{name}.json" + with open(checkpoint_path, 'w') as f: + json.dump(checkpoint, f, indent=2) + + logger.info(f"Saved checkpoint: {checkpoint_path}") + + def _save_results(self, results: Dict[str, Any]): + """Save evaluation results.""" + results_path = self.output_dir / "evaluation_results.json" + + with open(results_path, 'w') as f: + json.dump( + { + 'results': results, + 'config': { + 'max_episodes': self.max_episodes, + 'eval_episodes': self.eval_episodes, + 'agent_config': vars(self.agent_config), + 'validation_config': vars(self.validation_config) + }, + 'timestamp': datetime.now().isoformat() + }, + f, + indent=2 + ) + + logger.info(f"Saved evaluation results to {results_path}") \ No newline at end of file diff --git a/isopro/workflow_simulation/workflow_utils.py b/isopro/workflow_simulation/workflow_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..607103900680def0ea6311d5fc9a323e33890470 --- /dev/null +++ b/isopro/workflow_simulation/workflow_utils.py @@ -0,0 +1,190 @@ +""" +Workflow Utilities Module + +Provides utility functions for UI element detection and motion tracking. 
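+
+Typical per-frame flow (see process_frame below): detect UI elements and motion regions,
+pair them with analyze_interaction, and estimate the cursor position from recent motion.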
+""" + +import cv2 +import numpy as np +from typing import List, Dict, Any, Tuple +from dataclasses import dataclass +from ultralytics import YOLO +import logging + +logger = logging.getLogger(__name__) + +@dataclass +class UIElement: + """Represents a detected UI element.""" + type: str + bbox: List[float] # [x1, y1, x2, y2] + confidence: float + id: str = '' + is_interactive: bool = True + +class UIElementDetector: + """Detects UI elements in frames using YOLO.""" + + def __init__(self, model_path: str = 'yolov8x.pt'): + """Initialize the UI element detector.""" + self.model = YOLO(model_path) + + # Common UI element classes + self.ui_classes = [ + 'button', + 'text', + 'checkbox', + 'dropdown', + 'input', + 'icon', + 'menu', + 'window', + 'link' + ] + + logger.info("Initialized UI element detector") + + def detect_elements(self, frame: np.ndarray) -> List[UIElement]: + """Detect UI elements in a frame.""" + results = self.model(frame) + detected_elements = [] + + for result in results: + boxes = result.boxes + for i, box in enumerate(boxes): + if box.conf[0].item() > 0.5: # Confidence threshold + element = UIElement( + type=self.ui_classes[int(box.cls[0].item())], + bbox=box.xyxy[0].tolist(), + confidence=box.conf[0].item(), + id=f"{self.ui_classes[int(box.cls[0].item())]}-{i}" + ) + detected_elements.append(element) + + return detected_elements + +class MotionDetector: + """Detects motion between frames.""" + + def __init__(self, min_area: int = 500): + """Initialize motion detector.""" + self.min_area = min_area + self.prev_frame = None + + def detect_motion(self, frame: np.ndarray) -> List[Dict[str, Any]]: + """Detect motion in frame.""" + # Convert frame to grayscale + gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) + gray = cv2.GaussianBlur(gray, (21, 21), 0) + + # Initialize previous frame if needed + if self.prev_frame is None: + self.prev_frame = gray + return [] + + # Calculate frame difference + frame_diff = cv2.absdiff(self.prev_frame, gray) + thresh = cv2.threshold(frame_diff, 25, 255, cv2.THRESH_BINARY)[1] + thresh = cv2.dilate(thresh, None, iterations=2) + + # Find contours + contours, _ = cv2.findContours( + thresh.copy(), + cv2.RETR_EXTERNAL, + cv2.CHAIN_APPROX_SIMPLE + ) + + # Process motion areas + motion_areas = [] + for contour in contours: + if cv2.contourArea(contour) > self.min_area: + x, y, w, h = cv2.boundingRect(contour) + motion_areas.append({ + 'bbox': [x, y, x + w, y + h], + 'center': (x + w//2, y + h//2) + }) + + self.prev_frame = gray + return motion_areas + +def analyze_interaction( + ui_elements: List[UIElement], + motion_areas: List[Dict[str, Any]] +) -> Dict[str, Any]: + """Analyze interaction between motion and UI elements.""" + if not motion_areas or not ui_elements: + return None + + for motion in motion_areas: + motion_bbox = motion['bbox'] + + for element in ui_elements: + if check_overlap(motion_bbox, element.bbox): + # Determine interaction type + action_type = classify_interaction(motion_bbox) + + return { + 'action_type': action_type, + 'element': element, + 'motion_area': motion + } + + return None + +def check_overlap(bbox1: List[float], bbox2: List[float]) -> bool: + """Check if two bounding boxes overlap.""" + x1_min, y1_min, x1_max, y1_max = bbox1 + x2_min, y2_min, x2_max, y2_max = bbox2 + + return not ( + x1_max < x2_min or + x1_min > x2_max or + y1_max < y2_min or + y1_min > y2_max + ) + +def classify_interaction(motion_bbox: List[float]) -> str: + """Classify the type of interaction based on motion pattern.""" + width = motion_bbox[2] - 
motion_bbox[0] + height = motion_bbox[3] - motion_bbox[1] + + if width < 10 and height < 10: + return 'click' + elif width > 50 or height > 50: + return 'drag' + else: + return 'double_click' + +def estimate_cursor_position(motion_areas: List[Dict[str, Any]]) -> Tuple[float, float]: + """Estimate cursor position from motion areas.""" + if not motion_areas: + return (0.0, 0.0) + + # Use center of the most recent motion area + return motion_areas[-1]['center'] + +# Example usage +def process_frame(frame: np.ndarray) -> Dict[str, Any]: + """Process a single frame for UI interactions.""" + # Initialize detectors + ui_detector = UIElementDetector() + motion_detector = MotionDetector() + + # Detect UI elements + ui_elements = ui_detector.detect_elements(frame) + + # Detect motion + motion_areas = motion_detector.detect_motion(frame) + + # Analyze interactions + interaction = analyze_interaction(ui_elements, motion_areas) + + # Estimate cursor position + cursor_pos = estimate_cursor_position(motion_areas) + + return { + 'ui_elements': ui_elements, + 'motion_areas': motion_areas, + 'interaction': interaction, + 'cursor_position': cursor_pos + } \ No newline at end of file diff --git a/isopro/workflow_simulation/workflow_validator.py b/isopro/workflow_simulation/workflow_validator.py new file mode 100644 index 0000000000000000000000000000000000000000..935e692740ca388ba078d517cfd68188d4d79d58 --- /dev/null +++ b/isopro/workflow_simulation/workflow_validator.py @@ -0,0 +1,284 @@ +""" +Workflow Validator Module - +Validates workflow reasoning using IsoZero, a LLM-based reasoning system. +This module is responsible for comparing the executed workflow steps against the provided demonstrations +and calculating a validation score based on the matching action, target accuracy, timing accuracy, and reasoning score. 
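+
+With the default ValidationConfig weights, each step's overall score is
+0.4 * action_match + 0.3 * target_accuracy (IoU) + 0.1 * timing_accuracy + 0.2 * reasoning_score.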
+""" + +import logging +from pathlib import Path +from dataclasses import dataclass +from typing import List, Dict, Any, Optional +from datetime import datetime +import json +import numpy as np +from tqdm import tqdm +from isozero import ClaudeAgent, QuestionAnswerer + +logger = logging.getLogger(__name__) + +@dataclass +class ValidationConfig: + """Configuration for workflow validation.""" + sequence_matching: bool = True # Enable sequence matching validation + state_validation: bool = True # Enable state validation + save_visualizations: bool = True # Save validation visualizations + action_weight: float = 0.4 # Weight for action matching in scoring + target_weight: float = 0.3 # Weight for target accuracy in scoring + timing_weight: float = 0.1 # Weight for timing accuracy in scoring + reasoning_weight: float = 0.2 # Weight for reasoning score in scoring + timing_window: float = 0.5 # Timing window for accuracy (in seconds) + min_target_iou: float = 0.5 # Minimum IoU for target matching + + @classmethod + def from_dict(cls, config_dict: Dict[str, Any]) -> 'ValidationConfig': + """Create config from dictionary.""" + return cls( + sequence_matching=config_dict.get('sequence_matching', True), + state_validation=config_dict.get('state_validation', True), + save_visualizations=config_dict.get('save_visualizations', True), + action_weight=config_dict.get('action_weight', 0.4), + target_weight=config_dict.get('target_weight', 0.3), + timing_weight=config_dict.get('timing_weight', 0.1), + reasoning_weight=config_dict.get('reasoning_weight', 0.2), + timing_window=config_dict.get('timing_window', 0.5), + min_target_iou=config_dict.get('min_target_iou', 0.5) + ) + +@dataclass +class ValidationResult: + """Stores validation results for a workflow step.""" + step_id: int + action_match: bool + target_accuracy: float + timing_accuracy: float + reasoning_score: float + overall_score: float + messages: List[str] + +class WorkflowValidator: + """Validates executed workflows against demonstrations.""" + + def __init__( + self, + config: ValidationConfig, + output_dir: str = "output", + log_dir: str = "logs", + anthropic_api_key: Optional[str] = None + ): + self.config = config + self.timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + self.output_dir = Path(output_dir) + self.log_dir = Path(log_dir) + + # Create directories + self.output_dir.mkdir(parents=True, exist_ok=True) + self.log_dir.mkdir(parents=True, exist_ok=True) + + # Setup logging + self._setup_logging() + + # Initialize IsoZero for reasoning validation if key provided + if anthropic_api_key: + self.claude_agent = ClaudeAgent(api_key=anthropic_api_key) + self.qa_system = QuestionAnswerer(self.claude_agent) + logger.info("Initialized IsoZero reasoning system") + + def _setup_logging(self): + """Configure logging to file.""" + log_file = self.log_dir / f"validation_{self.timestamp}.log" + handler = logging.FileHandler(log_file) + handler.setFormatter( + logging.Formatter('%(asctime)s - %(levelname)s - %(message)s') + ) + logger.addHandler(handler) + + def validate_workflow( + self, + demo_sequence: List[Dict[str, Any]], + exec_sequence: List[Dict[str, Any]] + ) -> List[ValidationResult]: + """Validate executed workflow against demonstration.""" + if not self.config.sequence_matching: + logger.info("Sequence matching validation disabled") + return [] + + results = [] + logger.info("Starting workflow validation") + + for step_id, (demo, exec) in enumerate(tqdm(zip(demo_sequence, exec_sequence))): + result = self._validate_step(step_id, demo, 
exec) + results.append(result) + + logger.info(f"Step {step_id} validation score: {result.overall_score:.2f}") + + self._save_results(results) + return results + + def _validate_step( + self, + step_id: int, + demo_step: Dict[str, Any], + exec_step: Dict[str, Any] + ) -> ValidationResult: + """Validate a single workflow step.""" + messages = [] + + # Check action matching + action_match = demo_step['action_type'] == exec_step['action_type'] + if not action_match: + messages.append(f"Action mismatch: expected {demo_step['action_type']}, " + f"got {exec_step['action_type']}") + + # Check target accuracy + target_accuracy = self._calculate_target_accuracy( + demo_step['target_element'], + exec_step['target_element'] + ) + + if target_accuracy < self.config.min_target_iou: + messages.append(f"Low target accuracy: {target_accuracy:.2f}") + + # Check timing + timing_accuracy = self._calculate_timing_accuracy( + demo_step.get('timestamp', 0), + exec_step.get('timestamp', 0) + ) + + # Get reasoning score if available + reasoning_score = 0.0 + if hasattr(self, 'qa_system'): + reasoning_score = self._validate_reasoning(demo_step, exec_step) + + # Calculate overall score + overall_score = self._calculate_score( + action_match, + target_accuracy, + timing_accuracy, + reasoning_score + ) + + return ValidationResult( + step_id=step_id, + action_match=action_match, + target_accuracy=target_accuracy, + timing_accuracy=timing_accuracy, + reasoning_score=reasoning_score, + overall_score=overall_score, + messages=messages + ) + + def _calculate_target_accuracy( + self, + demo_target: Dict[str, Any], + exec_target: Dict[str, Any] + ) -> float: + """Calculate accuracy of target element matching using IoU.""" + if not demo_target or not exec_target: + return 0.0 + + demo_bbox = demo_target.get('bbox', [0, 0, 0, 0]) + exec_bbox = exec_target.get('bbox', [0, 0, 0, 0]) + + # Calculate intersection over union + x1 = max(demo_bbox[0], exec_bbox[0]) + y1 = max(demo_bbox[1], exec_bbox[1]) + x2 = min(demo_bbox[2], exec_bbox[2]) + y2 = min(demo_bbox[3], exec_bbox[3]) + + intersection = max(0, x2 - x1) * max(0, y2 - y1) + + area1 = (demo_bbox[2] - demo_bbox[0]) * (demo_bbox[3] - demo_bbox[1]) + area2 = (exec_bbox[2] - exec_bbox[0]) * (exec_bbox[3] - exec_bbox[1]) + union = area1 + area2 - intersection + + return intersection / union if union > 0 else 0.0 + + def _calculate_timing_accuracy( + self, + demo_time: float, + exec_time: float + ) -> float: + """Calculate timing accuracy between demonstration and execution.""" + diff = abs(demo_time - exec_time) + return max(0, 1 - (diff / self.config.timing_window)) + + def _validate_reasoning( + self, + demo_step: Dict[str, Any], + exec_step: Dict[str, Any] + ) -> float: + """Validate reasoning about the action using IsoZero.""" + context = f""" + Demonstration: {demo_step['action_type']} on {demo_step['target_element']['type']} + Execution: {exec_step['action_type']} on {exec_step['target_element']['type']} + """ + + question = "On a scale of 0 to 1, how well does the executed action match the demonstrated action's intent?" 
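+        # The (question, context) pair is sent to the IsoZero QA system; the returned
+        # 'solution' text is parsed as a float score, falling back to 0.0 if parsing fails.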
+ + response = self.qa_system.answer_questions([(question, context)]) + try: + return float(response[question]['solution']) + except (ValueError, KeyError): + return 0.0 + + def _calculate_score( + self, + action_match: bool, + target_accuracy: float, + timing_accuracy: float, + reasoning_score: float + ) -> float: + """Calculate overall validation score.""" + return ( + self.config.action_weight * float(action_match) + + self.config.target_weight * target_accuracy + + self.config.timing_weight * timing_accuracy + + self.config.reasoning_weight * reasoning_score + ) + + def _save_results(self, results: List[ValidationResult]): + """Save validation results and generate report.""" + if not self.config.save_visualizations: + return + + # Save detailed results + output_path = self.output_dir / f"validation_{self.timestamp}.json" + with open(output_path, 'w') as f: + json.dump( + [ + { + 'step_id': r.step_id, + 'action_match': r.action_match, + 'target_accuracy': r.target_accuracy, + 'timing_accuracy': r.timing_accuracy, + 'reasoning_score': r.reasoning_score, + 'overall_score': r.overall_score, + 'messages': r.messages + } + for r in results + ], + f, + indent=2 + ) + + # Generate summary report + report_path = self.output_dir / f"report_{self.timestamp}.txt" + with open(report_path, 'w') as f: + f.write("Workflow Validation Report\n") + f.write("=========================\n\n") + + avg_score = np.mean([r.overall_score for r in results]) + f.write(f"Overall Score: {avg_score:.2f}\n\n") + + for result in results: + f.write(f"Step {result.step_id}:\n") + f.write(f" Score: {result.overall_score:.2f}\n") + if result.messages: + f.write(" Messages:\n") + for msg in result.messages: + f.write(f" - {msg}\n") + f.write("\n") + + logger.info(f"Results saved to {output_path}") + logger.info(f"Report generated at {report_path}") \ No newline at end of file diff --git a/isopro/workflow_simulation/workflow_visualizer.py b/isopro/workflow_simulation/workflow_visualizer.py new file mode 100644 index 0000000000000000000000000000000000000000..4b3929f44b3f96fc9afb766a1404fc3247d604d4 --- /dev/null +++ b/isopro/workflow_simulation/workflow_visualizer.py @@ -0,0 +1,186 @@ +""" +Workflow Visualizer Module + +Provides visualization capabilities for workflow execution and validation results. 
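+
+Per-step frames can be annotated with detected UI elements, the cursor position, and the
+current action; validation results are summarized as matplotlib/seaborn plots.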
+""" + +import matplotlib.pyplot as plt +import seaborn as sns +import cv2 +import numpy as np +from pathlib import Path +from typing import List, Dict, Any +from datetime import datetime +import logging +from dataclasses import dataclass + +logger = logging.getLogger(__name__) + +@dataclass +class VisualizationConfig: + """Configuration for visualization settings.""" + show_ui_elements: bool = True + show_cursor: bool = True + show_actions: bool = True + save_frames: bool = True + save_plots: bool = True + real_time_display: bool = False + +class WorkflowVisualizer: + """Visualizes workflow execution and validation results.""" + + def __init__(self, output_dir: str = "output", config: VisualizationConfig = None): + """Initialize workflow visualizer.""" + self.config = config or VisualizationConfig() + self.timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + self.output_dir = Path(output_dir) / "visualizations" / self.timestamp + self.output_dir.mkdir(parents=True, exist_ok=True) + + if self.config.real_time_display: + cv2.namedWindow("Workflow Visualization", cv2.WINDOW_NORMAL) + + logger.info("Initialized WorkflowVisualizer") + + def visualize_step( + self, + frame: np.ndarray, + ui_elements: List[Dict[str, Any]], + cursor_pos: tuple, + action: str, + step_num: int + ): + """Visualize a single workflow step.""" + viz_frame = frame.copy() + + # Draw UI elements + if self.config.show_ui_elements: + for element in ui_elements: + self._draw_ui_element(viz_frame, element) + + # Draw cursor + if self.config.show_cursor: + self._draw_cursor(viz_frame, cursor_pos) + + # Draw action label + if self.config.show_actions: + self._draw_action_label(viz_frame, action) + + # Show real-time display if enabled + if self.config.real_time_display: + cv2.imshow("Workflow Visualization", viz_frame) + cv2.waitKey(1) + + # Save frame if enabled + if self.config.save_frames: + frame_path = self.output_dir / f"step_{step_num:04d}.jpg" + cv2.imwrite(str(frame_path), viz_frame) + + def visualize_validation_results(self, validation_results: List[Dict[str, Any]]): + """Create visualization of validation results.""" + if not self.config.save_plots: + return + + plt.style.use('seaborn') + + # Create subplots for different metrics + fig, axes = plt.subplots(2, 2, figsize=(15, 10)) + + # Plot scores over time + self._plot_scores(validation_results, axes[0, 0]) + + # Plot accuracy heatmap + self._plot_accuracy_heatmap(validation_results, axes[0, 1]) + + # Plot success metrics + self._plot_success_metrics(validation_results, axes[1, 0]) + + # Plot timing analysis + self._plot_timing_analysis(validation_results, axes[1, 1]) + + plt.tight_layout() + plt.savefig(self.output_dir / "validation_results.png") + plt.close() + + def _draw_ui_element(self, frame: np.ndarray, element: Dict[str, Any]): + """Draw a UI element on the frame.""" + x1, y1, x2, y2 = [int(coord) for coord in element['bbox']] + + # Draw box + color = (0, 255, 0) if element.get('enabled', True) else (0, 0, 255) + cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2) + + # Draw label + label = f"{element['type']} ({element.get('confidence', 1.0):.2f})" + cv2.putText( + frame, + label, + (x1, y1 - 5), + cv2.FONT_HERSHEY_SIMPLEX, + 0.5, + color, + 2 + ) + + def _draw_cursor(self, frame: np.ndarray, cursor_pos: tuple): + """Draw cursor position on frame.""" + x, y = [int(coord) for coord in cursor_pos] + cv2.circle(frame, (x, y), 5, (255, 0, 0), -1) + + def _draw_action_label(self, frame: np.ndarray, action: str): + """Draw action label on frame.""" + 
cv2.putText( + frame, + f"Action: {action}", + (10, 30), + cv2.FONT_HERSHEY_SIMPLEX, + 1, + (255, 255, 255), + 2 + ) + + def _plot_scores(self, results: List[Dict[str, Any]], ax: plt.Axes): + """Plot validation scores over time.""" + scores = [r['overall_score'] for r in results] + ax.plot(scores, marker='o') + ax.set_title('Validation Scores Over Time') + ax.set_xlabel('Step') + ax.set_ylabel('Score') + + def _plot_accuracy_heatmap(self, results: List[Dict[str, Any]], ax: plt.Axes): + """Plot accuracy heatmap.""" + data = np.array([ + [r['action_match'], r['target_accuracy'], r['timing_accuracy']] + for r in results + ]) + + sns.heatmap( + data.T, + ax=ax, + yticklabels=['Action', 'Target', 'Timing'], + cmap='YlOrRd' + ) + ax.set_title('Accuracy Heatmap') + + def _plot_success_metrics(self, results: List[Dict[str, Any]], ax: plt.Axes): + """Plot success metrics.""" + metrics = { + 'Action Match': np.mean([r['action_match'] for r in results]), + 'Target Accuracy': np.mean([r['target_accuracy'] for r in results]), + 'Timing Accuracy': np.mean([r['timing_accuracy'] for r in results]) + } + + ax.bar(metrics.keys(), metrics.values()) + ax.set_title('Average Success Metrics') + ax.set_ylim(0, 1) + + def _plot_timing_analysis(self, results: List[Dict[str, Any]], ax: plt.Axes): + """Plot timing analysis.""" + timing_accuracies = [r['timing_accuracy'] for r in results] + sns.histplot(timing_accuracies, ax=ax, bins=20) + ax.set_title('Timing Accuracy Distribution') + ax.set_xlabel('Accuracy') + + def close(self): + """Clean up visualization resources.""" + if self.config.real_time_display: + cv2.destroyAllWindows() \ No newline at end of file diff --git a/isopro/wrappers/__init__.py b/isopro/wrappers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..bac3691110fda68f1d06aba9945e70b26124788f --- /dev/null +++ b/isopro/wrappers/__init__.py @@ -0,0 +1,7 @@ +""" +Wrapper classes for the isopro package. +""" + +from .simulation_wrapper import SimulationWrapper + +__all__ = ["SimulationWrapper"] \ No newline at end of file diff --git a/isopro/wrappers/__pycache__/__init__.cpython-38.pyc b/isopro/wrappers/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9685d987b776ef65a859a9578729ac43d1251d98 Binary files /dev/null and b/isopro/wrappers/__pycache__/__init__.cpython-38.pyc differ diff --git a/isopro/wrappers/__pycache__/simulation_wrapper.cpython-38.pyc b/isopro/wrappers/__pycache__/simulation_wrapper.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f6a41913997f4cccebb40d4024068ff1a01ae600 Binary files /dev/null and b/isopro/wrappers/__pycache__/simulation_wrapper.cpython-38.pyc differ diff --git a/isopro/wrappers/simulation_wrapper.py b/isopro/wrappers/simulation_wrapper.py new file mode 100644 index 0000000000000000000000000000000000000000..0009527a8d9133d3d79c3524d952df8d5dbe9d0f --- /dev/null +++ b/isopro/wrappers/simulation_wrapper.py @@ -0,0 +1,73 @@ +"""Simulation Wrapper for integrating agents with the simulation environment.""" +from ..base.base_wrapper import BaseWrapper + +class SimulationWrapper(BaseWrapper): + """Simulation Wrapper for integrating agents with the simulation environment.""" + + def __init__(self, agent, simulation): + """ + Initialize the SimulationWrapper. + + Args: + agent (AI_Agent): The agent to be wrapped. + simulation (SimulationEnvironment): The simulation environment. 
+ """ + super().__init__(agent) + self.simulation = simulation + + def step(self): + """ + Execute one time step within the environment. + + Returns: + The result of the simulation step. + """ + sim_state = self.simulation.get_state() + agent_input = self.convert_to_agent_input(sim_state) + agent_output = self.agent.run(agent_input) + sim_input = self.convert_from_agent_output(agent_output) + return self.simulation.step(sim_input) + + def convert_to_agent_input(self, sim_state): + """ + Convert the simulation state to a format the agent can understand. + + Args: + sim_state (dict): The current state of the simulation. + + Returns: + dict: A dictionary containing the formatted input for the agent. + """ + text_data = sim_state.get('text_data', {}) + return { + 'text': { + 'task': text_data.get('reason', ''), + 'step': text_data.get('step', ''), + 'reasoning': text_data.get('reasoning', ''), + 'max_steps': getattr(self.simulation, 'max_steps', None) + } + } + + def convert_from_agent_output(self, agent_output): + """ + Convert agent output to simulation input format. + + Args: + agent_output (dict): The output from the agent. + + Returns: + dict: The converted input for the simulation. + """ + return agent_output + + def reset(self): + """Reset the simulation environment.""" + self.simulation.reset() + + def render(self): + """Render the simulation environment.""" + return self.simulation.render() + + def close(self): + """Close the simulation environment.""" + self.simulation.close() \ No newline at end of file