martinjosifoski
committed on
Commit
•
b475feb
1
Parent(s):
1a3765a
Propagate changes from refactoring.
Browse files
- CF_CodeCriticWrongAttempt.py +1 -0
- CF_CodeCriticWrongAttempt.yaml +3 -7
- CF_CodeCriticWrongAttemptWithPlan.yaml +3 -8
- CF_CodeDebug.yaml +3 -1
- CF_CodeDebugCollab.yaml +65 -41
- CF_CodeDebugCollabWithPlan.yaml +59 -45
- CF_CodeDebugCritic.yaml +106 -6
- CF_CodeDebugCriticWithPlan.yaml +112 -15
- CF_CodeWithPlan.py +1 -0
- CF_CodeWithPlan.yaml +6 -13
- __init__.py +9 -12
CF_CodeCriticWrongAttempt.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
from flows.application_flows import OpenAIChatAtomicFlow
|
2 |
|
|
|
3 |
class CF_CodeCriticWrongAttempt(OpenAIChatAtomicFlow):
|
4 |
def __init__(self, **kwargs):
|
5 |
super().__init__(**kwargs)
|
|
|
1 |
from flows.application_flows import OpenAIChatAtomicFlow
|
2 |
|
3 |
+
|
4 |
class CF_CodeCriticWrongAttempt(OpenAIChatAtomicFlow):
|
5 |
def __init__(self, **kwargs):
|
6 |
super().__init__(**kwargs)
|
CF_CodeCriticWrongAttempt.yaml
CHANGED
@@ -70,7 +70,7 @@ init_human_message_prompt_template:
|
|
70 |
- "testing_results_summary"
|
71 |
template_format: jinja2
|
72 |
|
73 |
-
|
74 |
- "problem_description"
|
75 |
- "input_description"
|
76 |
- "output_description"
|
@@ -78,9 +78,5 @@ init_input_keys:
|
|
78 |
- "testing_results_summary"
|
79 |
- "code"
|
80 |
|
81 |
-
|
82 |
-
-
|
83 |
-
old_key2new_key:
|
84 |
-
api_output: "code_feedback"
|
85 |
-
output_keys:
|
86 |
-
- "code_feedback"
|
|
|
70 |
- "testing_results_summary"
|
71 |
template_format: jinja2
|
72 |
|
73 |
+
input_interface_non_initialized:
|
74 |
- "problem_description"
|
75 |
- "input_description"
|
76 |
- "output_description"
|
|
|
78 |
- "testing_results_summary"
|
79 |
- "code"
|
80 |
|
81 |
+
output_interface:
|
82 |
+
- "api_output"
|
|
|
|
|
|
|
|
CF_CodeCriticWrongAttemptWithPlan.yaml
CHANGED
@@ -76,8 +76,7 @@ init_human_message_prompt_template:
|
|
76 |
- "testing_results_summary"
|
77 |
template_format: jinja2
|
78 |
|
79 |
-
|
80 |
-
init_input_keys:
|
81 |
- "problem_description"
|
82 |
- "input_description"
|
83 |
- "output_description"
|
@@ -86,9 +85,5 @@ init_input_keys:
|
|
86 |
- "plan"
|
87 |
- "code"
|
88 |
|
89 |
-
|
90 |
-
-
|
91 |
-
old_key2new_key:
|
92 |
-
api_output: "code_feedback"
|
93 |
-
output_keys:
|
94 |
-
- "code_feedback"
|
|
|
76 |
- "testing_results_summary"
|
77 |
template_format: jinja2
|
78 |
|
79 |
+
input_interface_non_initialized:
|
|
|
80 |
- "problem_description"
|
81 |
- "input_description"
|
82 |
- "output_description"
|
|
|
85 |
- "plan"
|
86 |
- "code"
|
87 |
|
88 |
+
output_interface:
|
89 |
+
- "api_output"
|
|
|
|
|
|
|
|
CF_CodeDebug.yaml
CHANGED
@@ -62,7 +62,7 @@ topology:
|
|
62 |
keys_to_select:
|
63 |
- "code"
|
64 |
|
65 |
-
|
66 |
|
67 |
# ~~~ Code Testing Critic ~~~
|
68 |
- goal: "Test the code on the public tests and provide a results summary."
|
@@ -144,3 +144,5 @@ topology:
|
|
144 |
tests_separator: "\n\n"
|
145 |
|
146 |
issue_title: "# Issue with the last proposed solution"
|
|
|
|
|
|
62 |
keys_to_select:
|
63 |
- "code"
|
64 |
|
65 |
+
reset: false
|
66 |
|
67 |
# ~~~ Code Testing Critic ~~~
|
68 |
- goal: "Test the code on the public tests and provide a results summary."
|
|
|
144 |
tests_separator: "\n\n"
|
145 |
|
146 |
issue_title: "# Issue with the last proposed solution"
|
147 |
+
|
148 |
+
reset: true
|
CF_CodeDebugCollab.yaml
CHANGED
@@ -3,58 +3,82 @@ description: "ToDO: add description"
|
|
3 |
|
4 |
max_rounds: 2 # ToDo: To increase to 4
|
5 |
|
6 |
-
|
7 |
- "problem_description"
|
8 |
- "input_description"
|
9 |
- "output_description"
|
10 |
- "io_examples_and_explanation"
|
11 |
- "public_tests_individual_io"
|
12 |
-
|
13 |
-
output_keys:
|
14 |
- "code"
|
15 |
|
16 |
subflows_config:
|
17 |
CodeGenerator:
|
18 |
_target_: .CF_Code.instantiate_from_default_config
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
input_keys:
|
42 |
-
- "code_feedback"
|
43 |
-
- "testing_results_summary"
|
44 |
-
|
45 |
CodeDebugCritic:
|
46 |
_target_: .CF_CodeDebugCritic.instantiate_from_default_config
|
47 |
|
48 |
topology:
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
|
4 |
max_rounds: 2 # ToDo: To increase to 4
|
5 |
|
6 |
+
input_interface:
|
7 |
- "problem_description"
|
8 |
- "input_description"
|
9 |
- "output_description"
|
10 |
- "io_examples_and_explanation"
|
11 |
- "public_tests_individual_io"
|
12 |
+
output_interface:
|
|
|
13 |
- "code"
|
14 |
|
15 |
subflows_config:
|
16 |
CodeGenerator:
|
17 |
_target_: .CF_Code.instantiate_from_default_config
|
18 |
+
name: "CodeGenerator"
|
19 |
+
model_name: "gpt-4"
|
20 |
+
human_message_prompt_template:
|
21 |
+
_target_: langchain.PromptTemplate
|
22 |
+
template: |2-
|
23 |
+
{{testing_results_summary}}
|
24 |
+
|
25 |
+
{{code_feedback}}
|
26 |
+
|
27 |
+
|
28 |
+
Consider the problem statement, the last proposed solution, its issue and the provided feedback. Return a corrected version of the code that solves the original problem and resolves the issue, without any explanation, in the following format:
|
29 |
+
```python
|
30 |
+
{{code_placeholder}}
|
31 |
+
```
|
32 |
+
input_variables:
|
33 |
+
- code_feedback
|
34 |
+
- testing_results_summary
|
35 |
+
partial_variables:
|
36 |
+
code_placeholder: "{{python_code}}"
|
37 |
+
input_interface_initialized:
|
38 |
+
- "code_feedback"
|
39 |
+
- "testing_results_summary"
|
|
|
|
|
|
|
|
|
40 |
CodeDebugCritic:
|
41 |
_target_: .CF_CodeDebugCritic.instantiate_from_default_config
|
42 |
|
43 |
topology:
|
44 |
+
# ~~~ Code Generator ~~~
|
45 |
+
- goal: "Generate/refine a solution."
|
46 |
+
|
47 |
+
### Input Interface
|
48 |
+
input_interface:
|
49 |
+
_target_: flows.interfaces.KeyInterface
|
50 |
+
additional_transformations:
|
51 |
+
- _target_: flows.data_transformations.KeyMatchInput
|
52 |
+
|
53 |
+
### Flow Specification
|
54 |
+
flow: CodeGenerator
|
55 |
+
|
56 |
+
### Output Interface
|
57 |
+
output_interface:
|
58 |
+
_target_: flows.interfaces.KeyInterface
|
59 |
+
additional_transformations:
|
60 |
+
- _target_: flows.data_transformations.RegexFirstOccurrenceExtractor
|
61 |
+
regex: '(?<=```python)([\s\S]*?)(?=```)'
|
62 |
+
regex_fallback: '(?<=```)([\s\S]*?)(?=```)'
|
63 |
+
input_key: "api_output"
|
64 |
+
output_key: "code"
|
65 |
+
strip: True
|
66 |
+
assert_unique: True
|
67 |
+
keys_to_select:
|
68 |
+
- "code"
|
69 |
+
|
70 |
+
reset: false
|
71 |
+
|
72 |
+
# ~~~ Code Critic Grounded in Tests ~~~
|
73 |
+
- goal: ""
|
74 |
+
|
75 |
+
### Input Interface
|
76 |
+
input_interface:
|
77 |
+
_target_: flows.interfaces.KeyInterface
|
78 |
+
additional_transformations:
|
79 |
+
- _target_: flows.data_transformations.KeyMatchInput
|
80 |
+
|
81 |
+
### Flow Specification
|
82 |
+
flow: CodeDebugCritic
|
83 |
+
|
84 |
+
reset: true
|
CF_CodeDebugCollabWithPlan.yaml
CHANGED
@@ -1,53 +1,63 @@
|
|
1 |
name: "CodeDebugCollabWithPlan_Flow"
|
2 |
description: "ToDO: add description"
|
3 |
|
4 |
-
|
5 |
max_rounds: 2 # ToDo: To increase to 4
|
6 |
-
early_exit_key: "end_of_interaction"
|
7 |
|
8 |
-
|
9 |
-
input_keys:
|
10 |
- "problem_description"
|
11 |
- "input_description"
|
12 |
- "output_description"
|
13 |
- "io_examples_and_explanation"
|
14 |
- "public_tests_individual_io"
|
15 |
- "plan"
|
16 |
-
|
17 |
-
output_data_transformations:
|
18 |
-
- _target_: flows.data_transformations.KeyRename
|
19 |
-
old_key2new_key:
|
20 |
-
code: "code"
|
21 |
-
output_keys:
|
22 |
- "code"
|
23 |
|
24 |
subflows_config:
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
|
|
|
|
|
36 |
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
input_variables:
|
42 |
-
- code_feedback
|
43 |
-
- testing_results_summary
|
44 |
-
partial_variables:
|
45 |
-
code_placeholder: "{{python_code}}"
|
46 |
-
template_format: jinja2
|
47 |
-
input_keys:
|
48 |
-
- "code_feedback"
|
49 |
-
- "testing_results_summary"
|
50 |
-
output_data_transformations:
|
51 |
- _target_: flows.data_transformations.RegexFirstOccurrenceExtractor
|
52 |
regex: '(?<=```python)([\s\S]*?)(?=```)'
|
53 |
regex_fallback: '(?<=```)([\s\S]*?)(?=```)'
|
@@ -55,17 +65,21 @@ subflows_config:
|
|
55 |
output_key: "code"
|
56 |
strip: True
|
57 |
assert_unique: True
|
|
|
|
|
58 |
|
59 |
-
|
60 |
-
end_of_interaction_string: "Final answer"
|
61 |
-
input_key: "api_output"
|
62 |
-
output_key: "end_of_interaction"
|
63 |
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
68 |
|
69 |
-
|
70 |
-
CodeGenerator: False
|
71 |
-
CodeDebugCriticWithPlan_Flow: True
|
|
|
1 |
name: "CodeDebugCollabWithPlan_Flow"
|
2 |
description: "ToDO: add description"
|
3 |
|
|
|
4 |
max_rounds: 2 # ToDo: To increase to 4
|
|
|
5 |
|
6 |
+
input_interface:
|
|
|
7 |
- "problem_description"
|
8 |
- "input_description"
|
9 |
- "output_description"
|
10 |
- "io_examples_and_explanation"
|
11 |
- "public_tests_individual_io"
|
12 |
- "plan"
|
13 |
+
output_interface:
|
|
|
|
|
|
|
|
|
|
|
14 |
- "code"
|
15 |
|
16 |
subflows_config:
|
17 |
+
CodeGenerator:
|
18 |
+
_target_: .CF_CodeWithPlan.instantiate_from_default_config
|
19 |
+
name: "CodeGenerator"
|
20 |
+
model_name: "gpt-4"
|
21 |
+
human_message_prompt_template:
|
22 |
+
_target_: langchain.PromptTemplate
|
23 |
+
template: |2-
|
24 |
+
{{testing_results_summary}}
|
25 |
+
|
26 |
+
{{code_feedback}}
|
27 |
+
|
28 |
+
|
29 |
+
Consider the problem statement, the last proposed solution, its issue and the provided feedback. Return a corrected version of the code that solves the original problem and resolves the issue, without any explanation, in the following format:
|
30 |
+
```python
|
31 |
+
{{code_placeholder}}
|
32 |
+
```
|
33 |
+
input_variables:
|
34 |
+
- code_feedback
|
35 |
+
- testing_results_summary
|
36 |
+
partial_variables:
|
37 |
+
code_placeholder: "{{python_code}}"
|
38 |
+
input_interface_initialized:
|
39 |
+
- "code_feedback"
|
40 |
+
- "testing_results_summary"
|
41 |
+
CodeDebugCritic:
|
42 |
+
_target_: .CF_CodeDebugCriticWithPlan.instantiate_from_default_config
|
43 |
+
|
44 |
+
topology:
|
45 |
+
# ~~~ Code Generator ~~~
|
46 |
+
- goal: "Generate/refine a solution."
|
47 |
+
|
48 |
+
### Input Interface
|
49 |
+
input_interface:
|
50 |
+
_target_: flows.interfaces.KeyInterface
|
51 |
+
additional_transformations:
|
52 |
+
- _target_: flows.data_transformations.KeyMatchInput
|
53 |
|
54 |
+
### Flow Specification
|
55 |
+
flow: CodeGenerator
|
56 |
|
57 |
+
### Output Interface
|
58 |
+
output_interface:
|
59 |
+
_target_: flows.interfaces.KeyInterface
|
60 |
+
additional_transformations:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
- _target_: flows.data_transformations.RegexFirstOccurrenceExtractor
|
62 |
regex: '(?<=```python)([\s\S]*?)(?=```)'
|
63 |
regex_fallback: '(?<=```)([\s\S]*?)(?=```)'
|
|
|
65 |
output_key: "code"
|
66 |
strip: True
|
67 |
assert_unique: True
|
68 |
+
keys_to_select:
|
69 |
+
- "code"
|
70 |
|
71 |
+
reset: false
|
|
|
|
|
|
|
72 |
|
73 |
+
# ~~~ Code Critic Grounded in Tests ~~~
|
74 |
+
- goal: ""
|
75 |
+
|
76 |
+
### Input Interface
|
77 |
+
input_interface:
|
78 |
+
_target_: flows.interfaces.KeyInterface
|
79 |
+
additional_transformations:
|
80 |
+
- _target_: flows.data_transformations.KeyMatchInput
|
81 |
+
|
82 |
+
### Flow Specification
|
83 |
+
flow: CodeDebugCritic
|
84 |
|
85 |
+
reset: true
|
|
|
|
CF_CodeDebugCritic.yaml
CHANGED
@@ -1,9 +1,7 @@
|
|
1 |
name: "CodeDebugCritic_Flow"
|
2 |
description: "ToDo: add description"
|
3 |
|
4 |
-
|
5 |
-
|
6 |
-
input_keys:
|
7 |
- "problem_description"
|
8 |
- "input_description"
|
9 |
- "output_description"
|
@@ -11,11 +9,13 @@ input_keys:
|
|
11 |
- "public_tests_individual_io"
|
12 |
- "code"
|
13 |
|
14 |
-
|
15 |
- "testing_results_summary"
|
16 |
- "all_tests_passed"
|
17 |
- "code_feedback"
|
18 |
|
|
|
|
|
19 |
subflows_config:
|
20 |
CodeTestingCritic:
|
21 |
_target_: .CF_CodeTesting.instantiate_from_default_config
|
@@ -23,6 +23,106 @@ subflows_config:
|
|
23 |
_target_: .CF_CodeCriticWrongAttempt.instantiate_from_default_config
|
24 |
|
25 |
topology:
|
26 |
-
|
27 |
-
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
|
|
|
1 |
name: "CodeDebugCritic_Flow"
|
2 |
description: "ToDo: add description"
|
3 |
|
4 |
+
input_interface:
|
|
|
|
|
5 |
- "problem_description"
|
6 |
- "input_description"
|
7 |
- "output_description"
|
|
|
9 |
- "public_tests_individual_io"
|
10 |
- "code"
|
11 |
|
12 |
+
output_interface:
|
13 |
- "testing_results_summary"
|
14 |
- "all_tests_passed"
|
15 |
- "code_feedback"
|
16 |
|
17 |
+
public_tests_key: "public_tests_individual_io"
|
18 |
+
|
19 |
subflows_config:
|
20 |
CodeTestingCritic:
|
21 |
_target_: .CF_CodeTesting.instantiate_from_default_config
|
|
|
23 |
_target_: .CF_CodeCriticWrongAttempt.instantiate_from_default_config
|
24 |
|
25 |
topology:
|
26 |
+
# ~~~ Code Testing Critic ~~~
|
27 |
+
- goal: "Test the code on the public tests and provide a results summary."
|
28 |
+
|
29 |
+
### Input Interface
|
30 |
+
input_interface:
|
31 |
+
_target_: flows.interfaces.KeyInterface
|
32 |
+
additional_transformations:
|
33 |
+
- _target_: flows.data_transformations.KeyMatchInput
|
34 |
+
|
35 |
+
### Flow Specification
|
36 |
+
flow: CodeTestingCritic
|
37 |
+
|
38 |
+
### Output Interface
|
39 |
+
output_interface:
|
40 |
+
_target_: flows.interfaces.KeyInterface
|
41 |
+
additional_transformations:
|
42 |
+
- _target_: martinjosifoski.CC_flows.src.data_transformations.CorrectnessFlag
|
43 |
+
input_key: "public_tests_results"
|
44 |
+
output_key: "all_tests_passed"
|
45 |
+
- _target_: martinjosifoski.CC_flows.src.data_transformations.TestingResultsSummaryGeneration
|
46 |
+
output_key: "testing_results_summary"
|
47 |
+
|
48 |
+
single_test_error_message: True
|
49 |
+
|
50 |
+
no_error_template: |2-
|
51 |
+
${.issue_title}
|
52 |
+
All of the executed tests passed.
|
53 |
+
|
54 |
+
compilation_error_template: |2-
|
55 |
+
${.issue_title}
|
56 |
+
The execution resulted in a compilation error.
|
57 |
+
## Compilation error message:
|
58 |
+
{{error_message}}
|
59 |
+
timeout_error_template: |2-
|
60 |
+
${.issue_title}
|
61 |
+
The execution timed out, the solution is not efficient enough.
|
62 |
+
runtime_error_template: |2-
|
63 |
+
${.issue_title}
|
64 |
+
The execution resulted in a runtime error on the following test.
|
65 |
+
## [Failed test] Input
|
66 |
+
```
|
67 |
+
{{test_input}}
|
68 |
+
```
|
69 |
+
## [Failed test] Runtime error message
|
70 |
+
{{error_message}}
|
71 |
+
single_test_error_template: |2-
|
72 |
+
${.issue_title}
|
73 |
+
The Python code does not solve the problem in the problem description due to logical errors. It fails the following test:
|
74 |
+
## [Failed test] Input
|
75 |
+
```
|
76 |
+
{{test_input}}
|
77 |
+
```
|
78 |
+
## [Failed test] Expected output
|
79 |
+
```
|
80 |
+
{{expected_output}}
|
81 |
+
```
|
82 |
+
## [Failed test] Generated output
|
83 |
+
```
|
84 |
+
{{generated_output}}
|
85 |
+
```
|
86 |
+
all_tests_header: |2-
|
87 |
+
${.issue_title}
|
88 |
+
The Python code does not solve the problem in the problem description due to logical errors. It fails on the following tests.
|
89 |
+
test_error_template: |2-
|
90 |
+
## [Failed test {{idx}}]
|
91 |
+
### [Failed test {{idx}}] Input
|
92 |
+
```
|
93 |
+
{{test_input}}
|
94 |
+
```
|
95 |
+
### [Failed test {{idx}}] Expected output
|
96 |
+
```
|
97 |
+
{{expected_output}}
|
98 |
+
```
|
99 |
+
### [Failed test {{idx}}] Generated output
|
100 |
+
```
|
101 |
+
{{generated_output}}
|
102 |
+
```
|
103 |
+
tests_separator: "\n\n"
|
104 |
+
|
105 |
+
issue_title: "# Issue with the last proposed solution"
|
106 |
+
|
107 |
+
# ~~~ Feedback Generator ~~~
|
108 |
+
- goal: "Generate feedback grounded in the test results summary."
|
109 |
+
|
110 |
+
### Input Interface
|
111 |
+
input_interface:
|
112 |
+
_target_: flows.interfaces.KeyInterface
|
113 |
+
additional_transformations:
|
114 |
+
- _target_: flows.data_transformations.KeyMatchInput
|
115 |
+
|
116 |
+
### Flow Specification
|
117 |
+
flow: CodeCriticWrongAttempt
|
118 |
+
|
119 |
+
### Output Interface
|
120 |
+
output_interface:
|
121 |
+
_target_: flows.interfaces.KeyInterface
|
122 |
+
additional_transformations:
|
123 |
+
- _target_: flows.data_transformations.KeyRename
|
124 |
+
old_key2new_key:
|
125 |
+
api_output: "code_feedback"
|
126 |
+
|
127 |
+
reset: true
|
128 |
|
CF_CodeDebugCriticWithPlan.yaml
CHANGED
@@ -1,10 +1,7 @@
|
|
1 |
name: "CodeDebugCriticWithPlan_Flow"
|
2 |
description: "ToDo: add description"
|
3 |
|
4 |
-
|
5 |
-
|
6 |
-
input_data_transformations: []
|
7 |
-
input_keys:
|
8 |
- "problem_description"
|
9 |
- "input_description"
|
10 |
- "output_description"
|
@@ -13,19 +10,119 @@ input_keys:
|
|
13 |
- "code"
|
14 |
- "plan"
|
15 |
|
16 |
-
|
17 |
-
- _target_: flows.data_transformations.KeyRename
|
18 |
-
old_key2new_key:
|
19 |
-
testing_results_summary: "testing_results_summary"
|
20 |
-
all_tests_passed: "all_tests_passed"
|
21 |
-
code_feedback: "code_feedback"
|
22 |
-
output_keys:
|
23 |
- "testing_results_summary"
|
24 |
- "all_tests_passed"
|
25 |
- "code_feedback"
|
26 |
|
|
|
|
|
27 |
subflows_config:
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
name: "CodeDebugCriticWithPlan_Flow"
|
2 |
description: "ToDo: add description"
|
3 |
|
4 |
+
input_interface:
|
|
|
|
|
|
|
5 |
- "problem_description"
|
6 |
- "input_description"
|
7 |
- "output_description"
|
|
|
10 |
- "code"
|
11 |
- "plan"
|
12 |
|
13 |
+
output_interface:
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
- "testing_results_summary"
|
15 |
- "all_tests_passed"
|
16 |
- "code_feedback"
|
17 |
|
18 |
+
public_tests_key: "public_tests_individual_io"
|
19 |
+
|
20 |
subflows_config:
|
21 |
+
CodeTestingCritic:
|
22 |
+
_target_: .CF_CodeTesting.instantiate_from_default_config
|
23 |
+
CodeCriticWrongAttempt:
|
24 |
+
_target_: .CF_CodeCriticWrongAttemptWithPlan.instantiate_from_default_config
|
25 |
+
|
26 |
+
topology:
|
27 |
+
# ~~~ Code Testing Critic ~~~
|
28 |
+
- goal: "Test the code on the public tests and provide a results summary."
|
29 |
+
|
30 |
+
### Input Interface
|
31 |
+
input_interface:
|
32 |
+
_target_: flows.interfaces.KeyInterface
|
33 |
+
additional_transformations:
|
34 |
+
- _target_: flows.data_transformations.KeyMatchInput
|
35 |
+
|
36 |
+
### Flow Specification
|
37 |
+
flow: CodeTestingCritic
|
38 |
+
|
39 |
+
### Output Interface
|
40 |
+
output_interface:
|
41 |
+
_target_: flows.interfaces.KeyInterface
|
42 |
+
additional_transformations:
|
43 |
+
- _target_: martinjosifoski.CC_flows.src.data_transformations.CorrectnessFlag
|
44 |
+
input_key: "public_tests_results"
|
45 |
+
output_key: "all_tests_passed"
|
46 |
+
- _target_: martinjosifoski.CC_flows.src.data_transformations.TestingResultsSummaryGeneration
|
47 |
+
output_key: "testing_results_summary"
|
48 |
+
|
49 |
+
single_test_error_message: True
|
50 |
+
|
51 |
+
no_error_template: |2-
|
52 |
+
${.issue_title}
|
53 |
+
All of the executed tests passed.
|
54 |
+
|
55 |
+
compilation_error_template: |2-
|
56 |
+
${.issue_title}
|
57 |
+
The execution resulted in a compilation error.
|
58 |
+
## Compilation error message:
|
59 |
+
{{error_message}}
|
60 |
+
timeout_error_template: |2-
|
61 |
+
${.issue_title}
|
62 |
+
The execution timed out, the solution is not efficient enough.
|
63 |
+
runtime_error_template: |2-
|
64 |
+
${.issue_title}
|
65 |
+
The execution resulted in a runtime error on the following test.
|
66 |
+
## [Failed test] Input
|
67 |
+
```
|
68 |
+
{{test_input}}
|
69 |
+
```
|
70 |
+
## [Failed test] Runtime error message
|
71 |
+
{{error_message}}
|
72 |
+
single_test_error_template: |2-
|
73 |
+
${.issue_title}
|
74 |
+
The Python code does not solve the problem in the problem description due to logical errors. It fails the following test:
|
75 |
+
## [Failed test] Input
|
76 |
+
```
|
77 |
+
{{test_input}}
|
78 |
+
```
|
79 |
+
## [Failed test] Expected output
|
80 |
+
```
|
81 |
+
{{expected_output}}
|
82 |
+
```
|
83 |
+
## [Failed test] Generated output
|
84 |
+
```
|
85 |
+
{{generated_output}}
|
86 |
+
```
|
87 |
+
all_tests_header: |2-
|
88 |
+
${.issue_title}
|
89 |
+
The Python code does not solve the problem in the problem description due to logical errors. It fails on the following tests.
|
90 |
+
test_error_template: |2-
|
91 |
+
## [Failed test {{idx}}]
|
92 |
+
### [Failed test {{idx}}] Input
|
93 |
+
```
|
94 |
+
{{test_input}}
|
95 |
+
```
|
96 |
+
### [Failed test {{idx}}] Expected output
|
97 |
+
```
|
98 |
+
{{expected_output}}
|
99 |
+
```
|
100 |
+
### [Failed test {{idx}}] Generated output
|
101 |
+
```
|
102 |
+
{{generated_output}}
|
103 |
+
```
|
104 |
+
tests_separator: "\n\n"
|
105 |
+
|
106 |
+
issue_title: "# Issue with the last proposed solution"
|
107 |
+
|
108 |
+
# ~~~ Feedback Generator ~~~
|
109 |
+
- goal: "Generate feedback grounded in the test results summary."
|
110 |
+
|
111 |
+
### Input Interface
|
112 |
+
input_interface:
|
113 |
+
_target_: flows.interfaces.KeyInterface
|
114 |
+
additional_transformations:
|
115 |
+
- _target_: flows.data_transformations.KeyMatchInput
|
116 |
+
|
117 |
+
### Flow Specification
|
118 |
+
flow: CodeCriticWrongAttempt
|
119 |
+
|
120 |
+
### Output Interface
|
121 |
+
output_interface:
|
122 |
+
_target_: flows.interfaces.KeyInterface
|
123 |
+
additional_transformations:
|
124 |
+
- _target_: flows.data_transformations.KeyRename
|
125 |
+
old_key2new_key:
|
126 |
+
api_output: "code_feedback"
|
127 |
+
|
128 |
+
reset: true
|
CF_CodeWithPlan.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
from flows.application_flows import OpenAIChatAtomicFlow
|
2 |
|
|
|
3 |
class CF_CodeWithPlan(OpenAIChatAtomicFlow):
|
4 |
def __init__(self, **kwargs):
|
5 |
super().__init__(**kwargs)
|
|
|
1 |
from flows.application_flows import OpenAIChatAtomicFlow
|
2 |
|
3 |
+
|
4 |
class CF_CodeWithPlan(OpenAIChatAtomicFlow):
|
5 |
def __init__(self, **kwargs):
|
6 |
super().__init__(**kwargs)
|
CF_CodeWithPlan.yaml
CHANGED
@@ -70,22 +70,15 @@ init_human_message_prompt_template:
|
|
70 |
code_placeholder: "{{python_code}}"
|
71 |
template_format: jinja2
|
72 |
|
73 |
-
|
74 |
-
init_input_keys:
|
75 |
- "problem_description"
|
76 |
- "input_description"
|
77 |
- "output_description"
|
78 |
- "io_examples_and_explanation"
|
79 |
- "plan"
|
80 |
|
81 |
-
|
82 |
-
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
output_key: "code"
|
87 |
-
strip: True
|
88 |
-
assert_unique: True
|
89 |
-
|
90 |
-
output_keys:
|
91 |
-
- "code"
|
|
|
70 |
code_placeholder: "{{python_code}}"
|
71 |
template_format: jinja2
|
72 |
|
73 |
+
input_interface_non_initialized:
|
|
|
74 |
- "problem_description"
|
75 |
- "input_description"
|
76 |
- "output_description"
|
77 |
- "io_examples_and_explanation"
|
78 |
- "plan"
|
79 |
|
80 |
+
input_interface_initialized:
|
81 |
+
- "query"
|
82 |
+
|
83 |
+
output_interface:
|
84 |
+
- "api_output"
|
|
|
|
|
|
|
|
|
|
|
|
__init__.py
CHANGED
@@ -16,7 +16,7 @@ from .CF_Code import CF_Code
|
|
16 |
|
17 |
# cf-plan-code (and cf-plan_oracle-code)
|
18 |
# from .CF_Plan import CF_Plan
|
19 |
-
|
20 |
# from .CF_Plan_Code import CF_Plan_Code
|
21 |
|
22 |
# # cf-plan_reflect-code
|
@@ -33,16 +33,15 @@ from .CF_Code import CF_Code
|
|
33 |
from .CF_CodeTesting import CF_CodeTesting
|
34 |
from .CF_CodeDebug import CF_CodeDebug
|
35 |
|
36 |
-
#
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
# #
|
41 |
-
# # # cf-plan_oracle-code_debug_collab
|
42 |
-
# from .CF_CodeCriticWrongAttemptWithPlan import CF_CodeCriticWrongAttemptWithPlan
|
43 |
-
# from .CF_CodeDebugCriticWithPlan import CF_CodeDebugCriticWithPlan
|
44 |
-
# from .CF_CodeDebugCollabWithPlan import CF_CodeDebugCollabWithPlan
|
45 |
|
|
|
|
|
|
|
|
|
46 |
|
47 |
########################## LC ##########################
|
48 |
|
@@ -83,5 +82,3 @@ from .CF_CodeDebug import CF_CodeDebug
|
|
83 |
# from .LC_CodeCriticWrongAttemptWithPlan import LC_CodeCriticWrongAttemptWithPlan
|
84 |
# from .LC_CodeDebugCriticWithPlan import LC_CodeDebugCriticWithPlan
|
85 |
# from .LC_CodeDebugCollabWithPlan import LC_CodeDebugCollabWithPlan
|
86 |
-
|
87 |
-
|
|
|
16 |
|
17 |
# cf-plan-code (and cf-plan_oracle-code)
|
18 |
# from .CF_Plan import CF_Plan
|
19 |
+
from .CF_CodeWithPlan import CF_CodeWithPlan
|
20 |
# from .CF_Plan_Code import CF_Plan_Code
|
21 |
|
22 |
# # cf-plan_reflect-code
|
|
|
33 |
from .CF_CodeTesting import CF_CodeTesting
|
34 |
from .CF_CodeDebug import CF_CodeDebug
|
35 |
|
36 |
+
# cf-code_debug_collab
|
37 |
+
from .CF_CodeCriticWrongAttempt import CF_CodeCriticWrongAttempt
|
38 |
+
from .CF_CodeDebugCritic import CF_CodeDebugCritic
|
39 |
+
from .CF_CodeDebugCollab import CF_CodeDebugCollab
|
|
|
|
|
|
|
|
|
|
|
40 |
|
41 |
+
# cf-plan_oracle-code_debug_collab
|
42 |
+
from .CF_CodeCriticWrongAttemptWithPlan import CF_CodeCriticWrongAttemptWithPlan
|
43 |
+
from .CF_CodeDebugCriticWithPlan import CF_CodeDebugCriticWithPlan
|
44 |
+
from .CF_CodeDebugCollabWithPlan import CF_CodeDebugCollabWithPlan
|
45 |
|
46 |
########################## LC ##########################
|
47 |
|
|
|
82 |
# from .LC_CodeCriticWrongAttemptWithPlan import LC_CodeCriticWrongAttemptWithPlan
|
83 |
# from .LC_CodeDebugCriticWithPlan import LC_CodeDebugCriticWithPlan
|
84 |
# from .LC_CodeDebugCollabWithPlan import LC_CodeDebugCollabWithPlan
|
|
|
|