Enhanced AI Functions: Introducing a new Flavor of AI Functions for Improved Results #13

Open · wants to merge 1 commit into base: master
14 changes: 6 additions & 8 deletions README.md
@@ -51,6 +51,7 @@ The `ai_function` takes the following parameters:
- `args`: A list of arguments for the function.
- `description`: A string describing the purpose of the function.
- `model`: (Optional) A string specifying the GPT model to use. Default is 'gpt-4'.
- `error_correction`: (Optional) A boolean specifying whether multiple additional attempts should be made to auto-correct errors. Default is `False`.
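
Since the PR's own "Example usage" block below is collapsed in this diff, here is a minimal sketch of a call that opts into the new flag. The `multiply` function and its description are invented for illustration; only the `ai_function` signature comes from this PR:

```python
import ai_functions

# Illustrative inputs -- not taken from the collapsed README example.
function_string = "def multiply(a: int, b: int) -> int:"
args = ["3", "4"]
description_string = "Multiplies two integers and returns the product."

# error_correction=True asks the library to retry broken generated code.
result = ai_functions.ai_function(
    function_string, args, description_string, model="gpt-4", error_correction=True
)
print(result)  # Output: 12
```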

Example usage:

@@ -67,21 +68,18 @@ print(result) # Output: 12

## Limitations

-The table below shows the success rate of the AI functions with different GPT models:
The table below shows the success rate of the AI functions with different GPT models without using error correction:

-| Description | GPT-4 Result | GPT-3.5-turbo Result | Reason |
| Description | GPT-4 Result | GPT-3.5-turbo Result | Comment |
|---------------------------|--------------|----------------------|--------|
-| Generate fake people | PASSED | FAILED | Incorrect response format |
| Generate fake people | PASSED | PASSED | N/A |
| Generate Random Password | PASSED | PASSED | N/A |
-| Calculate area of triangle| FAILED | FAILED | Incorrect float value (GPT-4), Incorrect response format (GPT-3.5-turbo) |
| Calculate area of triangle | PASSED | PASSED | N/A |
| Calculate the nth prime number | PASSED | PASSED | N/A |
| Encrypt text | PASSED | PASSED | N/A |
| Find missing numbers | PASSED | PASSED | N/A |

-It's important to note that AI Functions are not suited for certain tasks, particularly those involving mathematical calculations and precision. As observed in the case of calculating the area of a triangle and finding the nth prime number, GPT models can struggle with providing accurate results. The limitations of GPT models in such cases are mainly due to their inherent inability to perform precise arithmetic and the ambiguity in understanding user inputs.

-In conclusion, while AI Functions can be helpful in various scenarios, they may not be the optimal choice for tasks requiring mathematical accuracy or specific domain knowledge. For such use-cases, utilizing traditional algorithms and libraries would yield better results.

We are looking for more test cases that will push the boundaries of AI Functions.


### test_ai_functions.py
112 changes: 107 additions & 5 deletions ai_functions.py
@@ -1,14 +1,116 @@
import openai
from util import exec_with_return, getSmallTrace

-def ai_function(function, args, description, model = "gpt-4"):

def ai_function(
    function: str,
    args,
    description: str,
    model="gpt-4",
    error_correction=False,
):
    VERBOSE = False
    MAX_ERROR_CORRECTION_RETRY = 5
    # Parse args to a comma-separated string
    args = ", ".join(args)
-    messages = [{"role": "system", "content": f"You are now the following python function: ```# {description}\n{function}```\n\nOnly respond with your `return` value. Do not include any other explanatory text in your response."},{"role": "user", "content": args}]
    system_prompt = f"""You are now coding the following python function: \n\n# {description}\n{function}\n\n
In the last line of your output, call the function with the given arguments.
Only respond with python code. Do not include any other explanatory text in your response and do not format as markdown.
Do not use the print() function anywhere in your code. Only use the standard python library in your import statements as dependencies.
"""

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": args},
    ]
    code = callOpenAI(messages, model)

    if VERBOSE:
        print(f"CODE:\n{code}")

    if not error_correction:
        try:
            result = exec_with_return(code)
        except Exception:
            result = ""
    else:
        for i in range(MAX_ERROR_CORRECTION_RETRY):
            try:
                result = exec_with_return(code)
                break
            except Exception:
                broken_code = code
                code = fix_code(code, messages, VERBOSE, model)
                result = ""

                # If there are no further improvements, then stop error correcting
                if broken_code == code:
                    break

    if not result:
        return ai_function_without_code(function, args, description)

    return result


def ai_function_without_code(
    function: str,
    args,
    description: str,
    model="gpt-4",
):
    messages = [
        {
            "role": "system",
            "content": f"You are now the following python function: ```# {description}\n{function}```\n\nOnly respond with your `return` value. Do not include any other explanatory text in your response.",
        },
        {"role": "user", "content": args},
    ]

    response = openai.ChatCompletion.create(
-        model=model,
-        messages=messages,
-        temperature=0
        model=model, messages=messages, temperature=0
    )

    result = response.choices[0].message["content"]

    # If the result is enclosed in quotes, strip the quotes
    if (
        isinstance(result, str)
        and len(result) > 1
        and result[0] == result[-1]
        and (result[0] == '"' or result[0] == "'")
    ):
        result = result.strip("'\"")
    return result


def fix_code(broken_code: str, messages: list[dict], verbose: bool, model):
    error_trace = getSmallTrace()
    if verbose:
        print(f"ERROR:\n{error_trace}")
    error_correction_prompt = f"""
Unfortunately, the following error happens when executing the code. Can you fix the code?
Remember, only respond with executable python code. Do not include any other explanatory text in your response.
Do not use the print() function anywhere in your code.
Only use the standard python library in your import statements as dependencies.
Here is the error: {error_trace}
"""

    messages.extend(
        [
            {"role": "assistant", "content": broken_code},
            {"role": "user", "content": error_correction_prompt},
        ]
    )
    if verbose:
        print(f"ERROR CORRECTION PROMPT:\n{messages}")
    result = callOpenAI(messages, model)
    if verbose:
        print(f"IMPROVED CODE:\n{result}")
    return result


def callOpenAI(messages: list[dict], model="gpt-4") -> str:
    response = openai.ChatCompletion.create(
        model=model, messages=messages, temperature=0
    )
    return response.choices[0].message["content"]
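
Note that `exec_with_return` and `getSmallTrace` are imported from `util`, which is not part of this diff. The following is a minimal sketch of what such helpers might look like, purely as an assumption about their contracts (run the generated snippet and return the value of its final expression; return a short traceback string), not the PR's actual implementation:

```python
import ast
import traceback


def exec_with_return(code: str):
    # Assumed contract: execute the generated snippet and hand back the value
    # of its final expression (the function call the system prompt asks for).
    tree = ast.parse(code)
    if not tree.body:
        return None
    scope: dict = {}
    last = tree.body[-1]
    if isinstance(last, ast.Expr):
        # Run everything before the last statement, then evaluate it.
        body = ast.Module(body=tree.body[:-1], type_ignores=[])
        exec(compile(body, "<ai_function>", "exec"), scope)
        return eval(compile(ast.Expression(body=last.value), "<ai_function>", "eval"), scope)
    exec(compile(tree, "<ai_function>", "exec"), scope)
    return None


def getSmallTrace() -> str:
    # Assumed contract: a trimmed traceback of the most recent exception,
    # small enough to fit into the error-correction prompt.
    return traceback.format_exc(limit=3)
```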
2 changes: 1 addition & 1 deletion keys.py
@@ -1 +1 @@
OPENAI_API_KEY="" # Get yours from: https://beta.openai.com/account/api-keys
OPENAI_API_KEY = "" # Get yours from: https://beta.openai.com/account/api-keys
133 changes: 91 additions & 42 deletions test_ai_function.py
@@ -9,6 +9,7 @@
# Initialize the OpenAI API client
openai.api_key = keys.OPENAI_API_KEY


# Run all tests and print the results
def run_tests(model):
    test_functions = [test_1, test_2, test_3, test_4, test_5, test_6]
@@ -18,13 +19,15 @@ def run_tests(model):
        "Calculate area of triangle",
        "Calculate the nth prime number",
        "Encrypt text",
-        "Find missing numbers"
-    ]
        "Find missing numbers",
    ]
    failed_tests = []

    i = 0
    for test in test_functions:
-        print(f"=-=-=- Running test: {test.__name__} - {test_names[i]} with model {model} -=-=-=")
        print(
            f"=-=-=- Running test: {test.__name__} - {test_names[i]} with model {model} -=-=-="
        )
        i += 1
        try:
            test(model)
@@ -39,7 +42,10 @@ def run_tests(model):
    print(f"Total tests: {len(test_functions)}")

    # Print the success rate
-    print(f"Success Rate: {len(test_functions) - len(failed_tests)}/{len(test_functions)}")
    print(
        f"Success Rate: {len(test_functions) - len(failed_tests)}/{len(test_functions)}"
    )


# Ai function test 1
def test_1(model):
@@ -48,49 +54,69 @@ def test_1(model):
    description_string = """Generates n examples of fake data representing people,
    each with a name and an age."""

-    result_string = ai_functions.ai_function(function_string, args, description_string, model)
    result_string = ai_functions.ai_function(
        function_string, args, description_string, model
    )

    print(f"Output: {result_string}")

-    # Assert the result can be parsed as is a list of dictionaries
-    print("Testing if result is a a string...")
-    assert isinstance(result_string, str)
-    result = None
-    try:
-        print("Testing if result can be parsed as a list of dictionaries...")
-        # Parse the result as a list of dictionaries
-        result = json.loads(result_string)
-    except Exception as e:
-        # If the result can't be parsed as a list of dictionaries, the test fails
-        assert False

    if isinstance(result_string, list):
        result = result_string
    else:
        # Assert the result can be parsed as a list of dictionaries
        print("Testing if result is a string...")
        assert isinstance(result_string, str)
        result = None
        try:
            print("Testing if result can be parsed as a list of dictionaries...")
            # Parse the result as a list of dictionaries
            result = json.loads(result_string)
        except Exception as e:
            # If the result can't be parsed as a list of dictionaries, the test fails
            assert False

    # Assert the length of the result is equal to the number of people requested
-    print("Testing if the length of the result is equal to the number of people requested...")
    print(
        "Testing if the length of the result is equal to the number of people requested..."
    )
    if result:
        assert len(result) == int(args[0])
    else:
        assert False


# Ai function test 2
def test_2(model):
-    function_string = "def random_password_generator(length: int, special_chars: bool) -> str:"
    function_string = (
        "def random_password_generator(length: int, special_chars: bool) -> str:"
    )
    args = ["12", "True"]
    description_string = """Generates a random password of given length with or without special characters."""

-    result_string = ai_functions.ai_function(function_string, args, description_string, model)
    result_string = ai_functions.ai_function(
        function_string, args, description_string, model
    )

    print(f"Output: {result_string}")

    # Assert the length of the result is equal to the length requested
    print("Testing if the length of the result is equal to the length requested...")
    assert len(result_string) == int(args[0])


# Ai function test 3
def test_3(model):
-    function_string = "def calculate_area_of_triangle(base: float, height: float) -> float:"
    function_string = (
        "def calculate_area_of_triangle(base: float, height: float) -> float:"
    )
    args = ["15", "6.5"]
-    description_string = """Calculates the area of a triangle given its base and height."""
    description_string = (
        """Calculates the area of a triangle given its base and height."""
    )

-    result_string = ai_functions.ai_function(function_string, args, description_string, model)
    result_string = ai_functions.ai_function(
        function_string, args, description_string, model
    )
    print(f"Output: {result_string}")

    # Assert the result can be parsed as a float
@@ -103,16 +129,22 @@ def test_3(model):

    # Assert the result is equal to the expected area of the triangle
    expected_area = (float(args[0]) * float(args[1])) / 2
-    print("Testing if the result is equal to the expected area of the triangle, which is: " + str(expected_area))
    print(
        "Testing if the result is equal to the expected area of the triangle, which is: "
        + str(expected_area)
    )
    assert float(result_string) == pytest.approx(expected_area)


# Ai function test 4
def test_4(model):
    function_string = "def get_nth_prime_number(n: int) -> int:"
    args = ["10"]
    description_string = """Finds and returns the nth prime number."""

-    result_string = ai_functions.ai_function(function_string, args, description_string, model)
    result_string = ai_functions.ai_function(
        function_string, args, description_string, model
    )

    print(f"Output: {result_string}")

@@ -126,46 +158,63 @@ def test_4(model):

    # Assert the result is equal to the expected nth prime number
    expected_prime_number = 29
-    print("Testing if the result is equal to the expected nth prime number, which is: " + str(expected_prime_number))
    print(
        "Testing if the result is equal to the expected nth prime number, which is: "
        + str(expected_prime_number)
    )
    assert int(result_string) == expected_prime_number


# Ai function test 5
def test_5(model):
    function_string = "def encrypt_text(text: str, key: str) -> str:"
-    args = ["'Hello, World!'", "'abc123'"]
    plain_text = "Hello, World!"
    key = "abc123"
    args = [f"'{plain_text}'", f"'{key}'"]
    description_string = """Encrypts the given text using a simple character substitution based on the provided key."""

-    result_string = ai_functions.ai_function(function_string, args, description_string, model)
    cipher = ai_functions.ai_function(function_string, args, description_string, model)

-    print(f"Output: {result_string}")
    print(f"Output: {cipher}")

    # Assert the result has the same length as the input text
    print("Testing if the result has the same length as the input text...")
-    assert len(result_string) == len(args[0])
    assert len(cipher) == len(plain_text)


# Ai function test 6
def test_6(model):
-    function_string = "def find_missing_numbers_in_list(numbers: list[int]) -> list[int]:"
    function_string = (
        "def find_missing_numbers_in_list(numbers: list[int]) -> list[int]:"
    )
    args = ["[3, 5, 8, 15, 16]"]
-    description_string = """Finds and returns a list of missing numbers in a given sorted list."""
    description_string = (
        """Finds and returns a list of missing numbers in a given sorted list."""
    )

-    result_string = ai_functions.ai_function(function_string, args, description_string, model)
    result_string = ai_functions.ai_function(
        function_string, args, description_string, model
    )

    print(f"Output: {result_string}")

-    # Assert the result can be parsed as a list
-    try:
-        result_list = ast.literal_eval(result_string)
-        print("Testing if result is a a list...")
-        assert isinstance(result_list, list)
-    except Exception as e:
-        print(e)
-        assert False
    if isinstance(result_string, list):
        result_list = result_string
    else:
        # Assert the result can be parsed as a list
        try:
            result_list = ast.literal_eval(result_string)
            print("Testing if result is a list...")
            assert isinstance(result_list, list)
        except Exception as e:
            print(e)
            assert False

    # Assert the result list contains the expected missing numbers
    expected_missing_numbers = [4, 6, 7, 9, 10, 11, 12, 13, 14]
    print("Testing if the result list contains the expected missing numbers...")
    assert result_list == expected_missing_numbers


run_tests("gpt-4")
run_tests("gpt-3.5-turbo")
run_tests("gpt-3.5-turbo")
Loading