file naming when --test (#164)

2023-07-17 11:24:16 -04:00 · 2023-07-17 11:24:16 -04:00 · 8aa6452cc4
parent dffc1dfd51
commit 8aa6452cc4
11 changed files with 315 additions and 72 deletions
--- a/agbenchmark/reports/internal_info.json
+++ b/agbenchmark/reports/internal_info.json
@ -1,40 +1,72 @@
 {
-  "mini-agi": {
+    "mini-agi": {
-    "TestBasicMemory": [true, true, true],
+        "TestBasicMemory": [
-    "TestBasicRetrieval": [true, true, true],
+            true,
-    "TestCreateSimpleWebServer": [false, false, false],
+            true,
-    "TestDebugSimpleTypoWithGuidance": [
+            true
-      false,
+        ],
-      false,
+        "TestBasicRetrieval": [
-      false,
+            true,
-      false,
+            true,
-      false,
+            true
-      false
+        ],
-    ],
+        "TestCreateSimpleWebServer": [
-    "TestDebugSimpleTypoWithoutGuidance": [false, false, false],
+            false,
-    "TestReadFile": [true, true, true, true],
+            false,
-    "TestRememberMultipleIds": [true, true, true],
+            false
-    "TestRememberMultipleIdsWithNoise": [true, true, true],
+        ],
-    "TestRememberMultiplePhrasesWithNoise": [true, true, true],
+        "TestDebugSimpleTypoWithGuidance": [
-    "TestRetrieval2": [true, true, true],
+            false,
-    "TestRetrieval3": [true, true, true],
+            false,
-    "TestSearch": [true, true, true, true],
+            false
-    "TestWriteFile": [
+        ],
-      true,
+        "TestDebugSimpleTypoWithoutGuidance": [
-      true,
+            false,
-      true,
+            false,
-      false,
+            false
-      false,
+        ],
-      false,
+        "TestReadFile": [
-      false,
+            true,
-      true,
+            true,
-      false,
+            true,
-      true,
+            true,
-      false,
+            true
-      false,
+        ],
-      false,
+        "TestRememberMultipleIds": [
-      false,
+            true,
-      true
+            true,
-    ]
+            true
-  }
+        ],
-}
+        "TestRememberMultipleIdsWithNoise": [
            true,
            true,
            true
        ],
        "TestRememberMultiplePhrasesWithNoise": [
            true,
            true,
            true
        ],
        "TestRetrieval2": [
            true,
            true,
            true
        ],
        "TestRetrieval3": [
            true,
            true,
            true
        ],
        "TestSearch": [
            true,
            true,
            true,
            true
        ],
        "TestWriteFile": [
            true,
            true,
            true
        ]
    }
 }
--- a/agbenchmark/reports/mini-agi/1.1_TestWriteFile.json
+++ b/agbenchmark/reports/mini-agi/1.1_TestWriteFile.json
@ -0,0 +1,36 @@
 {
    "TestWriteFile": {
        "data_path": "agbenchmark/challenges/interface/write_file",
        "is_regression": true,
        "metrics": {
            "difficulty": "interface",
            "success": true,
            "non_mock_success_%": 100.0,
            "run_time": "0.009 seconds"
        }
    },
    "additional": {
        "model": "gpt-3.5-turbo"
    },
    "command": "agbenchmark start --test TestWriteFile",
    "completion_time": "2023-07-17-09:54",
    "config": {
        "workspace": "${os.path.join(Path.home(), 'miniagi')}"
    },
    "metrics": {
        "run_time": "22.36 seconds",
        "highest_difficulty": "interface: 1"
    },
    "tests": {
        "TestWriteFile": {
            "data_path": "agbenchmark/challenges/interface/write_file",
            "is_regression": false,
            "metrics": {
                "difficulty": "interface",
                "success": true,
                "success_%": 40.0,
                "run_time": "22.169 seconds"
            }
        }
    }
 }
--- a/agbenchmark/reports/mini-agi/1_TestWriteFIle.json
+++ b/agbenchmark/reports/mini-agi/1_TestWriteFIle.json
@ -0,0 +1,27 @@
 {
  "command": "agbenchmark start --test TestWriteFile",
  "completion_time": "2023-07-15-22:13",
  "metrics": {
    "run_time": "12.4 seconds",
    "highest_difficulty": "interface: 1"
  },
  "tests": {
    "TestWriteFile": {
      "data_path": "agbenchmark/challenges/interface/write_file",
      "is_regression": false,
      "metrics": {
        "difficulty": "interface",
        "success": true,
        "success_%": 50.0,
        "run_time": "12.127 seconds"
      }
    }
  },
  "config": {
    "workspace": "${os.path.join(Path.home(), 'miniagi')}",
    "entry_path": "agbenchmark.benchmarks"
  },
  "additional": {
    "model": "gpt-4"
  }
 }
--- a/agbenchmark/reports/mini-agi/2.1_TestReadFile.json
+++ b/agbenchmark/reports/mini-agi/2.1_TestReadFile.json
@ -0,0 +1,27 @@
 {
  "command": "agbenchmark start --test TestReadFile",
  "completion_time": "2023-07-17-10:12",
  "metrics": {
    "run_time": "65.27 seconds",
    "highest_difficulty": "interface: 1"
  },
  "tests": {
    "TestReadFile": {
      "data_path": "agbenchmark/challenges/interface/read_file",
      "is_regression": true,
      "metrics": {
        "difficulty": "interface",
        "success": true,
        "success_%": 100.0,
        "run_time": "65.074 seconds"
      }
    }
  },
  "config": {
    "workspace": "${os.path.join(Path.home(), 'miniagi')}"
  },
  "additional": {
    "model": "gpt-4",
    "reached_termination_time": true
  }
 }
--- a/agbenchmark/reports/mini-agi/2_TestReadFile.json
+++ b/agbenchmark/reports/mini-agi/2_TestReadFile.json
@ -0,0 +1,27 @@
 {
  "command": "agbenchmark start --test TestReadFile",
  "completion_time": "2023-07-15-22:13",
  "metrics": {
    "run_time": "31.2 seconds",
    "highest_difficulty": "interface: 1"
  },
  "tests": {
    "TestReadFile": {
      "data_path": "agbenchmark/challenges/interface/read_file",
      "is_regression": true,
      "metrics": {
        "difficulty": "interface",
        "success": true,
        "success_%": 100.0,
        "run_time": "30.903 seconds"
      }
    }
  },
  "config": {
    "workspace": "${os.path.join(Path.home(), 'miniagi')}",
    "entry_path": "agbenchmark.benchmarks"
  },
  "additional": {
    "model": "gpt-4"
  }
 }
--- a/agbenchmark/reports/mini-agi/3_TestSearch.json
+++ b/agbenchmark/reports/mini-agi/3_TestSearch.json
@ -0,0 +1,27 @@
 {
  "command": "agbenchmark start --test TestSearch",
  "completion_time": "2023-07-15-22:14",
  "metrics": {
    "run_time": "16.88 seconds",
    "highest_difficulty": "interface: 1"
  },
  "tests": {
    "TestSearch": {
      "data_path": "agbenchmark/challenges/interface/search",
      "is_regression": true,
      "metrics": {
        "difficulty": "interface",
        "success": true,
        "success_%": 100.0,
        "run_time": "16.572 seconds"
      }
    }
  },
  "config": {
    "workspace": "${os.path.join(Path.home(), 'miniagi')}",
    "entry_path": "agbenchmark.benchmarks"
  },
  "additional": {
    "model": "gpt-4"
  }
 }
--- a/agbenchmark/reports/mini-agi/4.1_TestDebugSimpleTypoWithGuidance.json
+++ b/agbenchmark/reports/mini-agi/4.1_TestDebugSimpleTypoWithGuidance.json
@ -0,0 +1,28 @@
 {
  "command": "agbenchmark start --test TestDebugSimpleTypoWithGuidance",
  "completion_time": "2023-07-15-22:16",
  "metrics": {
    "run_time": "45.92 seconds",
    "highest_difficulty": ": 0"
  },
  "tests": {
    "TestDebugSimpleTypoWithGuidance": {
      "data_path": "agbenchmark/challenges/code/d1",
      "is_regression": false,
      "metrics": {
        "difficulty": "basic",
        "success": false,
        "fail_reason": "assert 1 in [0.0]",
        "success_%": 0.0,
        "run_time": "45.599 seconds"
      }
    }
  },
  "config": {
    "workspace": "${os.path.join(Path.home(), 'miniagi')}",
    "entry_path": "agbenchmark.benchmarks"
  },
  "additional": {
    "model": "gpt-4"
  }
 }
--- a/agbenchmark/reports/mini-agi/4_TestDebugSimpleTypoWithGuidance.json
+++ b/agbenchmark/reports/mini-agi/4_TestDebugSimpleTypoWithGuidance.json
@ -0,0 +1,28 @@
 {
  "command": "agbenchmark start --test TestDebugSimpleTypoWithGuidance",
  "completion_time": "2023-07-15-22:15",
  "metrics": {
    "run_time": "32.99 seconds",
    "highest_difficulty": ": 0"
  },
  "tests": {
    "TestDebugSimpleTypoWithGuidance": {
      "data_path": "agbenchmark/challenges/code/d1",
      "is_regression": false,
      "metrics": {
        "difficulty": "basic",
        "success": false,
        "fail_reason": "assert 1 in [0.0]",
        "success_%": 0.0,
        "run_time": "32.582 seconds"
      }
    }
  },
  "config": {
    "workspace": "${os.path.join(Path.home(), 'miniagi')}",
    "entry_path": "agbenchmark.benchmarks"
  },
  "additional": {
    "model": "gpt-4"
  }
 }
--- a/agbenchmark/reports/mini-agi/file1_07-16-13-07.json
+++ b/agbenchmark/reports/mini-agi/file1_07-16-13-07.json
@ -1,23 +0,0 @@
 {
    "command": "agbenchmark start --test TestWriteFile",
    "completion_time": "2023-07-16-13:07",
    "metrics": {
        "run_time": "13.91 seconds",
        "highest_difficulty": "interface: 1"
    },
    "tests": {
        "TestWriteFile": {
            "data_path": "agbenchmark/challenges/interface/write_file",
            "is_regression": false,
            "metrics": {
                "difficulty": "interface",
                "success": true,
                "success_%": 30.0,
                "run_time": "13.684 seconds"
            }
        }
    },
    "config": {
        "workspace": "${os.path.join(Path.home(), 'miniagi')}"
    }
 }
--- a/agbenchmark/utils.py
+++ b/agbenchmark/utils.py
@ -1,7 +1,9 @@
 # radio charts, logs, helper functions for tests, anything else relevant.
 import glob
 import math
 import os
 import re
 import sys
 from datetime import datetime
 from pathlib import Path
 from typing import Any
@ -17,17 +19,49 @@ HOME_ENV = os.getenv("HOME_ENV")
 def calculate_info_test_path(reports_path: Path) -> str:
    command = sys.argv
    if not reports_path.exists():
        reports_path.mkdir(parents=True, exist_ok=True)
-        return str(
+
-            reports_path / f"file1_{datetime.now().strftime('%m-%d-%H-%M')}.json"
+    json_files = glob.glob(str(reports_path / "*.json"))
-        )
+
-    else:
+    # Default naming scheme
-        json_files = glob.glob(str(reports_path / "*.json"))
+    file_count = len(json_files)
-        file_count = len(json_files)
+    run_name = f"file{file_count + 1}_{datetime.now().strftime('%m-%d-%H-%M')}.json"
-        run_name = f"file{file_count + 1}_{datetime.now().strftime('%m-%d-%H-%M')}.json"
+
-        new_file_path = reports_path / run_name
+    # # If "--test" is in command
-        return str(new_file_path)
+    if "--test" in command:
        test_index = command.index("--test")
        try:
            test_arg = command[test_index + 1]  # Argument after --test
        except IndexError:
            raise ValueError("Expected an argument after --test")
        # Get all files that include the string that is the argument after --test
        related_files = [f for f in json_files if test_arg in f]
        related_file_count = len(related_files)
        # Determine the prefix based on the existing files
        if related_file_count == 0:
            # Try to find the highest prefix number among all files, then increment it
            all_prefix_numbers = []
            for f in json_files:
                number = float(Path(f).stem.split("_")[0])
                all_prefix_numbers.append(math.floor(number))
            max_prefix = max(all_prefix_numbers, default=0)
            print("HEY WE ARE HERE BIG DAWG", max_prefix)
            run_name = f"{max_prefix + 1}_{test_arg}.json"
        else:
            # Take the number from before the _ and add the .{number}
            prefix_str = Path(related_files[0]).stem.rsplit("_", 1)[0].split(".")[0]
            prefix = math.floor(float(prefix_str))
            run_name = f"{prefix}.{related_file_count}_{test_arg}.json"
    print("run_namerun_namerun_name", run_name)
    new_file_path = reports_path / run_name
    return str(new_file_path)
 def replace_backslash(value: Any) -> Any:
--- a/agent/mini-agi
+++ b/agent/mini-agi
@ -1 +1 @@
-Subproject commit bb02bf0d5cdbf045ff145271b78e4b4ee7225011
+Subproject commit 0a9fcd8c3d6352ef42d436cff7b64683a7a7ca2d
		`@ -1 +1 @@`
			`Subproject commit bb02bf0d5cdbf045ff145271b78e4b4ee7225011`				`Subproject commit 0a9fcd8c3d6352ef42d436cff7b64683a7a7ca2d`