diff --git a/reports/gpt-engineer/folder12_08-01-08-13/report.json b/reports/gpt-engineer/folder12_08-01-08-13/report.json new file mode 100644 index 000000000..0721b62e1 --- /dev/null +++ b/reports/gpt-engineer/folder12_08-01-08-13/report.json @@ -0,0 +1,14 @@ +{ + "command": "agbenchmark start", + "completion_time": "2023-08-01-08:14", + "benchmark_start_time": "2023-08-01-08:13", + "metrics": { + "run_time": "62.54 seconds", + "highest_difficulty": "No successful tests" + }, + "tests": {}, + "config": { + "workspace": "projects/my-new-project/workspace", + "entry_path": "agbenchmark.benchmarks" + } +} \ No newline at end of file diff --git a/reports/gpt-engineer/regression_tests.json b/reports/gpt-engineer/regression_tests.json index 9e26dfeeb..df8f99c37 100644 --- a/reports/gpt-engineer/regression_tests.json +++ b/reports/gpt-engineer/regression_tests.json @@ -1 +1,6 @@ -{} \ No newline at end of file +{ + "TestWriteFile": { + "difficulty": "interface", + "data_path": "agbenchmark/challenges/interface/write_file/data.json" + } +} \ No newline at end of file diff --git a/reports/gpt-engineer/success_rate.json b/reports/gpt-engineer/success_rate.json index dbbc92d6c..573192569 100644 --- a/reports/gpt-engineer/success_rate.json +++ b/reports/gpt-engineer/success_rate.json @@ -3,6 +3,12 @@ false, false ], + "TestAdaptSimpleTypoWithGuidance": [ + false + ], + "TestAdaptTeslaRevenue": [ + false + ], "TestBasicContentGen": [ false, false @@ -15,14 +21,38 @@ false, false ], + "TestDebugMultipleTypo": [ + false + ], "TestDebugSimpleTypoWithGuidance": [ false, false ], + "TestDebugSimpleTypoWithoutGuidance": [ + false + ], + "TestFunctionCodeGeneration": [ + false + ], "TestGoalDivergence": [ false, false ], + "TestGoalLoss_Hard": [ + false + ], + "TestGoalLoss_Medium": [ + false + ], + "TestGoalLoss_Simple": [ + false + ], + "TestGoalLoss_advanced": [ + false + ], + "TestInstructionFollowing": [ + false + ], "TestPlanCreation": [ false, true @@ -31,10 +61,28 @@ false, false ], + "TestRememberMultipleIds": [ + false + ], + "TestRememberMultiplePhrasesWithNoise": [ + false + ], + "TestRememberMultipleWithNoise": [ + false + ], + "TestRetrieval3": [ + false + ], + "TestReturnCode_Modify": [ + false + ], "TestReturnCode_Simple": [ false, false ], + "TestReturnCode_Tests": [ + false + ], "TestReturnCode_Write": [ false, false @@ -55,59 +103,12 @@ false, false ], - "TestWriteFile": [ - true, - true - ], - "TestDebugSimpleTypoWithoutGuidance": [ - false - ], - "TestAdaptSimpleTypoWithGuidance": [ - false - ], - "TestInstructionFollowing": [ - false - ], - "TestGoalLoss_Simple": [ - false - ], - "TestGoalLoss_advanced": [ - false - ], - "TestGoalLoss_Hard": [ - false - ], - "TestGoalLoss_Medium": [ - false - ], - "TestRememberMultipleIds": [ - false - ], - "TestFunctionCodeGeneration": [ - false - ], - "TestReturnCode_Modify": [ - false - ], - "TestDebugMultipleTypo": [ - false - ], - "TestRememberMultipleWithNoise": [ - false - ], - "TestRetrieval3": [ - false - ], - "TestAdaptTeslaRevenue": [ - false - ], "TestThreeSum": [ false ], - "TestReturnCode_Tests": [ - false - ], - "TestRememberMultiplePhrasesWithNoise": [ - false + "TestWriteFile": [ + true, + true, + true ] } \ No newline at end of file