From bcc32cc441d475a9dd5f1cfd04062b8bc5cd024c Mon Sep 17 00:00:00 2001 From: k-boikov <64261260+k-boikov@users.noreply.github.com> Date: Sun, 21 May 2023 04:20:03 +0300 Subject: [PATCH] Fix split_file when overlap = 0, add test (#3599) Co-authored-by: Nicholas Tindle --- autogpt/commands/file_operations.py | 2 +- tests/unit/test_file_operations.py | 39 +++++++++++++++++++++++++---- 2 files changed, 35 insertions(+), 6 deletions(-) diff --git a/autogpt/commands/file_operations.py b/autogpt/commands/file_operations.py index 28198ebae..528f8d669 100644 --- a/autogpt/commands/file_operations.py +++ b/autogpt/commands/file_operations.py @@ -131,7 +131,7 @@ def split_file( while start < content_length: end = start + max_length if end + overlap < content_length: - chunk = content[start : end + overlap - 1] + chunk = content[start : end + max(overlap - 1, 0)] else: chunk = content[start:content_length] diff --git a/tests/unit/test_file_operations.py b/tests/unit/test_file_operations.py index fb8300d47..98d4efb77 100644 --- a/tests/unit/test_file_operations.py +++ b/tests/unit/test_file_operations.py @@ -151,12 +151,41 @@ def test_log_operation_with_checksum(config: Config): assert f"log_test: path/to/test #ABCDEF\n" in content +@pytest.mark.parametrize( + "max_length, overlap, content, expected", + [ + ( + 4, + 1, + "abcdefghij", + ["abcd", "defg", "ghij"], + ), + ( + 4, + 0, + "abcdefghijkl", + ["abcd", "efgh", "ijkl"], + ), + ( + 4, + 0, + "abcdefghijklm", + ["abcd", "efgh", "ijkl", "m"], + ), + ( + 4, + 0, + "abcdefghijk", + ["abcd", "efgh", "ijk"], + ), + ], +) # Test splitting a file into chunks -def test_split_file(): - content = "abcdefghij" - chunks = list(file_ops.split_file(content, max_length=4, overlap=1)) - expected = ["abcd", "defg", "ghij"] - assert chunks == expected +def test_split_file(max_length, overlap, content, expected): + assert ( + list(file_ops.split_file(content, max_length=max_length, overlap=overlap)) + == expected + ) def test_read_file(test_file_with_content_path: Path, file_content):