From 4b70e778d265497d1f49cff373229c5517fb23f6 Mon Sep 17 00:00:00 2001 From: Zamil Majdy Date: Wed, 4 Jun 2025 23:32:32 +0700 Subject: [PATCH] feat(backend): Add nested dynamic pin-name support (#10082) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Suppose we have a pin with list[list[int]] type, and we want to directly insert a new value at the first index of the first list, e.g. list[0][0] = X, through a dynamic pin. This is translated into the pin name list_$_0_$_0, which the system does not currently support. ### Changes 🏗️ Add support for nested dynamic pins for list, object, and dict. ### Checklist 📋 #### For code changes: - [x] I have clearly listed my changes in the PR description - [x] I have made a test plan - [x] I have tested my changes according to the test plan: - [x] lots of unit tests - [x] Tried inserting the value directly on the `value` nested field on Google Sheets Write block. image --- .../backend/backend/executor/manager.py | 7 + .../backend/backend/executor/utils.py | 294 ++++++++++------- .../test/executor/test_execution_functions.py | 299 +++++++++++++++--- 3 files changed, 443 insertions(+), 157 deletions(-) diff --git a/autogpt_platform/backend/backend/executor/manager.py b/autogpt_platform/backend/backend/executor/manager.py index 48d14edd5a..acc5bab91d 100644 --- a/autogpt_platform/backend/backend/executor/manager.py +++ b/autogpt_platform/backend/backend/executor/manager.py @@ -305,6 +305,13 @@ def _enqueue_next_nodes( ) def register_next_executions(node_link: Link) -> list[NodeExecutionEntry]: + try: + return _register_next_executions(node_link) + except Exception as e: + log_metadata.exception(f"Failed to register next executions: {e}") + return [] + + def _register_next_executions(node_link: Link) -> list[NodeExecutionEntry]: enqueued_executions = [] next_output_name = node_link.source_name next_input_name = node_link.sink_name diff --git a/autogpt_platform/backend/backend/executor/utils.py 
b/autogpt_platform/backend/backend/executor/utils.py index 986eec31aa..12e4e8c5ff 100644 --- a/autogpt_platform/backend/backend/executor/utils.py +++ b/autogpt_platform/backend/backend/executor/utils.py @@ -174,68 +174,195 @@ def _is_cost_filter_match(cost_filter: BlockInput, input_data: BlockInput) -> bo # ============ Execution Input Helpers ============ # +# --------------------------------------------------------------------------- # +# Delimiters +# --------------------------------------------------------------------------- # + LIST_SPLIT = "_$_" DICT_SPLIT = "_#_" OBJC_SPLIT = "_@_" +_DELIMS = (LIST_SPLIT, DICT_SPLIT, OBJC_SPLIT) + +# --------------------------------------------------------------------------- # +# Tokenisation utilities +# --------------------------------------------------------------------------- # + + +def _next_delim(s: str) -> tuple[str | None, int]: + """ + Return the *earliest* delimiter appearing in `s` and its index. + + If none present → (None, -1). + """ + first: str | None = None + pos = len(s) # sentinel: larger than any real index + for d in _DELIMS: + i = s.find(d) + if 0 <= i < pos: + first, pos = d, i + return first, (pos if first else -1) + + +def _tokenise(path: str) -> list[tuple[str, str]] | None: + """ + Convert the raw path string (starting with a delimiter) into + [ (delimiter, identifier), … ] or None if the syntax is malformed. + """ + tokens: list[tuple[str, str]] = [] + while path: + # 1. Which delimiter starts this chunk? + delim = next((d for d in _DELIMS if path.startswith(d)), None) + if delim is None: + return None # invalid syntax + + # 2. 
Slice off the delimiter, then up to the next delimiter (or EOS) + path = path[len(delim) :] + nxt_delim, pos = _next_delim(path) + token, path = ( + path[: pos if pos != -1 else len(path)], + path[pos if pos != -1 else len(path) :], + ) + if token == "": + return None # empty identifier is invalid + tokens.append((delim, token)) + return tokens + + +# --------------------------------------------------------------------------- # +# Public API – parsing (flattened ➜ concrete) +# --------------------------------------------------------------------------- # + def parse_execution_output(output: BlockData, name: str) -> Any | None: """ - Extracts partial output data by name from a given BlockData. + Retrieve a nested value out of `output` using the flattened *name*. - The function supports extracting data from lists, dictionaries, and objects - using specific naming conventions: - - For lists: _$_ - - For dictionaries: _#_ - - For objects: _@_ - - Args: - output (BlockData): A tuple containing the output name and data. - name (str): The name used to extract specific data from the output. - - Returns: - Any | None: The extracted data if found, otherwise None. - - Examples: - >>> output = ("result", [10, 20, 30]) - >>> parse_execution_output(output, "result_$_1") - 20 - - >>> output = ("config", {"key1": "value1", "key2": "value2"}) - >>> parse_execution_output(output, "config_#_key1") - 'value1' - - >>> class Sample: - ... attr1 = "value1" - ... attr2 = "value2" - >>> output = ("object", Sample()) - >>> parse_execution_output(output, "object_@_attr1") - 'value1' + On any failure (wrong name, wrong type, out-of-range, bad path) + returns **None**. 
""" - output_name, output_data = output + base_name, data = output - if name == output_name: - return output_data + # Exact match → whole object + if name == base_name: + return data - if name.startswith(f"{output_name}{LIST_SPLIT}"): - index = int(name.split(LIST_SPLIT)[1]) - if not isinstance(output_data, list) or len(output_data) <= index: - return None - return output_data[int(name.split(LIST_SPLIT)[1])] + # Must start with the expected name + if not name.startswith(base_name): + return None + path = name[len(base_name) :] + if not path: + return None # nothing left to parse - if name.startswith(f"{output_name}{DICT_SPLIT}"): - index = name.split(DICT_SPLIT)[1] - if not isinstance(output_data, dict) or index not in output_data: - return None - return output_data[index] - - if name.startswith(f"{output_name}{OBJC_SPLIT}"): - index = name.split(OBJC_SPLIT)[1] - if isinstance(output_data, object) and hasattr(output_data, index): - return getattr(output_data, index) + tokens = _tokenise(path) + if tokens is None: return None - return None + cur: Any = data + for delim, ident in tokens: + if delim == LIST_SPLIT: + # list[index] + try: + idx = int(ident) + except ValueError: + return None + if not isinstance(cur, list) or idx >= len(cur): + return None + cur = cur[idx] + + elif delim == DICT_SPLIT: + if not isinstance(cur, dict) or ident not in cur: + return None + cur = cur[ident] + + elif delim == OBJC_SPLIT: + if not hasattr(cur, ident): + return None + cur = getattr(cur, ident) + + else: + return None # unreachable + + return cur + + +def _assign(container: Any, tokens: list[tuple[str, str]], value: Any) -> Any: + """ + Recursive helper that *returns* the (possibly new) container with + `value` assigned along the remaining `tokens` path. 
+ """ + if not tokens: + return value # leaf reached + + delim, ident = tokens[0] + rest = tokens[1:] + + # ---------- list ---------- + if delim == LIST_SPLIT: + try: + idx = int(ident) + except ValueError: + raise ValueError("index must be an integer") + + if container is None: + container = [] + elif not isinstance(container, list): + container = list(container) if hasattr(container, "__iter__") else [] + + while len(container) <= idx: + container.append(None) + container[idx] = _assign(container[idx], rest, value) + return container + + # ---------- dict ---------- + if delim == DICT_SPLIT: + if container is None: + container = {} + elif not isinstance(container, dict): + container = dict(container) if hasattr(container, "items") else {} + container[ident] = _assign(container.get(ident), rest, value) + return container + + # ---------- object ---------- + if delim == OBJC_SPLIT: + if container is None or not isinstance(container, MockObject): + container = MockObject() + setattr( + container, + ident, + _assign(getattr(container, ident, None), rest, value), + ) + return container + + return value # unreachable + + +def merge_execution_input(data: BlockInput) -> BlockInput: + """ + Reconstruct nested objects from a *flattened* dict of key → value. + + Raises ValueError on syntactically invalid list indices. 
+ """ + merged: BlockInput = {} + + for key, value in data.items(): + # Split off the base name (before the first delimiter, if any) + delim, pos = _next_delim(key) + if delim is None: + merged[key] = value + continue + + base, path = key[:pos], key[pos:] + tokens = _tokenise(path) + if tokens is None: + # Invalid key; treat as scalar under the raw name + merged[key] = value + continue + + merged[base] = _assign(merged.get(base), tokens, value) + + data.update(merged) + return data def validate_exec( @@ -292,77 +419,6 @@ def validate_exec( return data, node_block.name -def merge_execution_input(data: BlockInput) -> BlockInput: - """ - Merges dynamic input pins into a single list, dictionary, or object based on naming patterns. - - This function processes input keys that follow specific patterns to merge them into a unified structure: - - `_$_` for list inputs. - - `_#_` for dictionary inputs. - - `_@_` for object inputs. - - Args: - data (BlockInput): A dictionary containing input keys and their corresponding values. - - Returns: - BlockInput: A dictionary with merged inputs. - - Raises: - ValueError: If a list index is not an integer. - - Examples: - >>> data = { - ... "list_$_0": "a", - ... "list_$_1": "b", - ... "dict_#_key1": "value1", - ... "dict_#_key2": "value2", - ... "object_@_attr1": "value1", - ... "object_@_attr2": "value2" - ... } - >>> merge_execution_input(data) - { - "list": ["a", "b"], - "dict": {"key1": "value1", "key2": "value2"}, - "object": - } - """ - - # Merge all input with _$_ into a single list. - items = list(data.items()) - - for key, value in items: - if LIST_SPLIT not in key: - continue - name, index = key.split(LIST_SPLIT) - if not index.isdigit(): - raise ValueError(f"Invalid key: {key}, #{index} index must be an integer.") - - data[name] = data.get(name, []) - if int(index) >= len(data[name]): - # Pad list with empty string on missing indices. 
- data[name].extend([""] * (int(index) - len(data[name]) + 1)) - data[name][int(index)] = value - - # Merge all input with _#_ into a single dict. - for key, value in items: - if DICT_SPLIT not in key: - continue - name, index = key.split(DICT_SPLIT) - data[name] = data.get(name, {}) - data[name][index] = value - - # Merge all input with _@_ into a single object. - for key, value in items: - if OBJC_SPLIT not in key: - continue - name, index = key.split(OBJC_SPLIT) - if name not in data or not isinstance(data[name], object): - data[name] = MockObject() - setattr(data[name], index, value) - - return data - - def _validate_node_input_credentials( graph: GraphModel, user_id: str, diff --git a/autogpt_platform/backend/test/executor/test_execution_functions.py b/autogpt_platform/backend/test/executor/test_execution_functions.py index 9cdf8430e4..197daa2239 100644 --- a/autogpt_platform/backend/test/executor/test_execution_functions.py +++ b/autogpt_platform/backend/test/executor/test_execution_functions.py @@ -1,55 +1,278 @@ +from typing import cast + +import pytest + from backend.executor.utils import merge_execution_input, parse_execution_output +from backend.util.mock import MockObject def test_parse_execution_output(): - # Test case for list extraction + # Test case for basic output + output = ("result", "value") + assert parse_execution_output(output, "result") == "value" + + # Test case for list output output = ("result", [10, 20, 30]) assert parse_execution_output(output, "result_$_1") == 20 - assert parse_execution_output(output, "result_$_3") is None - # Test case for dictionary extraction - output = ("config", {"key1": "value1", "key2": "value2"}) - assert parse_execution_output(output, "config_#_key1") == "value1" - assert parse_execution_output(output, "config_#_key3") is None + # Test case for dict output + output = ("result", {"key1": "value1", "key2": "value2"}) + assert parse_execution_output(output, "result_#_key1") == "value1" - # Test case for object 
extraction + # Test case for object output class Sample: - attr1 = "value1" - attr2 = "value2" + def __init__(self): + self.attr1 = "value1" + self.attr2 = "value2" - output = ("object", Sample()) - assert parse_execution_output(output, "object_@_attr1") == "value1" - assert parse_execution_output(output, "object_@_attr3") is None + output = ("result", Sample()) + assert parse_execution_output(output, "result_@_attr1") == "value1" - # Test case for direct match - output = ("direct", "match") - assert parse_execution_output(output, "direct") == "match" - assert parse_execution_output(output, "nomatch") is None + # Test case for nested list output + output = ("result", [[1, 2], [3, 4]]) + assert parse_execution_output(output, "result_$_0_$_1") == 2 + assert parse_execution_output(output, "result_$_1_$_0") == 3 + + # Test case for list containing dict + output = ("result", [{"key1": "value1"}, {"key2": "value2"}]) + assert parse_execution_output(output, "result_$_0_#_key1") == "value1" + assert parse_execution_output(output, "result_$_1_#_key2") == "value2" + + # Test case for dict containing list + output = ("result", {"key1": [1, 2], "key2": [3, 4]}) + assert parse_execution_output(output, "result_#_key1_$_1") == 2 + assert parse_execution_output(output, "result_#_key2_$_0") == 3 + + # Test case for complex nested structure + class NestedSample: + def __init__(self): + self.attr1 = [1, 2] + self.attr2 = {"key": "value"} + + output = ("result", [NestedSample(), {"key": [1, 2]}]) + assert parse_execution_output(output, "result_$_0_@_attr1_$_1") == 2 + assert parse_execution_output(output, "result_$_0_@_attr2_#_key") == "value" + assert parse_execution_output(output, "result_$_1_#_key_$_0") == 1 + + # Test case for non-existent paths + output = ("result", [1, 2, 3]) + assert parse_execution_output(output, "result_$_5") is None + assert parse_execution_output(output, "result_#_key") is None + assert parse_execution_output(output, "result_@_attr") is None + assert 
parse_execution_output(output, "wrong_name") is None + + # Test cases for delimiter processing order + # Test case 1: List -> Dict -> List + output = ("result", [[{"key": [1, 2]}], [3, 4]]) + assert parse_execution_output(output, "result_$_0_$_0_#_key_$_1") == 2 + + # Test case 2: Dict -> List -> Object + class NestedObj: + def __init__(self): + self.value = "nested" + + output = ("result", {"key": [NestedObj(), 2]}) + assert parse_execution_output(output, "result_#_key_$_0_@_value") == "nested" + + # Test case 3: Object -> List -> Dict + class ParentObj: + def __init__(self): + self.items = [{"nested": "value"}] + + output = ("result", ParentObj()) + assert parse_execution_output(output, "result_@_items_$_0_#_nested") == "value" + + # Test case 4: Complex nested structure with all types + class ComplexObj: + def __init__(self): + self.data = [{"items": [{"value": "deep"}]}] + + output = ("result", {"key": [ComplexObj()]}) + assert ( + parse_execution_output( + output, "result_#_key_$_0_@_data_$_0_#_items_$_0_#_value" + ) + == "deep" + ) + + # Test case 5: Invalid paths that should return None + output = ("result", [{"key": [1, 2]}]) + assert parse_execution_output(output, "result_$_0_#_wrong_key") is None + assert parse_execution_output(output, "result_$_0_#_key_$_5") is None + assert parse_execution_output(output, "result_$_0_@_attr") is None + + # Test case 6: Mixed delimiter types in wrong order + output = ("result", {"key": [1, 2]}) + assert ( + parse_execution_output(output, "result_#_key_$_1_@_attr") is None + ) # Should fail at @_attr + assert ( + parse_execution_output(output, "result_@_attr_$_0_#_key") is None + ) # Should fail at @_attr def test_merge_execution_input(): - # Test case for merging list inputs - data = {"list_$_0": "a", "list_$_1": "b", "list_$_3": "d"} - merged_data = merge_execution_input(data) - assert merged_data["list"] == ["a", "b", "", "d"] + # Test case for basic list extraction + data = { + "list_$_0": "a", + "list_$_1": "b", + } + 
result = merge_execution_input(data) + assert "list" in result + assert result["list"] == ["a", "b"] - # Test case for merging dictionary inputs - data = {"dict_#_key1": "value1", "dict_#_key2": "value2"} - merged_data = merge_execution_input(data) - assert merged_data["dict"] == {"key1": "value1", "key2": "value2"} + # Test case for basic dict extraction + data = { + "dict_#_key1": "value1", + "dict_#_key2": "value2", + } + result = merge_execution_input(data) + assert "dict" in result + assert result["dict"] == {"key1": "value1", "key2": "value2"} - # Test case for merging object inputs - data = {"object_@_attr1": "value1", "object_@_attr2": "value2"} - merged_data = merge_execution_input(data) - assert hasattr(merged_data["object"], "attr1") - assert hasattr(merged_data["object"], "attr2") - assert merged_data["object"].attr1 == "value1" - assert merged_data["object"].attr2 == "value2" + # Test case for object extraction + class Sample: + def __init__(self): + self.attr1 = None + self.attr2 = None - # Test case for mixed inputs - data = {"list_$_0": "a", "dict_#_key1": "value1", "object_@_attr1": "value1"} - merged_data = merge_execution_input(data) - assert merged_data["list"] == ["a"] - assert merged_data["dict"] == {"key1": "value1"} - assert hasattr(merged_data["object"], "attr1") - assert merged_data["object"].attr1 == "value1" + data = { + "object_@_attr1": "value1", + "object_@_attr2": "value2", + } + result = merge_execution_input(data) + assert "object" in result + assert isinstance(result["object"], MockObject) + assert result["object"].attr1 == "value1" + assert result["object"].attr2 == "value2" + + # Test case for nested list extraction + data = { + "nested_list_$_0_$_0": "a", + "nested_list_$_0_$_1": "b", + "nested_list_$_1_$_0": "c", + } + result = merge_execution_input(data) + assert "nested_list" in result + assert result["nested_list"] == [["a", "b"], ["c"]] + + # Test case for list containing dict + data = { + "list_with_dict_$_0_#_key1": 
"value1", + "list_with_dict_$_0_#_key2": "value2", + "list_with_dict_$_1_#_key3": "value3", + } + result = merge_execution_input(data) + assert "list_with_dict" in result + assert result["list_with_dict"] == [ + {"key1": "value1", "key2": "value2"}, + {"key3": "value3"}, + ] + + # Test case for dict containing list + data = { + "dict_with_list_#_key1_$_0": "value1", + "dict_with_list_#_key1_$_1": "value2", + "dict_with_list_#_key2_$_0": "value3", + } + result = merge_execution_input(data) + assert "dict_with_list" in result + assert result["dict_with_list"] == { + "key1": ["value1", "value2"], + "key2": ["value3"], + } + + # Test case for complex nested structure + data = { + "complex_$_0_#_key1_$_0": "value1", + "complex_$_0_#_key1_$_1": "value2", + "complex_$_0_#_key2_@_attr1": "value3", + "complex_$_1_#_key3_$_0": "value4", + } + result = merge_execution_input(data) + assert "complex" in result + assert result["complex"][0]["key1"] == ["value1", "value2"] + assert isinstance(result["complex"][0]["key2"], MockObject) + assert result["complex"][0]["key2"].attr1 == "value3" + assert result["complex"][1]["key3"] == ["value4"] + + # Test case for invalid list index + data = {"list_$_invalid": "value"} + with pytest.raises(ValueError, match="index must be an integer"): + merge_execution_input(data) + + # Test cases for delimiter ordering + # Test case 1: List -> Dict -> List + data = { + "nested_$_0_#_key_$_0": "value1", + "nested_$_0_#_key_$_1": "value2", + } + result = merge_execution_input(data) + assert "nested" in result + assert result["nested"][0]["key"] == ["value1", "value2"] + + # Test case 2: Dict -> List -> Object + data = { + "nested_#_key_$_0_@_attr": "value1", + "nested_#_key_$_1_@_attr": "value2", + } + result = merge_execution_input(data) + assert "nested" in result + assert isinstance(result["nested"]["key"][0], MockObject) + assert result["nested"]["key"][0].attr == "value1" + assert result["nested"]["key"][1].attr == "value2" + + # Test case 3: 
Object -> List -> Dict + data = { + "nested_@_items_$_0_#_key": "value1", + "nested_@_items_$_1_#_key": "value2", + } + result = merge_execution_input(data) + assert "nested" in result + nested = result["nested"] + assert isinstance(nested, MockObject) + items = nested.items + assert isinstance(items, list) + assert items[0]["key"] == "value1" + assert items[1]["key"] == "value2" + + # Test case 4: Complex nested structure with all types + data = { + "deep_#_key_$_0_@_data_$_0_#_items_$_0_#_value": "deep_value", + "deep_#_key_$_0_@_data_$_1_#_items_$_0_#_value": "another_value", + } + result = merge_execution_input(data) + assert "deep" in result + deep_key = result["deep"]["key"][0] + assert deep_key is not None + data0 = getattr(deep_key, "data", None) + assert isinstance(data0, list) + # Check items0 + items0 = None + if len(data0) > 0 and isinstance(data0[0], dict) and "items" in data0[0]: + items0 = data0[0]["items"] + assert isinstance(items0, list) + items0 = cast(list, items0) + assert len(items0) > 0 + assert isinstance(items0[0], dict) + assert items0[0]["value"] == "deep_value" # type: ignore + # Check items1 + items1 = None + if len(data0) > 1 and isinstance(data0[1], dict) and "items" in data0[1]: + items1 = data0[1]["items"] + assert isinstance(items1, list) + items1 = cast(list, items1) + assert len(items1) > 0 + assert isinstance(items1[0], dict) + assert items1[0]["value"] == "another_value" # type: ignore + + # Test case 5: Mixed delimiter types in different orders + # the last one should replace the type + data = { + "mixed_$_0_#_key_@_attr": "value1", # List -> Dict -> Object + "mixed_#_key_$_0_@_attr": "value2", # Dict -> List -> Object + "mixed_@_attr_$_0_#_key": "value3", # Object -> List -> Dict + } + result = merge_execution_input(data) + assert "mixed" in result + assert result["mixed"].attr[0]["key"] == "value3"