Skip to content

Regex rename

RegexOutputKey

Bases: Input

Model for Regex.

Parameters:

Name Type Description Default
regex str

Regex to test against.

required
output_key str

Term for method to output to.

required
Source code in extraction_methods/plugins/regex_rename.py
24
25
26
27
28
29
30
31
32
33
34
class RegexOutputKey(Input):
    """
    A single rename rule: a regex paired with the key to output to.
    """

    # Pattern that existing property names are tested against (required).
    regex: str = Field(..., description="Regex to test against.")
    # Key that the matched property is written to (required).
    output_key: str = Field(..., description="Term for method to output to.")

RegexRenameExtract

Bases: ExtractionMethod

Takes a list of regex and output key combinations. Any existing properties that fully match a regex are renamed to the output key. Later regexes take precedence.

Method name: regex_rename

Example configuration

.. code-block:: yaml

- method: regex_rename
  inputs:
    regex_swaps:
      - regex: README
        output_key: metadata

noqa: W605

Source code in extraction_methods/plugins/regex_rename.py
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
class RegexRenameExtract(ExtractionMethod):
    """
    Takes a list of regex and output key combinations. Any existing properties
    that fully match a regex are renamed to the output key.
    Later regexes take precedence.

    **Method name:** ``regex_rename``

    Example configuration:
        .. code-block:: yaml

            - method: regex_rename
              inputs:
                regex_swaps:
                  - regex: README
                    output_key: metadata

    # noqa: W605
    """

    input_class = RegexRenameInput

    def matching_keys(self, keys: KeysView[str], key_regex: str) -> list[str]:
        """
        Find all keys that fully match regex

        :param keys: dictionary keys to test
        :type keys: KeysView
        :param key_regex: regex to test against
        :type key_regex: str

        :return: matching keys, in the iteration order of ``keys``
        :rtype: list
        """

        regex = re.compile(key_regex)

        # ``fullmatch`` rather than ``match``: the documented contract is a
        # full match, so "README" must not also select "README.md".
        return list(filter(regex.fullmatch, keys))

    def find(
        self, body: dict[str, Any], key_parts: list[str]
    ) -> tuple[dict[str, Any], Any]:
        """
        Remove the terms matching the key parts and return their value

        :param body: current body (modified in place)
        :type body: dict
        :param key_parts: key regexes separated by delimiter, one per level
        :type key_parts: list

        :return: updated body and the value of the last matching term
            (``None`` if nothing matched)
        :rtype: tuple
        """
        value = None
        head, rest = key_parts[0], key_parts[1:]

        # matching_keys returns a list, so deleting from body below is safe.
        for key in self.matching_keys(body.keys(), head):
            if rest:
                child = body[key]

                # Only dictionaries can be descended into.
                if not isinstance(child, dict):
                    continue

                body[key], nested_value = self.find(child, rest)

                # None means "nothing matched below this key"; it must not
                # wipe out a value already found under an earlier key.
                if nested_value is not None:
                    value = nested_value

            else:
                value = body.pop(key)

        return body, value

    def add(
        self, body: dict[str, Any], key_parts: list[str], value: Any
    ) -> dict[str, Any]:
        """
        Set the value at the (possibly nested) output key

        :param body: current body (modified in place)
        :type body: dict
        :param key_parts: output key parts separated by delimiter
        :type key_parts: list
        :param value: value to store at the output key
        :type value: Any

        :return: updated body
        :rtype: dict
        """
        head = key_parts[0]

        if len(key_parts) > 1:
            # Create missing intermediate levels instead of raising KeyError.
            body[head] = self.add(body.setdefault(head, {}), key_parts[1:], value)

        else:
            body[head] = value

        return body

    @update_input
    def run(self, body: dict[str, Any]) -> dict[str, Any]:
        """
        Apply every configured regex swap to the body, in order

        :param body: current body
        :type body: dict

        :return: updated body
        :rtype: dict
        """

        for swap in self.input.regex_swaps:
            # An empty delimiter disables nesting: the whole string is one part.
            key_parts = (
                swap.regex.split(self.input.delimiter)
                if self.input.delimiter
                else [swap.regex]
            )

            output_key_parts = (
                swap.output_key.split(self.input.delimiter)
                if self.input.delimiter
                else [swap.output_key]
            )

            # NOTE: when nothing matches, find returns None and the output key
            # is still written with that None value.
            body, value = self.find(body, key_parts)
            body = self.add(body, output_key_parts, value)

        return body

add(body, key_parts, value)

Rename terms

:param body: current body :type body: dict :param key_parts: output key parts separated by delimiter :type key_parts: list :param value: value to store at the output key :type value: Any

:return: updated body :rtype: dict

Source code in extraction_methods/plugins/regex_rename.py
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
def add(
    self, body: dict[str, Any], key_parts: list[str], value: Any
) -> dict[str, Any]:
    """
    Set the value at the (possibly nested) output key

    :param body: current body (modified in place)
    :type body: dict
    :param key_parts: output key parts separated by delimiter
    :type key_parts: list
    :param value: value to store at the output key
    :type value: Any

    :return: updated body
    :rtype: dict
    """
    head = key_parts[0]

    if len(key_parts) > 1:
        # Create missing intermediate levels instead of raising KeyError.
        body[head] = self.add(body.setdefault(head, {}), key_parts[1:], value)

    else:
        body[head] = value

    return body

find(body, key_parts)

Rename terms

:param body: current body :type body: dict :param key_parts: key regexes separated by delimiter :type key_parts: list

:return: updated body and the value of the last matching term (None if nothing matched) :rtype: tuple

Source code in extraction_methods/plugins/regex_rename.py
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
def find(
    self, body: dict[str, Any], key_parts: list[str]
) -> tuple[dict[str, Any], Any]:
    """
    Remove the terms matching the key parts and return their value

    :param body: current body (modified in place)
    :type body: dict
    :param key_parts: key regexes separated by delimiter, one per level
    :type key_parts: list

    :return: updated body and the value of the last matching term
        (``None`` if nothing matched)
    :rtype: tuple
    """
    value = None
    head, rest = key_parts[0], key_parts[1:]

    # matching_keys returns a list, so deleting from body below is safe.
    for key in self.matching_keys(body.keys(), head):
        if rest:
            child = body[key]

            # Only dictionaries can be descended into.
            if not isinstance(child, dict):
                continue

            body[key], nested_value = self.find(child, rest)

            # None means "nothing matched below this key"; it must not
            # wipe out a value already found under an earlier key.
            if nested_value is not None:
                value = nested_value

        else:
            value = body.pop(key)

    return body, value

matching_keys(keys, key_regex)

Find all keys that match regex

:param keys: dictionary keys to test :type keys: KeysView :param key_regex: regex to test against :type key_regex: str

:return: matching keys :rtype: list

Source code in extraction_methods/plugins/regex_rename.py
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
def matching_keys(self, keys: KeysView[str], key_regex: str) -> list[str]:
    """
    Find all keys that fully match regex

    :param keys: dictionary keys to test
    :type keys: KeysView
    :param key_regex: regex to test against
    :type key_regex: str

    :return: matching keys, in the iteration order of ``keys``
    :rtype: list
    """

    regex = re.compile(key_regex)

    # ``fullmatch`` rather than ``match``: the documented contract is a
    # full match, so "README" must not also select "README.md".
    return list(filter(regex.fullmatch, keys))

RegexRenameInput

Bases: Input

Model for Regex Rename Input.

Parameters:

Name Type Description Default
regex_swaps list[RegexOutputKey]

Regex and output key combinations.

required
delimiter str

delimiter for nested term.

''
Source code in extraction_methods/plugins/regex_rename.py
37
38
39
40
41
42
43
44
45
46
47
48
class RegexRenameInput(Input):
    """
    Input model for the regex rename method.
    """

    # Rename rules, each pairing a regex with an output key (required).
    regex_swaps: list[RegexOutputKey] = Field(
        ..., description="Regex and output key combinations."
    )
    # Separator used to split regexes and output keys into nested parts;
    # defaults to the empty string.
    delimiter: str = Field("", description="delimiter for nested term.")