Skip to content

Open zip

ZipExtract

Bases: ExtractionMethod

Open a zip file and read inner files

Method name: open_zip

Example configuration:

```yaml
- method: open_zip
  inputs:
    input_term: /path/to/a/file
    inner_files:
      - key: hello.txt
        output_key: world
```

noqa: W605

Source code in extraction_methods/plugins/open_zip.py
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
class ZipExtract(ExtractionMethod):
    """
    Open a zip file and read inner files

    **Method name:** ``open_zip``

    Example configuration:
        .. code-block:: yaml

            - method: open_zip
              inputs:
                input_term: /path/to/a/file
                inner_files:
                  - key: hello.txt
                    output_key: world

    If ``inner_files`` is omitted, every file in the archive is read and
    stored under ``output_key`` as a mapping of member name to content.

    # noqa: W605
    """

    input_class = ZipInput

    @update_input
    def run(self, body: dict[str, Any]) -> dict[str, Any]:
        """
        Read files from the zip archive at ``input_term`` into ``body``.

        With ``inner_files`` configured, each listed member is read and
        stored under its own ``output_key``. Those results are nested under
        the method-level ``output_key`` when it is set, otherwise they are
        merged into the top level of ``body``.

        With no ``inner_files`` configured, every file member of the archive
        is read and stored under ``output_key`` as a ``{member name: bytes}``
        mapping.

        Args:
            body: dictionary the extracted content is added to; it is
                updated in place.

        Returns:
            The same ``body`` dictionary, with the extracted content added.

        Raises:
            KeyError: if a requested inner file is not present in the archive.
            zipfile.BadZipFile: if ``input_term`` is not a valid zip file.
        """
        # NOTE(review): ``self.input`` is presumably a validated ``ZipInput``
        # supplied via ``update_input`` - confirm against the decorator.
        with zipfile.ZipFile(self.input.input_term) as z:
            if not self.input.inner_files:
                # ``ZipFile.read`` requires a member name, so the previous
                # bare ``z.read()`` always raised ``TypeError``. Read every
                # member instead; directory entries carry no content and are
                # skipped.
                # NOTE(review): "all members keyed by name" is the assumed
                # intent of a root read - confirm with the method's users.
                body[self.input.output_key] = {
                    info.filename: z.read(info)
                    for info in z.infolist()
                    if not info.is_dir()
                }

            else:
                output: dict[str, Any] = {
                    inner_file.output_key: z.read(inner_file.key)
                    for inner_file in self.input.inner_files
                }

                if self.input.output_key:
                    body[self.input.output_key] = output

                else:
                    # No method-level key: expose each inner file directly
                    # at the top level of ``body``.
                    body |= output

        return body

ZipInput

Bases: Input

Model for Zip Input.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `input_term` | `str` | term for method to run on. | `'$uri'` |
| `inner_files` | `list[KeyOutputKey]` | list of inner zipped files to be read. | `[]` |
| `output_key` | `str` | key to output to. | `''` |
Source code in extraction_methods/plugins/open_zip.py
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
class ZipInput(Input):
    """
    Model for Zip Input.

    At least one of ``inner_files`` or ``output_key`` must be provided;
    validation fails when both are empty.
    """

    # Term the method runs on; ``ZipExtract`` opens it as the zip archive.
    input_term: str = Field(
        "$uri",
        description="term for method to run on.",
    )
    # Members to read from the archive, each paired with its own output key.
    inner_files: list[KeyOutputKey] = Field(
        [],
        description="list of inner zipped files to be read.",
    )
    # Key the result is stored under; may be empty when ``inner_files`` is set.
    output_key: str = Field(
        "",
        description="key to output to.",
    )

    @model_validator(mode="after")
    def check_root_read(self) -> Self:
        """
        Ensure there is somewhere to put the result of a root read.

        Returns:
            The validated model instance, unchanged.

        Raises:
            ValueError: if both ``output_key`` and ``inner_files`` are empty.
        """
        # Either field being set is enough for the input to be usable.
        if self.output_key or self.inner_files:
            return self

        raise ValueError("`output_key` required if no `inner_files` defined")