Skip to content

Json file

JsonFileExtract

Bases: ExtractionMethod

Takes a list of property keys to extract from a JSON file, or from every JSON file in a directory.

Method name: json_file

Example configuration

.. code-block:: yaml

- method: json_file
  inputs:
    path: /path/to/file.json
    properties:
      - key: MIP_ERA
        output_key: mip_era
Source code in extraction_methods/plugins/json_file.py
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
class JsonFileExtract(ExtractionMethod):
    """
    Extract a configured list of properties from one or more JSON files.

    ``path`` may point at a single JSON file or at a directory. A directory is
    searched recursively for ``*.json`` files and the values found for each
    output key are gathered into a list.

    **Method name:** ``json_file``

    Example configuration:
        .. code-block:: yaml

            - method: json_file
              inputs:
                path: /path/to/file.json
                properties:
                  - key: MIP_ERA
                    output_key: mip_era
    """

    input_class = JsonFileInput

    def extract_terms(self, path: Path) -> dict[str, Any]:
        """
        Extract the configured properties from a single JSON file.

        :param path: path to file
        :type path: Path

        :return: extracted terms keyed by each property's ``output_key``;
            empty when the file cannot be parsed or is not a JSON object
        :rtype: dict
        """

        try:
            with open(path, "r", encoding="utf-8") as json_file:
                load_out = json.load(json_file)
        except ValueError as error:
            LOGGER.debug("File: %s can't be json loaded: %s", path, error)
            # Nothing was loaded, so there is nothing to look up. Falling
            # through would hit an unbound ``load_out``.
            return {}

        # Only a JSON object supports key lookup; a top-level list or scalar
        # would otherwise raise TypeError below.
        if not isinstance(load_out, dict):
            LOGGER.debug("File: %s does not contain a JSON object", path)
            return {}

        return {
            term.output_key: load_out[term.key]
            for term in self.input.properties
            if term.key in load_out
        }

    @update_input
    def run(self, body: dict[str, Any]) -> dict[str, Any]:
        """
        Extract the configured properties and add them to ``body``.

        :param body: current extraction body
        :type body: dict

        :return: ``body`` with the extracted terms stored under
            ``output_key`` when one is configured, otherwise a new dict with
            the extracted terms merged over ``body``
        :rtype: dict
        """

        path = Path(self.input.path)

        # Default for a path that is neither a directory nor a file, so the
        # merge below never sees an unbound name.
        output: dict[str, Any] = {}

        if path.is_dir():
            collected: dict[str, list[Any]] = defaultdict(list)
            for child in path.rglob("*.json"):
                # ``.items()`` is required: iterating the dict itself yields
                # only its keys.
                for key, value in self.extract_terms(child).items():
                    if isinstance(value, list):
                        collected[key].extend(value)
                    else:
                        collected[key].append(value)
            # Hand back a plain dict so the defaultdict's auto-creating
            # behaviour does not leak into the body.
            output = dict(collected)

        elif path.is_file():
            output = self.extract_terms(path)

        if self.input.output_key:
            body[self.input.output_key] = output
            return body

        return body | output

extract_terms(path)

Extract terms from the JSON file at path.

:param path: path to file :type path: Path

:return: extracted terms :rtype: dict

Source code in extraction_methods/plugins/json_file.py
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
def extract_terms(self, path: Path) -> dict[str, Any]:
    """
    Extract the configured properties from a single JSON file.

    :param path: path to file
    :type path: Path

    :return: extracted terms keyed by each property's ``output_key``;
        empty when the file cannot be parsed or is not a JSON object
    :rtype: dict
    """

    try:
        with open(path, "r", encoding="utf-8") as json_file:
            load_out = json.load(json_file)
    except ValueError as error:
        LOGGER.debug("File: %s can't be json loaded: %s", path, error)
        # Nothing was loaded, so there is nothing to look up. Falling
        # through would hit an unbound ``load_out``.
        return {}

    # Only a JSON object supports key lookup; a top-level list or scalar
    # would otherwise raise TypeError below.
    if not isinstance(load_out, dict):
        LOGGER.debug("File: %s does not contain a JSON object", path)
        return {}

    return {
        term.output_key: load_out[term.key]
        for term in self.input.properties
        if term.key in load_out
    }

JsonFileInput

Bases: Input

Model for JSON File Input.

Parameters:

Name Type Description Default
path str

Path to directory of JSON files or single JSON file.

required
properties list[KeyOutputKey]

list of properties to extract.

required
output_key str

Key to output to. If None, the output is merged into the body.

None
Source code in extraction_methods/plugins/json_file.py
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
class JsonFileInput(Input):
    """
    Input model for the ``json_file`` extraction method.
    """

    # Where to read from: one JSON file, or a directory of JSON files.
    path: str = Field(description="Path to directory of JSON files or single JSON file.")

    # Which keys to pull out of the JSON and the names to store them under.
    properties: list[KeyOutputKey] = Field(description="list of properties to extract.")

    # Optional destination key for the extracted terms.
    # NOTE(review): annotated as ``str`` but defaults to ``None`` - confirm
    # whether this should be an optional type.
    output_key: str = Field(
        description="Key to output to. if none output is merged.",
        default=None,
    )