Skip to content

Solr

SolarParams

Bases: BaseModel

Solr parameters model.

Parameters:

Name Type Description Default
indent str

indent.

'on'
q str

query.

'*:*'
wt str

wt.

'json'
rows int

Number of rows.

10000
sort str

sort.

'id asc'
cursorMark str

cursor mark.

'*'
Source code in stac_generator/plugins/inputs/solr.py
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
class SolarParams(BaseModel):
    """Solr request parameters, each with a default value."""

    # NOTE: keep the declaration order stable; it determines the order of the
    # keys produced when the model is dumped to a dict.

    # Passed through to Solr as the ``indent`` parameter.
    indent: str = Field("on", description="indent.")
    # Query string; the default matches every document.
    q: str = Field("*:*", description="query.")
    # Passed through to Solr as the ``wt`` parameter.
    wt: str = Field("json", description="wt.")
    # Page size requested per call.
    rows: int = Field(10000, description="Number of rows.")
    # Sort clause sent with the query.
    sort: str = Field("id asc", description="sort.")
    # Cursor position for paging; "*" is the initial value.
    cursorMark: str = Field("*", description="cursor mark.")

SolrConf

Bases: BaseModel

Solr configuration.

Parameters:

Name Type Description Default
url str

URL of datastore.

required
params SolarParams

Parameters to pass to solr.

required
extra_terms list[KeyOutputKey]

List of extra attributes.

[]
Source code in stac_generator/plugins/inputs/solr.py
49
50
51
52
53
54
55
56
57
58
59
60
61
class SolrConf(BaseModel):
    """Configuration for the Solr input: endpoint, query parameters and extra terms."""

    # Required: no default is supplied.
    url: str = Field(..., description="URL of datastore.")
    # Required: the parameter model must be given explicitly in the config.
    params: SolarParams = Field(..., description="Parameters to pass to solr.")
    # Optional: key / output-key pairs naming additional document attributes.
    extra_terms: list[KeyOutputKey] = Field([], description="List of extra attributes.")

SolrInput

Bases: Input

Uses a Solr index node as a source for events.

Plugin name: solr

Example Configuration

.. code-block:: yaml

name: solr
conf:
  url: url.index-node.ac.uk
  params:
    q: "facet: value"
    rows: 10000
Source code in stac_generator/plugins/inputs/solr.py
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
class SolrInput(Input):
    """
    Uses a Solr index node as a source for events.

    **Plugin name:** ``solr``

    Example Configuration:
        .. code-block:: yaml

            name: solr
            conf:
              url: url.index-node.ac.uk
              params:
                q: "facet: value"
                rows: 10000
    """

    config_class = SolrConf

    def iter_docs(self):
        """
        Core loop to iterate through the Solr response.

        Requests ``self.conf.url`` repeatedly with the configured parameters,
        yielding every document of each page and following the
        ``nextCursorMark`` returned by the server until a page comes back
        empty.

        Yields:
            Each entry of ``response.docs`` from every page, in order.

        Raises:
            SystemExit: if the connection to ``self.conf.url`` cannot be
                established (the failure is logged first).
            requests.exceptions.HTTPError: if the server answers with an
                error status.
        """
        # Page with a private copy of the parameters so that advancing the
        # cursor never overwrites the configured starting cursorMark; this
        # keeps a second call to iter_docs() starting from the beginning.
        params = self.conf.params.dict()

        n = 0
        while True:
            try:
                resp = requests.get(self.conf.url, params)
            except requests.exceptions.ConnectionError as e:
                LOGGER.error("Failed to establish connection to %s:\n%s", self.conf.url, e)
                sys.exit(1)

            # Fail with a clear HTTP error rather than a KeyError on the
            # missing "response" key of an error payload.
            resp.raise_for_status()

            body = resp.json()
            docs = body["response"]["docs"]

            # Return the list of files to the for loop and continue paginating
            yield from docs

            n += len(docs)
            LOGGER.info("%s/%s\n", n, body["response"]["numFound"])
            if not docs:
                # An empty page ends the pagination. It is only an error when
                # the query matched nothing at all.
                if n == 0:
                    LOGGER.error("no docs found")
                else:
                    LOGGER.info("Reached end of results after %s docs", n)
                break
            LOGGER.info("Next cursormark at position %s", n)

            # Change the search params to get next page.
            params["cursorMark"] = body["nextCursorMark"]

    def run(self):
        """
        Turn every Solr document into an output record.

        Yields:
            A dict with a ``"uri"`` entry derived from the document ``id``,
            plus one entry per configured extra term, keyed by the term's
            ``output_key`` and holding the document's value for the term's
            ``key`` (``None`` when the document lacks it).
        """
        for doc in self.iter_docs():
            uri: str = doc.get("id")

            LOGGER.info("Input processing: %s", uri)

            output = {"uri": self._id_to_uri(uri)}

            for extra_term in self.conf.extra_terms:
                output[extra_term.output_key] = doc.get(extra_term.key)

            yield output

    @staticmethod
    def _id_to_uri(doc_id):
        """Replace '.' with '/' in the part of *doc_id* before '|', keeping that part's last dot."""
        prefix_dots = doc_id.split("|")[0].count(".")
        # Clamp at zero: a negative count makes str.replace substitute every
        # occurrence, which would also rewrite the dots after the '|'.
        return doc_id.replace(".", "/", max(prefix_dots - 1, 0))

iter_docs()

Core loop to iterate through the Solr response.

Source code in stac_generator/plugins/inputs/solr.py
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
def iter_docs(self):
    """
    Core loop to iterate through the Solr response.

    Requests ``self.conf.url`` repeatedly with the configured parameters,
    yielding every document of each page and following the
    ``nextCursorMark`` returned by the server until a page comes back empty.

    Yields:
        Each entry of ``response.docs`` from every page, in order.

    Raises:
        SystemExit: if the connection to ``self.conf.url`` cannot be
            established (the failure is logged first).
        requests.exceptions.HTTPError: if the server answers with an error
            status.
    """
    # Page with a private copy of the parameters so that advancing the cursor
    # never overwrites the configured starting cursorMark; this keeps a
    # second call to iter_docs() starting from the beginning.
    params = self.conf.params.dict()

    n = 0
    while True:
        try:
            resp = requests.get(self.conf.url, params)
        except requests.exceptions.ConnectionError as e:
            LOGGER.error("Failed to establish connection to %s:\n%s", self.conf.url, e)
            sys.exit(1)

        # Fail with a clear HTTP error rather than a KeyError on the missing
        # "response" key of an error payload.
        resp.raise_for_status()

        body = resp.json()
        docs = body["response"]["docs"]

        # Return the list of files to the for loop and continue paginating
        yield from docs

        n += len(docs)
        LOGGER.info("%s/%s\n", n, body["response"]["numFound"])
        if not docs:
            # An empty page ends the pagination. It is only an error when the
            # query matched nothing at all.
            if n == 0:
                LOGGER.error("no docs found")
            else:
                LOGGER.info("Reached end of results after %s docs", n)
            break
        LOGGER.info("Next cursormark at position %s", n)

        # Change the search params to get next page.
        params["cursorMark"] = body["nextCursorMark"]