From 77bec1261eedf1f3afaa9b03c14c8a8a08b11164 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Wed, 25 Jan 2023 14:14:47 +0100 Subject: [PATCH 1/1] add streaming json writer for JSON output --- .github/workflows/ci-tests.yml | 2 +- docs/develop/Development-Environment.md | 1 + nominatim/api/v1/format.py | 25 ++-- nominatim/utils/json_writer.py | 144 +++++++++++++++++++ test/python/api/test_result_formatting_v1.py | 4 +- test/python/utils/test_json_writer.py | 106 ++++++++++++++ 6 files changed, 266 insertions(+), 16 deletions(-) create mode 100644 nominatim/utils/json_writer.py create mode 100644 test/python/utils/test_json_writer.py diff --git a/.github/workflows/ci-tests.yml b/.github/workflows/ci-tests.yml index a4de7149..e7ba35f6 100644 --- a/.github/workflows/ci-tests.yml +++ b/.github/workflows/ci-tests.yml @@ -110,7 +110,7 @@ jobs: run: pip3 install falcon sanic sanic-testing sanic-cors starlette - name: Install latest pylint/mypy - run: pip3 install -U pylint mypy types-PyYAML types-jinja2 types-psycopg2 types-psutil types-requests typing-extensions asgi_lifespan sqlalchemy2-stubs + run: pip3 install -U pylint mypy types-PyYAML types-jinja2 types-psycopg2 types-psutil types-requests types-ujson typing-extensions asgi_lifespan sqlalchemy2-stubs - name: PHP linting run: phpcs --report-width=120 . 
diff --git a/docs/develop/Development-Environment.md b/docs/develop/Development-Environment.md index 0e1bbf61..fc5008c4 100644 --- a/docs/develop/Development-Environment.md +++ b/docs/develop/Development-Environment.md @@ -64,6 +64,7 @@ sudo apt install php-cgi phpunit php-codesniffer \ pip3 install --user behave mkdocs mkdocstrings pytest pylint \ mypy types-PyYAML types-jinja2 types-psycopg2 types-psutil \ + types-ujson \ sanic-testing httpx asgi-lifespan ``` diff --git a/nominatim/api/v1/format.py b/nominatim/api/v1/format.py index cb2b15a7..116e2ae6 100644 --- a/nominatim/api/v1/format.py +++ b/nominatim/api/v1/format.py @@ -7,12 +7,9 @@ """ Output formatters for API version v1. """ -from typing import Dict, Any -from collections import OrderedDict -import json - from nominatim.api.result_formatting import FormatDispatcher from nominatim.api import StatusResult +from nominatim.utils.json_writer import JsonWriter dispatch = FormatDispatcher() @@ -26,13 +23,15 @@ def _format_status_text(result: StatusResult) -> str: @dispatch.format_func(StatusResult, 'json') def _format_status_json(result: StatusResult) -> str: - out: Dict[str, Any] = OrderedDict() - out['status'] = result.status - out['message'] = result.message - if result.data_updated is not None: - out['data_updated'] = result.data_updated.isoformat() - out['software_version'] = str(result.software_version) - if result.database_version is not None: - out['database_version'] = str(result.database_version) + out = JsonWriter() + + out.start_object()\ + .keyval('status', result.status)\ + .keyval('message', result.message)\ + .keyval_not_none('data_updated', result.data_updated, + lambda v: v.isoformat())\ + .keyval('software_version', str(result.software_version))\ + .keyval_not_none('database_version', result.database_version, str)\ + .end_object() - return json.dumps(out) + return out() diff --git a/nominatim/utils/json_writer.py b/nominatim/utils/json_writer.py new file mode 100644 index 00000000..e2e5b9e6 
--- /dev/null +++ b/nominatim/utils/json_writer.py @@ -0,0 +1,144 @@ +# SPDX-License-Identifier: GPL-3.0-or-later +# +# This file is part of Nominatim. (https://nominatim.org) +# +# Copyright (C) 2023 by the Nominatim developer community. +# For a full list of authors see the git log. +""" +Streaming JSON encoder. +""" +from typing import Any, TypeVar, Optional, Callable +import io +try: + import ujson as json +except ModuleNotFoundError: + import json # type: ignore[no-redef] + +T = TypeVar('T') # pylint: disable=invalid-name + +class JsonWriter: + """ JSON encoder that renders the output directly into an output + stream. This is a very simple writer which produces JSON in a + compact as possible form. + + The writer does not check for syntactic correctness. It is the + responsibility of the caller to call the write functions in an + order that produces correct JSON. + + All functions return the writer object itself so that function + calls can be chained. + """ + + def __init__(self) -> None: + self.data = io.StringIO() + self.pending = '' + + + def __call__(self) -> str: + """ Return the rendered JSON content as a string. + The writer remains usable after calling this function. + """ + if self.pending: + assert self.pending in (']', '}') + self.data.write(self.pending) + self.pending = '' + return self.data.getvalue() + + + def start_object(self) -> 'JsonWriter': + """ Write the open bracket of a JSON object. + """ + if self.pending: + self.data.write(self.pending) + self.pending = '{' + return self + + + def end_object(self) -> 'JsonWriter': + """ Write the closing bracket of a JSON object. + """ + assert self.pending in (',', '{', '') + if self.pending == '{': + self.data.write(self.pending) + self.pending = '}' + return self + + + def start_array(self) -> 'JsonWriter': + """ Write the opening bracket of a JSON array. 
+ """ + if self.pending: + self.data.write(self.pending) + self.pending = '[' + return self + + + def end_array(self) -> 'JsonWriter': + """ Write the closing bracket of a JSON array. + """ + assert self.pending in (',', '[', '') + if self.pending == '[': + self.data.write(self.pending) + self.pending = ']' + return self + + + def key(self, name: str) -> 'JsonWriter': + """ Write the key string of a JSON object. + """ + assert self.pending + self.data.write(self.pending) + self.data.write(json.dumps(name, ensure_ascii=False)) + self.pending = ':' + return self + + + def value(self, value: Any) -> 'JsonWriter': + """ Write out a value as JSON. The function uses the json.dumps() + function for encoding the JSON. Thus any value that can be + encoded by that function is permissible here. + """ + return self.raw(json.dumps(value, ensure_ascii=False)) + + + def next(self) -> 'JsonWriter': + """ Write out a delimiter comma between JSON object or array elements. + """ + if self.pending: + self.data.write(self.pending) + self.pending = ',' + return self + + + def raw(self, raw_json: str) -> 'JsonWriter': + """ Write out the given value as is. This function is useful if + a value is already available in JSON format. + """ + if self.pending: + self.data.write(self.pending) + self.pending = '' + self.data.write(raw_json) + return self + + + def keyval(self, key: str, value: Any) -> 'JsonWriter': + """ Write out an object element with the given key and value. + This is a shortcut for calling 'key()', 'value()' and 'next()'. + """ + self.key(key) + self.value(value) + return self.next() + + + def keyval_not_none(self, key: str, value: Optional[T], + transform: Optional[Callable[[T], Any]] = None) -> 'JsonWriter': + """ Write out an object element only if the value is not None. + If 'transform' is given, it must be a function that takes the + value type and returns a JSON encodable type. The transform + function will be called before the value is written out. 
+ """ + if value is not None: + self.key(key) + self.value(transform(value) if transform else value) + self.next() + return self diff --git a/test/python/api/test_result_formatting_v1.py b/test/python/api/test_result_formatting_v1.py index 95472916..4a5d5989 100644 --- a/test/python/api/test_result_formatting_v1.py +++ b/test/python/api/test_result_formatting_v1.py @@ -44,7 +44,7 @@ def test_status_format_json_minimal(): result = api_impl.format_result(status, 'json') - assert result == '{"status": 700, "message": "Bad format.", "software_version": "%s"}' % (NOMINATIM_VERSION, ) + assert result == '{"status":700,"message":"Bad format.","software_version":"%s"}' % (NOMINATIM_VERSION, ) def test_status_format_json_full(): @@ -54,4 +54,4 @@ def test_status_format_json_full(): result = api_impl.format_result(status, 'json') - assert result == '{"status": 0, "message": "OK", "data_updated": "2010-02-07T20:20:03+00:00", "software_version": "%s", "database_version": "5.6"}' % (NOMINATIM_VERSION, ) + assert result == '{"status":0,"message":"OK","data_updated":"2010-02-07T20:20:03+00:00","software_version":"%s","database_version":"5.6"}' % (NOMINATIM_VERSION, ) diff --git a/test/python/utils/test_json_writer.py b/test/python/utils/test_json_writer.py new file mode 100644 index 00000000..08a091f4 --- /dev/null +++ b/test/python/utils/test_json_writer.py @@ -0,0 +1,106 @@ +# SPDX-License-Identifier: GPL-3.0-or-later +# +# This file is part of Nominatim. (https://nominatim.org) +# +# Copyright (C) 2023 by the Nominatim developer community. +# For a full list of authors see the git log. +""" +Tests for the streaming JSON writer. 
+""" +import json + +import pytest + +from nominatim.utils.json_writer import JsonWriter + +@pytest.mark.parametrize("inval,outstr", [(None, 'null'), + (True, 'true'), (False, 'false'), + (23, '23'), (0, '0'), (-1.3, '-1.3'), + ('g\nä', '"g\\nä"'), ('"', '"\\\""'), + ({}, '{}'), ([], '[]')]) +def test_simple_value(inval, outstr): + writer = JsonWriter() + writer.value(inval) + + assert writer() == outstr + json.loads(writer()) + + +def test_empty_array(): + writer = JsonWriter().start_array().end_array() + + assert writer() == '[]' + json.loads(writer()) + + +def test_array_with_single_value(): + writer = JsonWriter().start_array().value(None).end_array() + + assert writer() == '[null]' + json.loads(writer()) + + +@pytest.mark.parametrize("invals,outstr", [((1, ), '[1]'), + (('a', 'b'), '["a","b"]')]) +def test_array_with_data(invals, outstr): + writer = JsonWriter() + + writer.start_array() + for ival in invals: + writer.value(ival).next() + writer.end_array() + + assert writer() == outstr + json.loads(writer()) + + +def test_empty_object(): + writer = JsonWriter().start_object().end_object() + + assert writer() == '{}' + json.loads(writer()) + + +def test_object_single_entry(): + writer = JsonWriter()\ + .start_object()\ + .key('something')\ + .value(5)\ + .end_object() + + assert writer() == '{"something":5}' + json.loads(writer()) + +def test_object_many_values(): + writer = JsonWriter()\ + .start_object()\ + .keyval('foo', None)\ + .keyval('bar', {})\ + .keyval('baz', 'b\taz')\ + .end_object() + + assert writer() == '{"foo":null,"bar":{},"baz":"b\\taz"}' + json.loads(writer()) + +def test_object_many_values_without_none(): + writer = JsonWriter()\ + .start_object()\ + .keyval_not_none('foo', 0)\ + .keyval_not_none('bar', None)\ + .keyval_not_none('baz', '')\ + .keyval_not_none('eve', False, + transform = lambda v: 'yes' if v else 'no')\ + .end_object() + + assert writer() == '{"foo":0,"baz":"","eve":"no"}' + json.loads(writer()) + + +def test_raw_output(): + 
writer = JsonWriter()\ + .start_array()\ + .raw('{ "nicely": "formatted here" }').next()\ + .value(1)\ + .end_array() + + assert writer() == '[{ "nicely": "formatted here" },1]' -- 2.39.5