[pulseaudio-discuss] [PATCH 1/2] pulse: Add a JSON-parsing library
Tanu Kaskinen
tanuk at iki.fi
Mon May 23 15:01:09 UTC 2016
On Tue, 2016-04-26 at 17:47 +0530, arun at accosted.net wrote:
> From: Arun Raghavan <git at arunraghavan.net>
>
> Adding this to be able to drop dependency on json-c.
>
> Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=95135
> ---
> src/Makefile.am | 8 +
> src/pulse/json.c | 476 ++++++++++++++++++++++++++++++++++++++++++++++++++
> src/pulse/json.h | 57 ++++++
> src/tests/json-test.c | 240 +++++++++++++++++++++++++
> 4 files changed, 781 insertions(+)
> create mode 100644 src/pulse/json.c
> create mode 100644 src/pulse/json.h
> create mode 100644 src/tests/json-test.c
This patch could also add src/json-test to .gitignore.
> diff --git a/src/Makefile.am b/src/Makefile.am
> index b600dfb..9d952fc 100644
> --- a/src/Makefile.am
> +++ b/src/Makefile.am
> @@ -248,6 +248,7 @@ TESTS_default = \
> thread-mainloop-test \
> utf8-test \
> format-test \
> + json-test \
> get-binary-name-test \
> hook-list-test \
> memblock-test \
> @@ -375,6 +376,12 @@ format_test_CFLAGS = $(AM_CFLAGS) $(LIBCHECK_CFLAGS)
> format_test_LDADD = $(AM_LDADD) libpulsecore- at PA_MAJORMINOR@.la libpulse.la libpulsecommon- at PA_MAJORMINOR@.la
> format_test_LDFLAGS = $(AM_LDFLAGS) $(BINLDFLAGS) $(LIBCHECK_LIBS)
>
> +json_test_SOURCES = tests/json-test.c
> +json_test_CFLAGS = $(AM_CFLAGS) $(LIBCHECK_CFLAGS)
> +json_test_LDADD = $(AM_LDADD) libpulse.la libpulsecommon- at PA_MAJORMINOR@.la
> +json_test_LDFLAGS = $(AM_LDFLAGS) $(BINLDFLAGS) $(LIBCHECK_LIBS)
> +
> +srbchannel_test_SOURCES = tests/srbchannel-test.c
> srbchannel_test_SOURCES = tests/srbchannel-test.c
It's probably sufficient to set srbchannel_test_SOURCES just once.
> srbchannel_test_CFLAGS = $(AM_CFLAGS) $(LIBCHECK_CFLAGS)
> srbchannel_test_LDADD = $(AM_LDADD) libpulse.la libpulsecommon- at PA_MAJORMINOR@.la
> @@ -646,6 +653,7 @@ libpulsecommon_ at PA_MAJORMINOR@_la_SOURCES = \
> pulse/client-conf.c pulse/client-conf.h \
> pulse/fork-detect.c pulse/fork-detect.h \
> pulse/format.c pulse/format.h \
> + pulse/json.c pulse/json.h \
> pulse/xmalloc.c pulse/xmalloc.h \
> pulse/proplist.c pulse/proplist.h \
> pulse/utf8.c pulse/utf8.h \
> diff --git a/src/pulse/json.c b/src/pulse/json.c
> new file mode 100644
> index 0000000..0e53902
> --- /dev/null
> +++ b/src/pulse/json.c
> @@ -0,0 +1,476 @@
> +/***
> + This file is part of PulseAudio.
> +
> + Copyright 2016 Arun Raghavan <mail at arunraghavan.net>
> +
> + PulseAudio is free software; you can redistribute it and/or modify
> + it under the terms of the GNU Lesser General Public License as published
> + by the Free Software Foundation; either version 2.1 of the License,
> + or (at your option) any later version.
> +
> + PulseAudio is distributed in the hope that it will be useful, but
> + WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public License
> + along with PulseAudio; if not, see <http://www.gnu.org/licenses/>.
> +***/
> +
> +#include
> +
> +#include
> +#include
> +#include
> +#include
> +
> +struct pa_json_object {
> + PA_REFCNT_DECLARE;
> + pa_json_type type;
> +
> + union {
> + int int_value;
> + double double_value;
> + bool bool_value;
> + char *string_value;
> + pa_hashmap *object_values; /* name -> object */
> + pa_idxset *array_values; /* objects */
> + };
> +};
> +
> +#define JSON_OBJECT_TYPE(o) ((o)->type)
What's the rationale for this macro? Why is using the macro better than
using o->type directly?
> +
> +static const char* parse_value(const char *str, const char *end, pa_json_object **obj);
> +
> +static pa_json_object* json_object_new(void) {
> + pa_json_object *obj;
> +
> + obj = pa_xnew0(pa_json_object, 1);
> +
> + return obj;
> +}
> +
> +static bool is_whitespace(char c) {
> + return c == '\t' || c == '\f' || c == '\r' || c == ' ';
> +}
> +
> +static bool is_digit(char c) {
> + return c >= '0' && c <= '9';
> +}
> +
> +static bool is_end(const char c, const char *end) {
> + if (!end)
> + return c == '\0';
> + else {
> + while (*end) {
> + if (c == *end)
> + return true;
> + end++;
> + }
> + }
> +
> + return false;
> +}
> +
> +static const char* consume_string(const char *str, const char *expect)
> +{
The opening brace got misplaced.
> + while (*expect) {
> + if (*str != *expect)
> + return NULL;
> +
> + str++;
> + expect++;
> + }
> +
> + return str;
> +}
> +
> +static const char* parse_null(const char *str, pa_json_object *obj) {
> + str = consume_string(str, "null");
> +
> + if (str)
> + obj->type = PA_JSON_TYPE_NULL;
> +
> + return str;
> +}
> +
> +static const char* parse_boolean(const char *str, pa_json_object *obj) {
> + const char *tmp;
> +
> + tmp = consume_string(str, "true");
> +
> + if (tmp) {
> + obj->type = PA_JSON_TYPE_BOOL;
> + obj->bool_value = true;
> + } else {
> + tmp = consume_string(str, "false");
> +
> + if (str) {
> + obj->type = PA_JSON_TYPE_BOOL;
> + obj->bool_value = false;
> + }
> + }
> +
> + return tmp;
> +}
> +
> +static const char* parse_string(const char *str, pa_json_object *obj) {
> + pa_strbuf *buf = pa_strbuf_new();
> +
> + str++; /* Consume leading '"' */
> +
> + while (*str != '"') {
> + if (*str != '\\') {
> + /* Normal character, juts consume */
> + pa_strbuf_putc(buf, *str);
Control characters 0x00 <= *str <= 0x1F should cause an error, since
they are disallowed by the JSON spec (0x00 is maybe special, since it
signifies end-of-string, but that should cause an error nevertheless).
Also, JSON data can be UTF-8, UTF-16 or UTF-32 encoded, but this code
doesn't take that into account. If we are fed valid multi-byte
sequences, this code probably can get confused. I feel it would be
safest to only support 7-bit ASCII, and disallow any characters above
0x7F.
> + } else {
> + /* Need to unescape */
> + str++;
> +
> + switch (*str) {
> + case '"':
> + case '\\':
> + case '/':
> + pa_strbuf_putc(buf, *str);
> + break;
> +
> + case 'b':
> + pa_strbuf_putc(buf, '\b' /* backspace */);
> + break;
> +
> + case 'f':
> + pa_strbuf_putc(buf, '\f' /* form feed */);
> + break;
> +
> + case 'n':
> + pa_strbuf_putc(buf, '\n' /* new line */);
> + break;
> +
> + case 'r':
> + pa_strbuf_putc(buf, '\r' /* carriage return */);
> + break;
> +
> + case 't':
> + pa_strbuf_putc(buf, '\t' /* horizontal tab */);
> + break;
> +
> + case 'u':
> + pa_log("Unicode code points are currently unsupported");
> + goto error;
> +
> + default:
> + pa_log("Unexepcted escape value: %c", *str);
> + goto error;
> + }
> + }
> +
> + str++;
> + }
> +
> + if (*str != '"') {
> + pa_log("Failed to parse remainder of string: %s", str);
> + goto error;
> + }
> +
> + str++;
> +
> + obj->type = PA_JSON_TYPE_STRING;
> + obj->string_value = pa_strbuf_to_string_free(buf);
> +
> + return str;
> +
> +error:
> + pa_strbuf_free(buf);
> + return NULL;
> +}
> +
> +static const char* parse_number(const char *str, pa_json_object *obj) {
> + bool negative = false, has_fraction = false, has_exponent = false;
> + unsigned int integer = 0;
> + unsigned int fraction = 0;
> + unsigned int fraction_digits = 0;
> + int exponent = 0;
> +
> + if (*str == '-') {
> + negative = true;
> + str++;
> + }
> +
> + if (*str == '0') {
> + str++;
> + goto fraction;
> + }
> +
> + while (is_digit(*str)) {
> + integer = (integer * 10) + (*str - '0');
> + str++;
> + }
Missing overflow checking (imagine "12341234123412341234123412341234").
> +
> +fraction:
> + if (*str == '.') {
> + has_fraction = true;
> + str++;
> +
> + while (is_digit(*str)) {
> + fraction = (fraction * 10) + (*str - '0');
> + fraction_digits++;
> + str++;
> + }
Missing overflow checking.
> + }
> +
> + if (*str == 'e' || *str == 'E') {
> + bool exponent_negative = false;
> +
> + has_exponent = true;
> + str++;
> +
> + if (*str == '-') {
> + exponent_negative = true;
> + str++;
> + } else if (*str == '+')
> + str++;
> +
> + while (is_digit(*str)) {
> + exponent = (exponent * 10) + (*str - '0');
> + str++;
> + }
Missing overflow checking.
> +
> + if (exponent_negative)
> + exponent *= -1;
> + }
> +
> + if (has_fraction || has_exponent) {
> + obj->type = PA_JSON_TYPE_DOUBLE;
> + obj->double_value =
> + (negative ? -1.0 : 1.0) * (integer + (double) fraction / pow(10, fraction_digits)) * pow(10, exponent);
> + } else {
> + obj->type = PA_JSON_TYPE_INT;
> + obj->int_value = (negative ? -1 : 1) * integer;
> + }
> +
> + return str;
> +}
parse_number() doesn't seem to ever fail. Surely we should check that
we're not fed any random crap that happens to start with "-" or with a
digit? Maybe the idea is that parse_value() will fail if there's any
non-whitespace stuff left after parse_number() returns. In that case, I
think it would be good to have a comment about that in parse_number(),
and at least those cases need to be checked where some mandatory
content is missing (like "-", "1.", "1.e3" or "1e").
> +
> +static const char *parse_object(const char *str, pa_json_object *obj) {
> + pa_json_object *name, *value;
> +
> + obj->object_values = pa_hashmap_new_full(pa_idxset_string_hash_func, pa_idxset_string_compare_func,
> + pa_xfree, (pa_free_cb_t) pa_json_object_unref);
> +
> + while (*str != '}') {
> + str++; /* Consume leading '{' or ',' */
> +
> + str = parse_value(str, ":", &name);
> + if (!str || JSON_OBJECT_TYPE(name) != PA_JSON_TYPE_STRING) {
> + pa_log("Could not parse key for object: %s", str);
str is either NULL or points to after the parsed non-string value. Is
that really what you want to log?
> + goto error;
> + }
> +
> + /* Consume the ':' */
> + str++;
> +
> + str = parse_value(str, ",}", &value);
As we parse nested objects, maybe we should check that we don't recurse
too deep. Consider {"a":{"a":{"a":{"a":{.... I remember libdbus used to
be vulnerable to this kind of an attack that made libdbus allocate so
deep stack that it crashed the application.
> + if (!str) {
> + pa_log("Could not parse value for object: %s", str);
str is NULL, don't log it.
> + goto error;
> + }
> +
> + pa_hashmap_put(obj->object_values, pa_xstrdup(pa_json_object_get_string(name)), value);
> + pa_json_object_unref(name);
> + }
> +
> + /* Drop trailing '}' */
> + str++;
> +
> + /* We now know the value was correctly parsed */
> + obj->type = PA_JSON_TYPE_OBJECT;
> +
> + return str;
> +
> +error:
> + pa_hashmap_free(obj->object_values);
obj->object_values should be set to NULL.
> + return NULL;
> +}
> +
> +static const char *parse_array(const char *str, pa_json_object *obj) {
> + pa_json_object *value;
> +
> + obj->array_values = pa_idxset_new(NULL, NULL);
> +
> + while (*str != ']') {
> + str++; /* Consume leading '[' or ',' */
> +
> + /* Need to chew up whitespaces as a special case to deal with the
> + * possibility of an empty array */
> + while (is_whitespace(*str))
> + str++;
> +
> + if (*str == ']')
> + break;
> +
> + str = parse_value(str, ",]", &value);
> + if (!str) {
> + pa_log("Could not parse value for array: %s", str);
str is NULL, don't log it.
> + goto error;
> + }
> +
> + pa_idxset_put(obj->array_values, value, NULL);
> + }
> +
> + /* Drop trailing ']' */
> + str++;
> +
> + /* We now know the value was correctly parsed */
> + obj->type = PA_JSON_TYPE_ARRAY;
> +
> + return str;
> +
> +error:
> + pa_idxset_free(obj->array_values, (pa_free_cb_t) pa_json_object_unref);
obj->array_values should be set to NULL.
> + return NULL;
> +}
> +
> +typedef enum {
> + JSON_PARSER_STATE_INIT,
> + JSON_PARSER_STATE_FINISH,
> +} json_parser_state;
> +
> +static const char* parse_value(const char *str, const char *end, pa_json_object **obj) {
> + json_parser_state state = JSON_PARSER_STATE_INIT;
> +
> + pa_return_val_if_fail(str != NULL, NULL);
I think a regular assertion would fit better here.
> +
> + *obj = json_object_new();
I think it's better style to assign to obj only after the parsing has
succeeded. Now you're returning a pointer to freed data in case of
failure.
> +
> + while (!is_end(*str, end)) {
> + switch (state) {
> + case JSON_PARSER_STATE_INIT:
> + if (is_whitespace(*str)) {
> + str++;
> + } else if (*str == 'n') {
> + str = parse_null(str, *obj);
> + state = JSON_PARSER_STATE_FINISH;
> + } else if (*str == 't' || *str == 'f') {
> + str = parse_boolean(str, *obj);
> + state = JSON_PARSER_STATE_FINISH;
> + } else if (*str == '"') {
> + str = parse_string(str, *obj);
> + state = JSON_PARSER_STATE_FINISH;
> + } else if (is_digit(*str) || *str == '-') {
> + str = parse_number(str, *obj);
> + state = JSON_PARSER_STATE_FINISH;
> + } else if (*str == '{') {
> + str = parse_object(str, *obj);
> + state = JSON_PARSER_STATE_FINISH;
> + } else if (*str == '[') {
> + str = parse_array(str, *obj);
> + state = JSON_PARSER_STATE_FINISH;
> + }
There's no final "else" branch here. If none of these checks evaluate
to true, that should be an error.
> +
> + if (!str)
> + goto error;
> +
> + break;
> +
> + case JSON_PARSER_STATE_FINISH:
> + /* Consume trailing whitespaces */
> + if (is_whitespace(*str)) {
> + str++;
> + } else {
> + goto error;
> + }
> + }
> + }
> +
> + if (JSON_OBJECT_TYPE(*obj) == PA_JSON_TYPE_INIT) {
> + /* We didn't actually get any data */
> + pa_log("No data while parsing json string: '%s' till '%s'", str, end);
end can be NULL so maybe use pa_strnull()? I don't know if this is
required. I think glibc handles NULL gracefully, but the manual page
for printf appears silent on this, so probably this is undefined
behaviour.
> + goto error;
> + }
> +
> + return str;
> +
> +error:
> + pa_json_object_unref(*obj);
> + return NULL;
> +}
> +
> +
> +pa_json_object* pa_json_parse(const char *str) {
> + pa_json_object *obj;
> +
> + str = parse_value(str, NULL, &obj);
> + pa_assert(*str == '\0');
This will crash on any parse failure...
> +
> + return obj;
> +}
> +
> +pa_json_type pa_json_object_get_type(const pa_json_object *obj) {
> + return JSON_OBJECT_TYPE(obj);
> +}
> +
> +void pa_json_object_unref(pa_json_object *obj) {
> + if (PA_REFCNT_DEC(obj) > 0)
> + return;
> +
> + switch (JSON_OBJECT_TYPE(obj)) {
> + case PA_JSON_TYPE_INIT:
> + case PA_JSON_TYPE_INT:
> + case PA_JSON_TYPE_DOUBLE:
> + case PA_JSON_TYPE_BOOL:
> + case PA_JSON_TYPE_NULL:
> + break;
> +
> + case PA_JSON_TYPE_STRING:
> + pa_xfree(obj->string_value);
> + break;
> +
> + case PA_JSON_TYPE_OBJECT:
> + pa_hashmap_free(obj->object_values);
> + break;
> +
> + case PA_JSON_TYPE_ARRAY:
> + pa_idxset_free(obj->array_values, (pa_free_cb_t) pa_json_object_unref);
> + break;
> +
> + default:
> + pa_assert_not_reached();
> + }
> +
> + pa_xfree(obj);
> +}
> +
> +int pa_json_object_get_int(const pa_json_object *o) {
> + pa_return_val_if_fail(JSON_OBJECT_TYPE(o) == PA_JSON_TYPE_INT, 0);
Is it a programming error to call this function with non-int object? If
yes, a regular assertion should be used. If not, then shouldn't the
caller decide whether to log something or not? (In case you didn't
know, pa_return_val_if_fail() prints "Assertion %s failed..." at debug
log level.)
This same comment applies to the following functions too.
> + return o->int_value;
> +}
> +
> +double pa_json_object_get_double(const pa_json_object *o) {
> + pa_return_val_if_fail(JSON_OBJECT_TYPE(o) == PA_JSON_TYPE_DOUBLE, 0);
> + return o->double_value;
> +}
> +
> +bool pa_json_object_get_bool(const pa_json_object *o) {
> + pa_return_val_if_fail(JSON_OBJECT_TYPE(o) == PA_JSON_TYPE_BOOL, false);
> + return o->bool_value;
> +}
> +
> +const char* pa_json_object_get_string(const pa_json_object *o) {
> + pa_return_val_if_fail(JSON_OBJECT_TYPE(o) == PA_JSON_TYPE_STRING, NULL);
> + return o->string_value;
> +}
> +
> +const pa_json_object* pa_json_object_get_object_member(const pa_json_object *o, const char *name) {
> + pa_return_val_if_fail(JSON_OBJECT_TYPE(o) == PA_JSON_TYPE_OBJECT, NULL);
> + return pa_hashmap_get(o->object_values, name);
> +}
> +
> +int pa_json_object_get_array_length(const pa_json_object *o) {
> + pa_return_val_if_fail(JSON_OBJECT_TYPE(o) == PA_JSON_TYPE_ARRAY, 0);
> + return pa_idxset_size(o->array_values);
> +}
> +
> +const pa_json_object* pa_json_object_get_array_member(const pa_json_object *o, int index) {
> + pa_return_val_if_fail(JSON_OBJECT_TYPE(o) == PA_JSON_TYPE_ARRAY, NULL);
> + return pa_idxset_get_by_index(o->array_values, index);
> +}
> diff --git a/src/pulse/json.h b/src/pulse/json.h
> new file mode 100644
> index 0000000..6eba9a9
> --- /dev/null
> +++ b/src/pulse/json.h
> @@ -0,0 +1,57 @@
> +/***
> + This file is part of PulseAudio.
> +
> + Copyright 2016 Arun Raghavan <mail at arunraghavan.net>
> +
> + PulseAudio is free software; you can redistribute it and/or modify
> + it under the terms of the GNU Lesser General Public License as published
> + by the Free Software Foundation; either version 2.1 of the License,
> + or (at your option) any later version.
> +
> + PulseAudio is distributed in the hope that it will be useful, but
> + WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public License
> + along with PulseAudio; if not, see <http://www.gnu.org/licenses/>.
> +***/
> +
> +#ifdef HAVE_CONFIG_H
> +#include
> +#endif
This belongs in the .c file (if it's needed at all).
> +
> +#include
> +
> +PA_C_DECL_BEGIN
This isn't a public API, so this seems unnecessary.
> diff --git a/src/tests/json-test.c b/src/tests/json-test.c
> new file mode 100644
> index 0000000..9be4dac
> --- /dev/null
> +++ b/src/tests/json-test.c
> +START_TEST(object_test) {
> + pa_json_object *o;
> + const pa_json_object *v;
> +
> + o = pa_json_parse(" { \"name\" : \"A Person\" } ");
> +
> + fail_unless(o != NULL);
> + fail_unless(pa_json_object_get_type(o) == PA_JSON_TYPE_OBJECT);
> +
> + v = pa_json_object_get_object_member(o, "name");
> + fail_unless(v != NULL);
> + fail_unless(pa_json_object_get_type(v) == PA_JSON_TYPE_STRING);
> + fail_unless(pa_streq(pa_json_object_get_string(v), "A Person"));
> +
> + pa_json_object_unref(o);
> +
> + o = pa_json_parse(" { \"age\" : -45.3e-0 } ");
> +
> + fail_unless(o != NULL);
> + fail_unless(pa_json_object_get_type(o) == PA_JSON_TYPE_OBJECT);
> +
> + v = pa_json_object_get_object_member(o, "age");
> + fail_unless(v != NULL);
> + fail_unless(pa_json_object_get_type(v) == PA_JSON_TYPE_DOUBLE);
> + fail_unless(IS_EQUAL(pa_json_object_get_double(v), -45.3));
> +
> + pa_json_object_unref(o);
> +
> + o = pa_json_parse("{\"person\":true}");
> +
> + fail_unless(o != NULL);
> + fail_unless(pa_json_object_get_type(o) == PA_JSON_TYPE_OBJECT);
> +
> + v = pa_json_object_get_object_member(o, "person");
> + fail_unless(v != NULL);
> + fail_unless(pa_json_object_get_type(v) == PA_JSON_TYPE_BOOL);
> + fail_unless(IS_EQUAL(pa_json_object_get_bool(v), true));
IS_EQUAL is only supposed to be used with floating point values, right?
--
Tanu
More information about the pulseaudio-discuss
mailing list