On 10/16/2009 11:37 PM, Anthony Liguori wrote:
I already am :-) Stay tuned, I should have a patch later this
afternoon.
Was it a race? (Seriously, sorry I didn't notice a couple of hours ago).
This one is ~5% slower than the "Evil" one, but half the size. Tested
against the comments.json file from the "Evil" parser and with
valgrind too. Does all the funky Unicode stuff too.
Paolo
/*
* An event-based, asynchronous JSON parser.
*
* Copyright (C) 2009 Red Hat Inc.
*
* Authors:
* Paolo Bonzini <address@hidden>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "json.h"
#include <string.h>
#include <stdlib.h>
/* Common character classes. */
#define CASE_XDIGIT \
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': \
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F'

#define CASE_DIGIT \
    case '0': case '1': case '2': case '3': case '4': \
    case '5': case '6': case '7': case '8': case '9'

/*
 * Helper function to go from \uXXXX-encoded UTF-16 to UTF-8.
 *
 * buf:  start of the output buffer.  Only used to make sure that a
 *       preceding high surrogate really lies inside the buffer.
 * dest: in/out write cursor into the buffer.  On success it is advanced
 *       past the bytes that were written; on failure it is left untouched.
 * src:  the four hexadecimal digits following "\u".  Exactly four
 *       characters are read; no terminator is required.
 *
 * Returns false if src contains a non-hexadecimal character, or if the
 * code unit is a low surrogate (0xDC00..0xDFFF) that is not immediately
 * preceded in the buffer by an encoded high surrogate.
 *
 * A high surrogate (0xD800..0xDBFF) is provisionally stored as the three
 * bytes ED A0..AF xx.  When the matching low surrogate arrives, those
 * three bytes are rewritten in place and one more byte is appended,
 * giving the four-byte UTF-8 form of the supplementary code point.
 *
 * No bounds checking is done on the output: up to three bytes are
 * written at *dest.
 */
static bool hex_to_utf8 (char *buf, char **dest, char *src)
{
    int i, n;
    uint8_t *p;

    /* Accumulate the four hex digits into a 16-bit code unit. */
    for (i = n = 0; i < 4; i++) {
        n <<= 4;
        switch (src[i]) {
        CASE_DIGIT:
            n |= src[i] - '0';
            break;
        CASE_XDIGIT:
            /* Clearing bit 5 folds lower case onto upper case. */
            n |= (src[i] & ~32) - 'A' + 10;
            break;
        default:
            return false;
        }
    }

    p = (uint8_t *)*dest;
    if (n < 128) {
        *p++ = n;
    } else if (n < 2048) {
        *p++ = 0xC0 | (n >> 6);
        *p++ = 0x80 | (n & 63);
    } else if (n < 0xDC00 || n > 0xDFFF) {
        /* Also covers high surrogates, which are fixed up later. */
        *p++ = 0xE0 | (n >> 12);
        *p++ = 0x80 | ((n >> 6) & 63);
        *p++ = 0x80 | (n & 63);
    } else {
        /* Merge with preceding high surrogate. */
        if (p - (uint8_t *)buf < 3
            || p[-3] != 0xED
            || p[-2] < 0xA0 || p[-2] > 0xAF) { /* 0xD800..0xDBFF */
            return false;
        }

        /*
         * code point = 0x10000 + (high - 0xD800) * 0x400 + (low - 0xDC00).
         * The ten payload bits of the high surrogate are split 4 + 6
         * between p[-2] and p[-1].  The parts must be ADDED: the 0x10000
         * offset shares bit 16 with the high surrogate's contribution,
         * so OR-ing them would drop the carry for every other plane
         * (e.g. \uD840\uDC00 would decode as U+10000, not U+20000).
         */
        n = 0x10000
            + ((p[-2] & 15) << 16)
            + ((p[-1] & 63) << 10)
            + (n - 0xDC00);

        /* Overwrite high surrogate. */
        p[-3] = 0xF0 | (n >> 18);
        p[-2] = 0x80 | ((n >> 12) & 63);
        p[-1] = 0x80 | ((n >> 6) & 63);
        *p++ = 0x80 | (n & 63);
    }

    *dest = (char *)p;
    return true;
}
/*
 * Parser instance state.
 *
 * NOTE(review): the per-field notes below are inferred from the field
 * names and types only; the code that uses them is not visible here,
 * so confirm against the parser implementation.
 */
struct json_parser {
    /* Embedded copy (by value) of a struct json_parser_config. */
    struct json_parser_config c;

    /* Presumably the used length and allocated capacity of buf - TODO confirm. */
    size_t n;
    size_t alloc;
    char *buf;

    /* Presumably an index into stack[] - TODO confirm. */
    size_t sp;

    /* Current state word, plus a fixed array of 128 saved words. */
    uint32_t state;
    uint32_t stack[128];

    /* Four-byte inline buffer; purpose not visible here - TODO confirm. */
    char start_buffer[4];
};