/** @file json.c * JSON parser in C. * * see specifictions: * - http://www.json.org/ * - http://www.ietf.org/rfc/rfc4627.txt */ #include #include #include #include #include "parser.h" /******************************************************************************/ #define EXPORT static #define JSON_TOKEN_MAX 4000 /** * macro to go through white space. * @see json_parser_feed */ #define JSON_PARSER_EAT_WHITESPACE(par, len, data) \ while(1) { \ PARSER_UNTIL((par)->state, 1, (len)>0); /* wait for data */ \ if(!isspace(*data)) break; /* no more identifier */ \ if(*(data)=='\n') (par)->current_line++; /* found a newline, increment the line counter. */ \ (data)++; (len)--; /* go to next character */ \ } /** * enum for the different types of data that can be represented in JSON. */ enum json_type { JSON_T_STRING, /**< UTF8 encoded string. */ JSON_T_NUMBER, /**< store values as a double. */ JSON_T_OBJECT, /**< unordered name-value pairs (hash table). */ JSON_T_ARRAY, /**< ordered list of any type. */ JSON_T_TRUE, /**< boolean value, turns into a 1 when converted to a number. */ JSON_T_FALSE, /**< boolean value, turns into a 0 when converted to a number. */ JSON_T_NULL /**< special type that has no value. */ }; struct json_parser_stack_entry { enum json_type type; /**< current thing we are processing. can only be array or object. */ char *token; struct json_parser_stack_entry *next; }; /** * holds state used for parsing. */ struct json_parser { void (*cb_xxx)(struct json_parser *); /**< callback for XXX. */ unsigned state; /**< current state. */ unsigned current_line; /**< current line number. */ char tokenbuffer[JSON_TOKEN_MAX]; /**< @todo make this a dynamically allocated array. */ size_t tokenbuffer_ofs; /**< offset into tokenbuffer. */ char num_temp[16]; /**< holds a temporary number. */ size_t num_temp_ofs; /**< offset into num_temp. */ struct json_parser_stack_entry *stack_head; }; static int json_parser_stack_push(struct json_parser *par, enum json_type type, size_t token_len, const char *token) { struct json_parser_stack_entry *ent; ent=calloc(1, sizeof *ent); if(!ent) { return 0; /* failure */ } if(token) { /* equivalent to strndup() */ ent->token=malloc(token_len+1); if(!ent->token) { free(ent); return 0; /* failure */ } memcpy(ent->token, token, token_len); ent->token[token_len]=0; } else { ent->token=NULL; /* NULL means token is not applicable */ } ent->type=type; /* push the entry */ ent->next=par->stack_head; par->stack_head=ent; return 1; /* success */ } static int json_parser_stack_pop(struct json_parser *par) { struct json_parser_stack_entry *ent=par->stack_head; if(!ent) return 0; /* no more entries */ /* unlink from stack */ par->stack_head=ent->next; /* free the entry */ free(ent->token); free(ent); return 1; /* success */ } /** * resets the parser back to default state. */ EXPORT void json_parser_reset(struct json_parser *par) { par->current_line=1; par->state=0; par->tokenbuffer_ofs=0; par->num_temp_ofs=0; while(json_parser_stack_pop(par)) ; /* free all the stack entries. */ } /** * creates a new parser handle. * @return * - NULL on failure. * - valid pointer on success. */ EXPORT struct json_parser *json_parser_create(void) { struct json_parser *ret; ret=calloc(1, sizeof *ret); if(!ret) { perror("calloc()"); return NULL; /* failure */ } json_parser_reset(ret); return ret; } /** * free a parser handle. */ EXPORT void json_parser_destroy(struct json_parser *par) { free(par); } /** * give a non-null terminated buffer to the parser for processing. * * @return * - 0 if there is a parse error. * - 1 if stream is still good. * - 2 if more data is needed for the stream. * @todo finish this function. */ EXPORT int json_parser_feed(struct json_parser *par, size_t len, const char *data) { PARSER_BEGIN(par->state); while(len>0) { JSON_PARSER_EAT_WHITESPACE(par, len, data); PARSER_UNTIL(par->state, 1, len>0); /* wait for data */ if(*data=='"') { /** handle strings */ data++; len--; /* go to next character */ /* look for ending quote */ while(1) { PARSER_UNTIL(par->state, 2, len>0); /* wait for data - more needed to end the string. */ /* look for a terminating ", but only if not in escape mode. */ if(*data=='"') { /* no more data */ data++; len--; /* go to next character */ printf("%u:identifier '%.*s'\n", par->current_line, par->tokenbuffer_ofs, par->tokenbuffer); par->tokenbuffer_ofs=0; /* we're done with the token buffer */ break; } /* enter escape mode */ if(*data=='\\') { char ch; data++; len--; /* go to next character */ PARSER_UNTIL(par->state, 2, len>0); /* wait for data - more needed to end the string. */ if(*data=='n') { ch='\n'; data++; len--; /* go to next character */ } else if(*data=='b') { ch='\b'; data++; len--; /* go to next character */ } else if(*data=='f') { ch='\f'; data++; len--; /* go to next character */ } else if(*data=='r') { ch='\r'; data++; len--; /* go to next character */ } else if(*data=='t') { ch='\t'; data++; len--; /* go to next character */ } else if(*data=='u') { /**< \uXXXX unicode character escapes. */ unsigned long num; par->num_temp_ofs=0; /* read 4 digits, and put into num_temp buffer. leaving room for null. */ data++; len--; /* go to next character */ while(par->num_temp_ofs<4) { PARSER_UNTIL(par->state, 2, len>0); /* wait for data - more needed to end the string. */ if(!isxdigit(*data)) { return 0; /* error */ } PARSER_BUFFER_APPEND(par->num_temp, par->num_temp_ofs, sizeof par->num_temp-1, *data); data++; len--; /* go to next character */ } /* null terminate the num_temp buffer. */ PARSER_BUFFER_APPEND(par->num_temp, par->num_temp_ofs, sizeof par->num_temp, 0); num=strtoul(par->num_temp, 0, 16); /* place into buffer */ ch=num; } else if(*data=='a') { /* not RFC4627. */ ch='\a'; data++; len--; /* go to next character */ } else if(*data=='v') { /* not RFC4627. */ ch='\v'; data++; len--; /* go to next character */ } else if(isdigit(*data)) { /* not RFC4627. support \000 octal escapes. */ unsigned long num; par->num_temp_ofs=0; /* read up to 3 digits, put into num_temp buffer, leaving room for null. */ PARSER_BUFFER_APPEND(par->num_temp, par->num_temp_ofs, sizeof par->num_temp-1, *data); data++; len--; /* go to next character */ PARSER_UNTIL(par->state, 2, len>0); /* wait for data - more needed to end the string. */ if(isdigit(*data)) { PARSER_BUFFER_APPEND(par->num_temp, par->num_temp_ofs, sizeof par->num_temp-1, *data); data++; len--; /* go to next character */ PARSER_UNTIL(par->state, 2, len>0); /* wait for data - more needed to end the string. */ if(isdigit(*data)) { PARSER_BUFFER_APPEND(par->num_temp, par->num_temp_ofs, sizeof par->num_temp-1, *data); data++; len--; /* go to next character */ } } /* null terminate the num_temp buffer. */ PARSER_BUFFER_APPEND(par->num_temp, par->num_temp_ofs, sizeof par->num_temp, 0); num=strtoul(par->num_temp, 0, 8); /* place into buffer */ ch=num; } else { /* anything else quoted with backslash is a literal */ ch=*data; data++; len--; /* go to next character */ } PARSER_BUFFER_APPEND(par->tokenbuffer, par->tokenbuffer_ofs, sizeof par->tokenbuffer, ch); } else { /* normal data */ PARSER_BUFFER_APPEND(par->tokenbuffer, par->tokenbuffer_ofs, sizeof par->tokenbuffer, *data); data++; len--; /* go to next character */ } } } else if(*data=='{') { /** @todo handle objects. */ /** @todo: push an entry onto the stack. */ /** @todo: look for a string using the string code. */ return 0; /* error */ } else if(*data=='t') { /* handle 'true' */ data++; len--; /* go to next character */ PARSER_UNTIL(par->state, 2, len>0); /* wait for data - more needed to end the boolean. */ if(*data!='r') return 0; /* failed to match 'true' */ data++; len--; /* go to next character */ PARSER_UNTIL(par->state, 2, len>0); /* wait for data - more needed to end the boolean. */ if(*data!='u') return 0; /* failed to match 'true' */ data++; len--; /* go to next character */ PARSER_UNTIL(par->state, 2, len>0); /* wait for data - more needed to end the boolean. */ if(*data!='e') return 0; /* failed to match 'true' */ data++; len--; /* go to next character */ printf("%u:boolean 'true'\n", par->current_line); } else if(*data=='f') { /* handle 'false' */ data++; len--; /* go to next character */ PARSER_UNTIL(par->state, 2, len>0); /* wait for data - more needed to end the boolean. */ if(*data!='a') return 0; /* failed to match 'false' */ data++; len--; /* go to next character */ PARSER_UNTIL(par->state, 2, len>0); /* wait for data - more needed to end the boolean. */ if(*data!='l') return 0; /* failed to match 'false' */ data++; len--; /* go to next character */ PARSER_UNTIL(par->state, 2, len>0); /* wait for data - more needed to end the boolean. */ if(*data!='s') return 0; /* failed to match 'false' */ data++; len--; /* go to next character */ PARSER_UNTIL(par->state, 2, len>0); /* wait for data - more needed to end the boolean. */ if(*data!='e') return 0; /* failed to match 'false' */ data++; len--; /* go to next character */ printf("%u:boolean 'false'\n", par->current_line); } else if(*data=='[') { /** @todo handle arrays. */ /** @todo: push an entry onto the stack. */ json_parser_stack_push(par, JSON_T_ARRAY, 0, NULL); printf("%u:push array\n", par->current_line); data++; len--; /* go to next character */ } else if(*data==']') { /* handle end of array. if top stack item isn't an array then this is an error. */ if(par->stack_head && par->stack_head->type==JSON_T_ARRAY) { /* pop an entry off the stack. */ printf("%u:pop array\n", par->current_line); json_parser_stack_pop(par); } else { return 0; /* ']' doesn't match an array on the stack. */ } data++; len--; /* go to next character */ } else if(*data==',') { /** @todo handle arrays and objects. */ /** @todo: look for , and handle it according to what is on the stack. if nothing is on the stack then an error. */ data++; len--; /* go to next character */ } else { /* handle errors. */ return 0; /* error */ } } PARSER_END; return 2; /* more data needed for the stream */ } /** * get the line the parser is currently or has most recently processed. * @return line number of a text file. 0 if nothing has been read yet. */ EXPORT unsigned json_parser_current_line(struct json_parser *par) { return par->current_line; } /** test code */ int main() { struct json_parser *par; char buf[16]; par=json_parser_create(); /* return until EOF or error. */ while(fgets(buf, sizeof buf, stdin)) { switch(json_parser_feed(par, strlen(buf), buf)) { case 0: fprintf(stderr, "PARSE ERROR on line %u!\n", json_parser_current_line(par)); json_parser_destroy(par); return EXIT_FAILURE; case 1: fprintf(stderr, "DATA COMPLETE on line %u\n", json_parser_current_line(par)); break; case 2: fprintf(stderr, "NEED MORE DATA on line %u\n", json_parser_current_line(par)); break; } } json_parser_destroy(par); return 0; /* success */ }