forked from simdjson/simdjson
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathparsedjson.h
More file actions
154 lines (126 loc) · 4.63 KB
/
Copy pathparsedjson.h
File metadata and controls
154 lines (126 loc) · 4.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
#ifndef SIMDJSON_PARSEDJSON_H
#define SIMDJSON_PARSEDJSON_H
#include "simdjson/common_defs.h"
#include "simdjson/simdjson.h"
#include <cstring>
#include <iostream>
// Mask covering the low 56 bits of a 64-bit word (fourteen hex F digits).
// NOTE(review): presumably used to strip the 8-bit type tag that
// ParsedJson::write_tape stores in bits 56-63 of a tape word -- confirm
// against the users of this macro.
#define JSON_VALUE_MASK 0xFFFFFFFFFFFFFF
// Default for the max_depth argument of ParsedJson::allocate_capacity and for
// the depth parameter of ParsedJson::Iterator.
#define DEFAULT_MAX_DEPTH \
1024 // a JSON document with a depth exceeding 1024 is probably de facto
// invalid
namespace simdjson {
/************
 * ParsedJson owns the buffers that a parsing attempt writes into.
 *
 * The JSON is parsed to a tape, see the accompanying tape.md file
 * for documentation.
 *
 * The type is move-only: copy construction and copy assignment are deleted.
 ***********/
class ParsedJson {
public:
  // create a ParsedJson container with zero capacity, call allocate_capacity to
  // allocate memory
  ParsedJson();
  ~ParsedJson();
  ParsedJson(ParsedJson &&p);
  ParsedJson &operator=(ParsedJson &&o);

  // if needed, allocate memory so that the object is able to process JSON
  // documents having up to len bytes and max_depth "depth"
  WARN_UNUSED
  bool allocate_capacity(size_t len, size_t max_depth = DEFAULT_MAX_DEPTH);

  // returns true if the document parsed was valid
  bool is_valid() const;

  // return an error code corresponding to the last parsing attempt (see
  // simdjson.h); the stored code starts out as simdjson::UNITIALIZED, i.e. the
  // value reported when no parsing was attempted
  int get_error_code() const;

  // return the string equivalent of "get_error_code"
  std::string get_error_message() const;

  // deallocate memory and set capacity to zero, called automatically by the
  // destructor
  void deallocate();

  // this should be called when parsing (right before writing the tapes)
  void init();

  // print the json to the given output stream (the document should be valid)
  // return false if the tape is likely wrong (e.g., you did not parse a valid
  // JSON).
  WARN_UNUSED
  bool print_json(std::ostream &os) const;

  // write the raw tape to the given output stream; the boolean result must
  // not be ignored (WARN_UNUSED)
  WARN_UNUSED
  bool dump_raw_tape(std::ostream &os) const;

  // all nodes are stored on the tape using a 64-bit word whose top 8 bits
  // (bits 56-63) hold a one-character type tag.
  //
  // 64-bit integers and doubles take two consecutive words: a tag-only word
  // ('l', 'u' or 'd') followed by the raw 64-bit value (see write_tape_s64,
  // write_tape_u64 and write_tape_double below).
  //
  // for objects or arrays, store [ or { at the beginning and } and ] at the
  // end. For the openings ([ or {), we annotate them with a reference to the
  // location on the tape of the end, and for the closings (} and ]), we
  // annotate them with a reference to the location of the opening
  //
  // The write_* helpers below should be considered private functions. None of
  // them checks current_loc against tape_capacity; the caller is responsible
  // for having allocated enough room.

  // append one word: val in the low bits, the tag c in bits 56-63
  really_inline void write_tape(uint64_t val, uint8_t c) {
    tape[current_loc++] = val | ((static_cast<uint64_t>(c)) << 56);
  }

  // append a signed 64-bit integer: an 'l' tag word, then the value's bits
  really_inline void write_tape_s64(int64_t i) {
    write_tape(0, 'l');
    static_assert(sizeof(i) == sizeof(tape[current_loc]), "mismatch size");
    // memcpy keeps the bit pattern and matches write_tape_double
    memcpy(&tape[current_loc++], &i, sizeof(int64_t));
  }

  // append an unsigned 64-bit integer: a 'u' tag word, then the value
  really_inline void write_tape_u64(uint64_t i) {
    write_tape(0, 'u');
    tape[current_loc++] = i;
  }

  // append a double: a 'd' tag word, then the value's 64-bit representation
  really_inline void write_tape_double(double d) {
    write_tape(0, 'd');
    static_assert(sizeof(d) == sizeof(tape[current_loc]), "mismatch size");
    memcpy(&tape[current_loc++], &d, sizeof(double));
  }

  // index of the next tape word that will be written
  really_inline uint32_t get_current_loc() const { return current_loc; }

  // OR val into the word already stored at saved_loc (used to attach a tape
  // reference to a previously written opening/closing word)
  really_inline void annotate_previous_loc(uint32_t saved_loc, uint64_t val) {
    tape[saved_loc] |= val;
  }

  // exception type describing an invalid JSON document
  class InvalidJSON : public std::exception {
  public:
    const char *what() const noexcept override {
      return "JSON document is invalid";
    }
  };

  template <size_t max_depth> class BasicIterator;
  using Iterator = BasicIterator<DEFAULT_MAX_DEPTH>;

  size_t byte_capacity{0};  // indicates how many bytes are meant to be supported
  size_t depth_capacity{0}; // how deep we can go
  size_t tape_capacity{0};
  size_t string_capacity{0};
  uint32_t current_loc{0};
  uint32_t n_structural_indexes{0};

  // The buffers below default to nullptr so that an object whose capacity is
  // zero never exposes indeterminate pointers.
  uint32_t *structural_indexes{nullptr};
  uint64_t *tape{nullptr};
  uint32_t *containing_scope_offset{nullptr};
#ifdef SIMDJSON_USE_COMPUTED_GOTO
  void **ret_address{nullptr};
#else
  char *ret_address{nullptr};
#endif
  uint8_t *string_buf{nullptr}; // should be at least byte_capacity
  uint8_t *current_string_buf_loc{nullptr};
  bool valid{false};
  int error_code{simdjson::UNITIALIZED};

private:
  // we don't want the copy constructor to be called
  ParsedJson(const ParsedJson &p) = delete;
  // we don't want the copy assignment to be called
  ParsedJson &operator=(const ParsedJson &o) = delete;
};
// Print the 64 bits of v to std::cout, least significant bit first: a set bit
// is shown as "1" and a clear bit as "_". The row of bits is followed by a
// space, the label msg and a newline.
inline void dumpbits_always(uint64_t v, const std::string &msg) {
  uint64_t remaining = v;
  for (int emitted = 0; emitted != 64; ++emitted) {
    const bool lowest_is_set = (remaining & 1U) != 0;
    if (lowest_is_set) {
      std::cout << "1";
    } else {
      std::cout << "_";
    }
    remaining >>= 1; // bring the next higher bit into the lowest position
  }
  std::cout << " " << msg.c_str() << "\n";
}
// 32-bit counterpart of the 64-bit bit dump: print the bits of v to std::cout
// from the least significant to the most significant ("1" for set, "_" for
// clear), then a space, the label msg and a newline.
inline void dumpbits32_always(uint32_t v, const std::string &msg) {
  int position = 0;
  while (position < 32) {
    const uint32_t probe = static_cast<uint32_t>(1) << position;
    const char *glyph = ((v & probe) != 0) ? "1" : "_";
    std::cout << glyph;
    ++position;
  }
  std::cout << " " << msg.c_str() << "\n";
}
} // namespace simdjson
#endif