2022-08-02 14:28:39 +02:00
// __ _____ _____ _____
// __| | __| | | | JSON for Modern C++
// | | |__ | | | | | | version 3.11.1
// |_____|_____|_____|_|___| https://github.com/nlohmann/json
//
// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann <https://nlohmann.me>
// SPDX-License-Identifier: MIT
2020-09-29 13:47:50 +02:00
# pragma once
# include <algorithm> // generate_n
# include <array> // array
# include <cmath> // ldexp
# include <cstddef> // size_t
# include <cstdint> // uint8_t, uint16_t, uint32_t, uint64_t
# include <cstdio> // snprintf
# include <cstring> // memcpy
# include <iterator> // back_inserter
# include <limits> // numeric_limits
# include <string> // char_traits, string
# include <utility> // make_pair, move
2021-12-11 23:32:21 +01:00
# include <vector> // vector
2022-08-02 14:28:39 +02:00
# include <map> // map
2020-09-29 13:47:50 +02:00
# include <nlohmann/detail/exceptions.hpp>
# include <nlohmann/detail/input/input_adapters.hpp>
# include <nlohmann/detail/input/json_sax.hpp>
# include <nlohmann/detail/input/lexer.hpp>
# include <nlohmann/detail/macro_scope.hpp>
# include <nlohmann/detail/meta/is_sax.hpp>
2021-12-11 23:32:21 +01:00
# include <nlohmann/detail/meta/type_traits.hpp>
2022-08-02 14:28:39 +02:00
# include <nlohmann/detail/string_concat.hpp>
2020-09-29 13:47:50 +02:00
# include <nlohmann/detail/value_t.hpp>
2022-08-02 14:28:39 +02:00
NLOHMANN_JSON_NAMESPACE_BEGIN
2020-09-29 13:47:50 +02:00
namespace detail
{
/// how to treat CBOR tags
enum class cbor_tag_handler_t
{
    error,   ///< throw a parse_error exception in case of a tag
    ignore,  ///< ignore tags
    store    ///< store tags as binary type
};
/*!
@ brief determine system byte order
@ return true if and only if system ' s byte order is little endian
@ note from https : //stackoverflow.com/a/1001328/266378
*/
2022-08-02 14:28:39 +02:00
static inline bool little_endianness(int num = 1) noexcept
{
    // Look at the byte stored at the lowest address of `num`. With the default
    // value 1 that byte equals 1 only if the least significant byte comes first.
    return *reinterpret_cast<char*>(&num) == 1;
}
///////////////////
// binary reader //
///////////////////
/*!
@ brief deserialization of CBOR , MessagePack , and UBJSON values
*/
template < typename BasicJsonType , typename InputAdapterType , typename SAX = json_sax_dom_parser < BasicJsonType > >
class binary_reader
{
using number_integer_t = typename BasicJsonType : : number_integer_t ;
using number_unsigned_t = typename BasicJsonType : : number_unsigned_t ;
using number_float_t = typename BasicJsonType : : number_float_t ;
using string_t = typename BasicJsonType : : string_t ;
using binary_t = typename BasicJsonType : : binary_t ;
using json_sax_t = SAX ;
using char_type = typename InputAdapterType : : char_type ;
using char_int_type = typename std : : char_traits < char_type > : : int_type ;
public :
/*!
@ brief create a binary reader
@ param [ in ] adapter input adapter to read from
*/
2022-08-02 14:28:39 +02:00
explicit binary_reader(InputAdapterType&& adapter,
                       const input_format_t format = input_format_t::json) noexcept
    : ia(std::move(adapter))
    , input_format(format)
{
    // The temporary is created and discarded on purpose; presumably its
    // instantiation carries the static assertions on the SAX interface.
    static_cast<void>(detail::is_sax_static_asserts<SAX, BasicJsonType> {});
}
// make class move-only
binary_reader ( const binary_reader & ) = delete ;
2021-12-11 23:32:21 +01:00
binary_reader ( binary_reader & & ) = default ; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor)
2020-09-29 13:47:50 +02:00
binary_reader & operator = ( const binary_reader & ) = delete ;
2021-12-11 23:32:21 +01:00
binary_reader & operator = ( binary_reader & & ) = default ; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor)
2020-09-29 13:47:50 +02:00
~ binary_reader ( ) = default ;
/*!
@ param [ in ] format the binary format to parse
@ param [ in ] sax_ a SAX event processor
@param[in] strict  whether to expect the input to be consumed completely
@ param [ in ] tag_handler how to treat CBOR tags
2021-12-11 23:32:21 +01:00
@ return whether parsing was successful
2020-09-29 13:47:50 +02:00
*/
JSON_HEDLEY_NON_NULL(3)
bool sax_parse(const input_format_t format,
               json_sax_t* sax_,
               const bool strict = true,
               const cbor_tag_handler_t tag_handler = cbor_tag_handler_t::error)
{
    // remember the event consumer; every parse_*_internal helper reports through it
    sax = sax_;

    // dispatch to the format-specific top-level parser
    bool parsed = false;
    switch (format)
    {
        case input_format_t::bson:
            parsed = parse_bson_internal();
            break;

        case input_format_t::cbor:
            parsed = parse_cbor_internal(true, tag_handler);
            break;

        case input_format_t::msgpack:
            parsed = parse_msgpack_internal();
            break;

        case input_format_t::ubjson:
        case input_format_t::bjdata:
            parsed = parse_ubjson_internal();
            break;

        case input_format_t::json: // LCOV_EXCL_LINE
        default:            // LCOV_EXCL_LINE
            JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE
    }

    // nothing more to verify on failure or in non-strict mode
    if (!parsed || !strict)
    {
        return parsed;
    }

    // strict mode: next byte must be EOF
    const bool ubjson_like = (input_format == input_format_t::ubjson)
                             || (input_format == input_format_t::bjdata);
    if (ubjson_like)
    {
        // presumably skips no-op markers before looking at the next byte
        get_ignore_noop();
    }
    else
    {
        get();
    }

    if (JSON_HEDLEY_UNLIKELY(current != std::char_traits<char_type>::eof()))
    {
        const auto last_token = get_token_string();
        return sax->parse_error(chars_read, last_token, parse_error::create(110, chars_read,
                                exception_message(input_format, concat("expected end of input; last byte: 0x", last_token), "value"), nullptr));
    }

    return parsed;
}
private :
//////////
// BSON //
//////////
/*!
@ brief Reads in a BSON - object and passes it to the SAX - parser .
@ return whether a valid BSON - value was passed to the SAX parser
*/
bool parse_bson_internal ( )
{
std : : int32_t document_size { } ;
get_number < std : : int32_t , true > ( input_format_t : : bson , document_size ) ;
2022-08-02 14:28:39 +02:00
if ( JSON_HEDLEY_UNLIKELY ( ! sax - > start_object ( static_cast < std : : size_t > ( - 1 ) ) ) )
2020-09-29 13:47:50 +02:00
{
return false ;
}
if ( JSON_HEDLEY_UNLIKELY ( ! parse_bson_element_list ( /*is_array*/ false ) ) )
{
return false ;
}
return sax - > end_object ( ) ;
}
/*!
@ brief Parses a C - style string from the BSON input .
2021-12-11 23:32:21 +01:00
@ param [ in , out ] result A reference to the string variable where the read
2020-09-29 13:47:50 +02:00
string is to be stored .
@return `true` if the \x00-byte indicating the end of the string was
        encountered before the EOF; `false` indicates an unexpected EOF.
*/
bool get_bson_cstr(string_t& result)
{
    using target_char = typename string_t::value_type;

    // append byte after byte until the zero terminator (or EOF) shows up
    for (;;)
    {
        get();

        if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::bson, "cstring")))
        {
            return false;
        }

        if (current == 0x00)
        {
            // terminator reached; it is not part of the string
            return true;
        }

        result.push_back(static_cast<target_char>(current));
    }
}
/*!
@ brief Parses a zero - terminated string of length @ a len from the BSON
input .
@ param [ in ] len The length ( including the zero - byte at the end ) of the
string to be read .
2021-12-11 23:32:21 +01:00
@ param [ in , out ] result A reference to the string variable where the read
2020-09-29 13:47:50 +02:00
string is to be stored .
@ tparam NumberType The type of the length @ a len
@ pre len > = 1
@ return ` true ` if the string was successfully parsed
*/
template < typename NumberType >
bool get_bson_string ( const NumberType len , string_t & result )
{
if ( JSON_HEDLEY_UNLIKELY ( len < 1 ) )
{
auto last_token = get_token_string ( ) ;
2022-08-02 14:28:39 +02:00
return sax - > parse_error ( chars_read , last_token , parse_error : : create ( 112 , chars_read ,
exception_message ( input_format_t : : bson , concat ( " string length must be at least 1, is " , std : : to_string ( len ) ) , " string " ) , nullptr ) ) ;
2020-09-29 13:47:50 +02:00
}
return get_string ( input_format_t : : bson , len - static_cast < NumberType > ( 1 ) , result ) & & get ( ) ! = std : : char_traits < char_type > : : eof ( ) ;
}
/*!
@ brief Parses a byte array input of length @ a len from the BSON input .
@ param [ in ] len The length of the byte array to be read .
2021-12-11 23:32:21 +01:00
@ param [ in , out ] result A reference to the binary variable where the read
2020-09-29 13:47:50 +02:00
array is to be stored .
@ tparam NumberType The type of the length @ a len
@ pre len > = 0
@ return ` true ` if the byte array was successfully parsed
*/
template < typename NumberType >
bool get_bson_binary ( const NumberType len , binary_t & result )
{
if ( JSON_HEDLEY_UNLIKELY ( len < 0 ) )
{
auto last_token = get_token_string ( ) ;
2022-08-02 14:28:39 +02:00
return sax - > parse_error ( chars_read , last_token , parse_error : : create ( 112 , chars_read ,
exception_message ( input_format_t : : bson , concat ( " byte array length cannot be negative, is " , std : : to_string ( len ) ) , " binary " ) , nullptr ) ) ;
2020-09-29 13:47:50 +02:00
}
// All BSON binary values have a subtype
std : : uint8_t subtype { } ;
get_number < std : : uint8_t > ( input_format_t : : bson , subtype ) ;
result . set_subtype ( subtype ) ;
return get_binary ( input_format_t : : bson , len , result ) ;
}
/*!
@ brief Read a BSON document element of the given @ a element_type .
@param[in] element_type The BSON element type, cf. http://bsonspec.org/spec.html
@ param [ in ] element_type_parse_position The position in the input stream ,
where the ` element_type ` was read .
@ warning Not all BSON element types are supported yet . An unsupported
@ a element_type will give rise to a parse_error .114 :
Unsupported BSON record type 0 x . . .
@ return whether a valid BSON - object / array was passed to the SAX parser
*/
bool parse_bson_element_internal ( const char_int_type element_type ,
const std : : size_t element_type_parse_position )
{
switch ( element_type )
{
case 0x01 : // double
{
double number { } ;
return get_number < double , true > ( input_format_t : : bson , number ) & & sax - > number_float ( static_cast < number_float_t > ( number ) , " " ) ;
}
case 0x02 : // string
{
std : : int32_t len { } ;
string_t value ;
return get_number < std : : int32_t , true > ( input_format_t : : bson , len ) & & get_bson_string ( len , value ) & & sax - > string ( value ) ;
}
case 0x03 : // object
{
return parse_bson_internal ( ) ;
}
case 0x04 : // array
{
return parse_bson_array ( ) ;
}
case 0x05 : // binary
{
std : : int32_t len { } ;
binary_t value ;
return get_number < std : : int32_t , true > ( input_format_t : : bson , len ) & & get_bson_binary ( len , value ) & & sax - > binary ( value ) ;
}
case 0x08 : // boolean
{
return sax - > boolean ( get ( ) ! = 0 ) ;
}
case 0x0A : // null
{
return sax - > null ( ) ;
}
case 0x10 : // int32
{
std : : int32_t value { } ;
return get_number < std : : int32_t , true > ( input_format_t : : bson , value ) & & sax - > number_integer ( value ) ;
}
case 0x12 : // int64
{
std : : int64_t value { } ;
return get_number < std : : int64_t , true > ( input_format_t : : bson , value ) & & sax - > number_integer ( value ) ;
}
default : // anything else not supported (yet)
{
std : : array < char , 3 > cr { { } } ;
2022-08-02 14:28:39 +02:00
static_cast < void > ( ( std : : snprintf ) ( cr . data ( ) , cr . size ( ) , " %.2hhX " , static_cast < unsigned char > ( element_type ) ) ) ; // NOLINT(cppcoreguidelines-pro-type-vararg,hicpp-vararg)
std : : string cr_str { cr . data ( ) } ;
return sax - > parse_error ( element_type_parse_position , cr_str ,
parse_error : : create ( 114 , element_type_parse_position , concat ( " Unsupported BSON record type 0x " , cr_str ) , nullptr ) ) ;
2020-09-29 13:47:50 +02:00
}
}
}
/*!
@ brief Read a BSON element list ( as specified in the BSON - spec )
The same binary layout is used for objects and arrays , hence it must be
indicated with the argument @ a is_array which one is expected
( true - - > array , false - - > object ) .
@ param [ in ] is_array Determines if the element list being read is to be
treated as an object ( @ a is_array = = false ) , or as an
array ( @ a is_array = = true ) .
@ return whether a valid BSON - object / array was passed to the SAX parser
*/
bool parse_bson_element_list ( const bool is_array )
{
string_t key ;
while ( auto element_type = get ( ) )
{
if ( JSON_HEDLEY_UNLIKELY ( ! unexpect_eof ( input_format_t : : bson , " element list " ) ) )
{
return false ;
}
const std : : size_t element_type_parse_position = chars_read ;
if ( JSON_HEDLEY_UNLIKELY ( ! get_bson_cstr ( key ) ) )
{
return false ;
}
if ( ! is_array & & ! sax - > key ( key ) )
{
return false ;
}
if ( JSON_HEDLEY_UNLIKELY ( ! parse_bson_element_internal ( element_type , element_type_parse_position ) ) )
{
return false ;
}
// get_bson_cstr only appends
key . clear ( ) ;
}
return true ;
}
/*!
@ brief Reads an array from the BSON input and passes it to the SAX - parser .
@ return whether a valid BSON - array was passed to the SAX parser
*/
bool parse_bson_array ( )
{
std : : int32_t document_size { } ;
get_number < std : : int32_t , true > ( input_format_t : : bson , document_size ) ;
2022-08-02 14:28:39 +02:00
if ( JSON_HEDLEY_UNLIKELY ( ! sax - > start_array ( static_cast < std : : size_t > ( - 1 ) ) ) )
2020-09-29 13:47:50 +02:00
{
return false ;
}
if ( JSON_HEDLEY_UNLIKELY ( ! parse_bson_element_list ( /*is_array*/ true ) ) )
{
return false ;
}
return sax - > end_array ( ) ;
}
//////////
// CBOR //
//////////
/*!
@ param [ in ] get_char whether a new character should be retrieved from the
input ( true ) or whether the last read character should
be considered instead ( false )
@ param [ in ] tag_handler how CBOR tags should be treated
@ return whether a valid CBOR value was passed to the SAX parser
*/
bool parse_cbor_internal ( const bool get_char ,
const cbor_tag_handler_t tag_handler )
{
switch ( get_char ? get ( ) : current )
{
// EOF
case std : : char_traits < char_type > : : eof ( ) :
return unexpect_eof ( input_format_t : : cbor , " value " ) ;
// Integer 0x00..0x17 (0..23)
case 0x00 :
case 0x01 :
case 0x02 :
case 0x03 :
case 0x04 :
case 0x05 :
case 0x06 :
case 0x07 :
case 0x08 :
case 0x09 :
case 0x0A :
case 0x0B :
case 0x0C :
case 0x0D :
case 0x0E :
case 0x0F :
case 0x10 :
case 0x11 :
case 0x12 :
case 0x13 :
case 0x14 :
case 0x15 :
case 0x16 :
case 0x17 :
return sax - > number_unsigned ( static_cast < number_unsigned_t > ( current ) ) ;
case 0x18 : // Unsigned integer (one-byte uint8_t follows)
{
std : : uint8_t number { } ;
return get_number ( input_format_t : : cbor , number ) & & sax - > number_unsigned ( number ) ;
}
case 0x19 : // Unsigned integer (two-byte uint16_t follows)
{
std : : uint16_t number { } ;
return get_number ( input_format_t : : cbor , number ) & & sax - > number_unsigned ( number ) ;
}
case 0x1A : // Unsigned integer (four-byte uint32_t follows)
{
std : : uint32_t number { } ;
return get_number ( input_format_t : : cbor , number ) & & sax - > number_unsigned ( number ) ;
}
case 0x1B : // Unsigned integer (eight-byte uint64_t follows)
{
std : : uint64_t number { } ;
return get_number ( input_format_t : : cbor , number ) & & sax - > number_unsigned ( number ) ;
}
// Negative integer -1-0x00..-1-0x17 (-1..-24)
case 0x20 :
case 0x21 :
case 0x22 :
case 0x23 :
case 0x24 :
case 0x25 :
case 0x26 :
case 0x27 :
case 0x28 :
case 0x29 :
case 0x2A :
case 0x2B :
case 0x2C :
case 0x2D :
case 0x2E :
case 0x2F :
case 0x30 :
case 0x31 :
case 0x32 :
case 0x33 :
case 0x34 :
case 0x35 :
case 0x36 :
case 0x37 :
return sax - > number_integer ( static_cast < std : : int8_t > ( 0x20 - 1 - current ) ) ;
case 0x38 : // Negative integer (one-byte uint8_t follows)
{
std : : uint8_t number { } ;
return get_number ( input_format_t : : cbor , number ) & & sax - > number_integer ( static_cast < number_integer_t > ( - 1 ) - number ) ;
}
case 0x39 : // Negative integer -1-n (two-byte uint16_t follows)
{
std : : uint16_t number { } ;
return get_number ( input_format_t : : cbor , number ) & & sax - > number_integer ( static_cast < number_integer_t > ( - 1 ) - number ) ;
}
case 0x3A : // Negative integer -1-n (four-byte uint32_t follows)
{
std : : uint32_t number { } ;
return get_number ( input_format_t : : cbor , number ) & & sax - > number_integer ( static_cast < number_integer_t > ( - 1 ) - number ) ;
}
case 0x3B : // Negative integer -1-n (eight-byte uint64_t follows)
{
std : : uint64_t number { } ;
return get_number ( input_format_t : : cbor , number ) & & sax - > number_integer ( static_cast < number_integer_t > ( - 1 )
- static_cast < number_integer_t > ( number ) ) ;
}
// Binary data (0x00..0x17 bytes follow)
case 0x40 :
case 0x41 :
case 0x42 :
case 0x43 :
case 0x44 :
case 0x45 :
case 0x46 :
case 0x47 :
case 0x48 :
case 0x49 :
case 0x4A :
case 0x4B :
case 0x4C :
case 0x4D :
case 0x4E :
case 0x4F :
case 0x50 :
case 0x51 :
case 0x52 :
case 0x53 :
case 0x54 :
case 0x55 :
case 0x56 :
case 0x57 :
case 0x58 : // Binary data (one-byte uint8_t for n follows)
case 0x59 : // Binary data (two-byte uint16_t for n follow)
case 0x5A : // Binary data (four-byte uint32_t for n follow)
case 0x5B : // Binary data (eight-byte uint64_t for n follow)
case 0x5F : // Binary data (indefinite length)
{
binary_t b ;
return get_cbor_binary ( b ) & & sax - > binary ( b ) ;
}
// UTF-8 string (0x00..0x17 bytes follow)
case 0x60 :
case 0x61 :
case 0x62 :
case 0x63 :
case 0x64 :
case 0x65 :
case 0x66 :
case 0x67 :
case 0x68 :
case 0x69 :
case 0x6A :
case 0x6B :
case 0x6C :
case 0x6D :
case 0x6E :
case 0x6F :
case 0x70 :
case 0x71 :
case 0x72 :
case 0x73 :
case 0x74 :
case 0x75 :
case 0x76 :
case 0x77 :
case 0x78 : // UTF-8 string (one-byte uint8_t for n follows)
case 0x79 : // UTF-8 string (two-byte uint16_t for n follow)
case 0x7A : // UTF-8 string (four-byte uint32_t for n follow)
case 0x7B : // UTF-8 string (eight-byte uint64_t for n follow)
case 0x7F : // UTF-8 string (indefinite length)
{
string_t s ;
return get_cbor_string ( s ) & & sax - > string ( s ) ;
}
// array (0x00..0x17 data items follow)
case 0x80 :
case 0x81 :
case 0x82 :
case 0x83 :
case 0x84 :
case 0x85 :
case 0x86 :
case 0x87 :
case 0x88 :
case 0x89 :
case 0x8A :
case 0x8B :
case 0x8C :
case 0x8D :
case 0x8E :
case 0x8F :
case 0x90 :
case 0x91 :
case 0x92 :
case 0x93 :
case 0x94 :
case 0x95 :
case 0x96 :
case 0x97 :
2022-08-02 14:28:39 +02:00
return get_cbor_array (
conditional_static_cast < std : : size_t > ( static_cast < unsigned int > ( current ) & 0x1Fu ) , tag_handler ) ;
2020-09-29 13:47:50 +02:00
case 0x98 : // array (one-byte uint8_t for n follows)
{
std : : uint8_t len { } ;
return get_number ( input_format_t : : cbor , len ) & & get_cbor_array ( static_cast < std : : size_t > ( len ) , tag_handler ) ;
}
case 0x99 : // array (two-byte uint16_t for n follow)
{
std : : uint16_t len { } ;
return get_number ( input_format_t : : cbor , len ) & & get_cbor_array ( static_cast < std : : size_t > ( len ) , tag_handler ) ;
}
case 0x9A : // array (four-byte uint32_t for n follow)
{
std : : uint32_t len { } ;
2022-08-02 14:28:39 +02:00
return get_number ( input_format_t : : cbor , len ) & & get_cbor_array ( conditional_static_cast < std : : size_t > ( len ) , tag_handler ) ;
2020-09-29 13:47:50 +02:00
}
case 0x9B : // array (eight-byte uint64_t for n follow)
{
std : : uint64_t len { } ;
2022-08-02 14:28:39 +02:00
return get_number ( input_format_t : : cbor , len ) & & get_cbor_array ( conditional_static_cast < std : : size_t > ( len ) , tag_handler ) ;
2020-09-29 13:47:50 +02:00
}
case 0x9F : // array (indefinite length)
2022-08-02 14:28:39 +02:00
return get_cbor_array ( static_cast < std : : size_t > ( - 1 ) , tag_handler ) ;
2020-09-29 13:47:50 +02:00
// map (0x00..0x17 pairs of data items follow)
case 0xA0 :
case 0xA1 :
case 0xA2 :
case 0xA3 :
case 0xA4 :
case 0xA5 :
case 0xA6 :
case 0xA7 :
case 0xA8 :
case 0xA9 :
case 0xAA :
case 0xAB :
case 0xAC :
case 0xAD :
case 0xAE :
case 0xAF :
case 0xB0 :
case 0xB1 :
case 0xB2 :
case 0xB3 :
case 0xB4 :
case 0xB5 :
case 0xB6 :
case 0xB7 :
2022-08-02 14:28:39 +02:00
return get_cbor_object ( conditional_static_cast < std : : size_t > ( static_cast < unsigned int > ( current ) & 0x1Fu ) , tag_handler ) ;
2020-09-29 13:47:50 +02:00
case 0xB8 : // map (one-byte uint8_t for n follows)
{
std : : uint8_t len { } ;
return get_number ( input_format_t : : cbor , len ) & & get_cbor_object ( static_cast < std : : size_t > ( len ) , tag_handler ) ;
}
case 0xB9 : // map (two-byte uint16_t for n follow)
{
std : : uint16_t len { } ;
return get_number ( input_format_t : : cbor , len ) & & get_cbor_object ( static_cast < std : : size_t > ( len ) , tag_handler ) ;
}
case 0xBA : // map (four-byte uint32_t for n follow)
{
std : : uint32_t len { } ;
2022-08-02 14:28:39 +02:00
return get_number ( input_format_t : : cbor , len ) & & get_cbor_object ( conditional_static_cast < std : : size_t > ( len ) , tag_handler ) ;
2020-09-29 13:47:50 +02:00
}
case 0xBB : // map (eight-byte uint64_t for n follow)
{
std : : uint64_t len { } ;
2022-08-02 14:28:39 +02:00
return get_number ( input_format_t : : cbor , len ) & & get_cbor_object ( conditional_static_cast < std : : size_t > ( len ) , tag_handler ) ;
2020-09-29 13:47:50 +02:00
}
case 0xBF : // map (indefinite length)
2022-08-02 14:28:39 +02:00
return get_cbor_object ( static_cast < std : : size_t > ( - 1 ) , tag_handler ) ;
2020-09-29 13:47:50 +02:00
case 0xC6 : // tagged item
case 0xC7 :
case 0xC8 :
case 0xC9 :
case 0xCA :
case 0xCB :
case 0xCC :
case 0xCD :
case 0xCE :
case 0xCF :
case 0xD0 :
case 0xD1 :
case 0xD2 :
case 0xD3 :
case 0xD4 :
case 0xD8 : // tagged item (1 bytes follow)
case 0xD9 : // tagged item (2 bytes follow)
case 0xDA : // tagged item (4 bytes follow)
case 0xDB : // tagged item (8 bytes follow)
{
switch ( tag_handler )
{
case cbor_tag_handler_t : : error :
{
auto last_token = get_token_string ( ) ;
2022-08-02 14:28:39 +02:00
return sax - > parse_error ( chars_read , last_token , parse_error : : create ( 112 , chars_read ,
exception_message ( input_format_t : : cbor , concat ( " invalid byte: 0x " , last_token ) , " value " ) , nullptr ) ) ;
2020-09-29 13:47:50 +02:00
}
case cbor_tag_handler_t : : ignore :
{
2021-12-11 23:32:21 +01:00
// ignore binary subtype
2020-09-29 13:47:50 +02:00
switch ( current )
{
case 0xD8 :
{
2021-12-11 23:32:21 +01:00
std : : uint8_t subtype_to_ignore { } ;
get_number ( input_format_t : : cbor , subtype_to_ignore ) ;
2020-09-29 13:47:50 +02:00
break ;
}
case 0xD9 :
{
2021-12-11 23:32:21 +01:00
std : : uint16_t subtype_to_ignore { } ;
get_number ( input_format_t : : cbor , subtype_to_ignore ) ;
2020-09-29 13:47:50 +02:00
break ;
}
case 0xDA :
{
2021-12-11 23:32:21 +01:00
std : : uint32_t subtype_to_ignore { } ;
get_number ( input_format_t : : cbor , subtype_to_ignore ) ;
2020-09-29 13:47:50 +02:00
break ;
}
case 0xDB :
{
2021-12-11 23:32:21 +01:00
std : : uint64_t subtype_to_ignore { } ;
get_number ( input_format_t : : cbor , subtype_to_ignore ) ;
2020-09-29 13:47:50 +02:00
break ;
}
default :
break ;
}
return parse_cbor_internal ( true , tag_handler ) ;
}
2021-12-11 23:32:21 +01:00
case cbor_tag_handler_t : : store :
{
binary_t b ;
// use binary subtype and store in binary container
switch ( current )
{
case 0xD8 :
{
std : : uint8_t subtype { } ;
get_number ( input_format_t : : cbor , subtype ) ;
b . set_subtype ( detail : : conditional_static_cast < typename binary_t : : subtype_type > ( subtype ) ) ;
break ;
}
case 0xD9 :
{
std : : uint16_t subtype { } ;
get_number ( input_format_t : : cbor , subtype ) ;
b . set_subtype ( detail : : conditional_static_cast < typename binary_t : : subtype_type > ( subtype ) ) ;
break ;
}
case 0xDA :
{
std : : uint32_t subtype { } ;
get_number ( input_format_t : : cbor , subtype ) ;
b . set_subtype ( detail : : conditional_static_cast < typename binary_t : : subtype_type > ( subtype ) ) ;
break ;
}
case 0xDB :
{
std : : uint64_t subtype { } ;
get_number ( input_format_t : : cbor , subtype ) ;
b . set_subtype ( detail : : conditional_static_cast < typename binary_t : : subtype_type > ( subtype ) ) ;
break ;
}
default :
return parse_cbor_internal ( true , tag_handler ) ;
}
get ( ) ;
return get_cbor_binary ( b ) & & sax - > binary ( b ) ;
}
default : // LCOV_EXCL_LINE
JSON_ASSERT ( false ) ; // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE
return false ; // LCOV_EXCL_LINE
2020-09-29 13:47:50 +02:00
}
}
case 0xF4 : // false
return sax - > boolean ( false ) ;
case 0xF5 : // true
return sax - > boolean ( true ) ;
case 0xF6 : // null
return sax - > null ( ) ;
case 0xF9 : // Half-Precision Float (two-byte IEEE 754)
{
const auto byte1_raw = get ( ) ;
if ( JSON_HEDLEY_UNLIKELY ( ! unexpect_eof ( input_format_t : : cbor , " number " ) ) )
{
return false ;
}
const auto byte2_raw = get ( ) ;
if ( JSON_HEDLEY_UNLIKELY ( ! unexpect_eof ( input_format_t : : cbor , " number " ) ) )
{
return false ;
}
const auto byte1 = static_cast < unsigned char > ( byte1_raw ) ;
const auto byte2 = static_cast < unsigned char > ( byte2_raw ) ;
// code from RFC 7049, Appendix D, Figure 3:
// As half-precision floating-point numbers were only added
// to IEEE 754 in 2008, today's programming platforms often
// still only have limited support for them. It is very
// easy to include at least decoding support for them even
// without such support. An example of a small decoder for
// half-precision floating-point numbers in the C language
// is shown in Fig. 3.
const auto half = static_cast < unsigned int > ( ( byte1 < < 8u ) + byte2 ) ;
const double val = [ & half ]
{
const int exp = ( half > > 10u ) & 0x1Fu ;
const unsigned int mant = half & 0x3FFu ;
JSON_ASSERT ( 0 < = exp & & exp < = 32 ) ;
JSON_ASSERT ( mant < = 1024 ) ;
switch ( exp )
{
case 0 :
return std : : ldexp ( mant , - 24 ) ;
case 31 :
return ( mant = = 0 )
? std : : numeric_limits < double > : : infinity ( )
: std : : numeric_limits < double > : : quiet_NaN ( ) ;
default :
return std : : ldexp ( mant + 1024 , exp - 25 ) ;
}
} ( ) ;
return sax - > number_float ( ( half & 0x8000u ) ! = 0
? static_cast < number_float_t > ( - val )
: static_cast < number_float_t > ( val ) , " " ) ;
}
case 0xFA : // Single-Precision Float (four-byte IEEE 754)
{
float number { } ;
return get_number ( input_format_t : : cbor , number ) & & sax - > number_float ( static_cast < number_float_t > ( number ) , " " ) ;
}
case 0xFB : // Double-Precision Float (eight-byte IEEE 754)
{
double number { } ;
return get_number ( input_format_t : : cbor , number ) & & sax - > number_float ( static_cast < number_float_t > ( number ) , " " ) ;
}
default : // anything else (0xFF is handled inside the other types)
{
auto last_token = get_token_string ( ) ;
2022-08-02 14:28:39 +02:00
return sax - > parse_error ( chars_read , last_token , parse_error : : create ( 112 , chars_read ,
exception_message ( input_format_t : : cbor , concat ( " invalid byte: 0x " , last_token ) , " value " ) , nullptr ) ) ;
2020-09-29 13:47:50 +02:00
}
}
}
/*!
@ brief reads a CBOR string
This function first reads starting bytes to determine the expected
string length and then copies this number of bytes into a string .
Additionally , CBOR ' s strings with indefinite lengths are supported .
@ param [ out ] result created string
@ return whether string creation completed
*/
bool get_cbor_string(string_t& result)
{
    // the initial byte has already been read into `current` by the caller
    if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::cbor, "string")))
    {
        return false;
    }

    switch (current)
    {
        // UTF-8 string (0x00..0x17 bytes follow)
        case 0x60: case 0x61: case 0x62: case 0x63: case 0x64: case 0x65: case 0x66: case 0x67:
        case 0x68: case 0x69: case 0x6A: case 0x6B: case 0x6C: case 0x6D: case 0x6E: case 0x6F:
        case 0x70: case 0x71: case 0x72: case 0x73: case 0x74: case 0x75: case 0x76: case 0x77:
        {
            // length is encoded in the low five bits of the initial byte
            return get_string(input_format_t::cbor, static_cast<unsigned int>(current) & 0x1Fu, result);
        }

        case 0x78: // UTF-8 string (one-byte uint8_t for n follows)
        {
            std::uint8_t byte_count{};
            return get_number(input_format_t::cbor, byte_count) && get_string(input_format_t::cbor, byte_count, result);
        }

        case 0x79: // UTF-8 string (two-byte uint16_t for n follow)
        {
            std::uint16_t byte_count{};
            return get_number(input_format_t::cbor, byte_count) && get_string(input_format_t::cbor, byte_count, result);
        }

        case 0x7A: // UTF-8 string (four-byte uint32_t for n follow)
        {
            std::uint32_t byte_count{};
            return get_number(input_format_t::cbor, byte_count) && get_string(input_format_t::cbor, byte_count, result);
        }

        case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow)
        {
            std::uint64_t byte_count{};
            return get_number(input_format_t::cbor, byte_count) && get_string(input_format_t::cbor, byte_count, result);
        }

        case 0x7F: // UTF-8 string (indefinite length)
        {
            // concatenate chunks until the 0xFF break marker is read
            for (;;)
            {
                if (get() == 0xFF)
                {
                    return true;
                }

                string_t piece;
                if (!get_cbor_string(piece))
                {
                    return false;
                }
                result.append(piece);
            }
        }

        default:
        {
            auto last_token = get_token_string();
            return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read,
                                    exception_message(input_format_t::cbor, concat("expected length specification (0x60-0x7B) or indefinite string type (0x7F); last byte: 0x", last_token), "string"), nullptr));
        }
    }
}
/*!
@ brief reads a CBOR byte array
This function first reads starting bytes to determine the expected
byte array length and then copies this number of bytes into the byte array .
Additionally , CBOR ' s byte arrays with indefinite lengths are supported .
@ param [ out ] result created byte array
@ return whether byte array creation completed
*/
bool get_cbor_binary(binary_t& result)
{
    // the initial byte has already been read into `current` by the caller
    if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::cbor, "binary")))
    {
        return false;
    }

    switch (current)
    {
        // Binary data (0x00..0x17 bytes follow)
        case 0x40: case 0x41: case 0x42: case 0x43: case 0x44: case 0x45: case 0x46: case 0x47:
        case 0x48: case 0x49: case 0x4A: case 0x4B: case 0x4C: case 0x4D: case 0x4E: case 0x4F:
        case 0x50: case 0x51: case 0x52: case 0x53: case 0x54: case 0x55: case 0x56: case 0x57:
        {
            // length is encoded in the low five bits of the initial byte
            return get_binary(input_format_t::cbor, static_cast<unsigned int>(current) & 0x1Fu, result);
        }

        case 0x58: // Binary data (one-byte uint8_t for n follows)
        {
            std::uint8_t byte_count{};
            return get_number(input_format_t::cbor, byte_count)
                   && get_binary(input_format_t::cbor, byte_count, result);
        }

        case 0x59: // Binary data (two-byte uint16_t for n follow)
        {
            std::uint16_t byte_count{};
            return get_number(input_format_t::cbor, byte_count)
                   && get_binary(input_format_t::cbor, byte_count, result);
        }

        case 0x5A: // Binary data (four-byte uint32_t for n follow)
        {
            std::uint32_t byte_count{};
            return get_number(input_format_t::cbor, byte_count)
                   && get_binary(input_format_t::cbor, byte_count, result);
        }

        case 0x5B: // Binary data (eight-byte uint64_t for n follow)
        {
            std::uint64_t byte_count{};
            return get_number(input_format_t::cbor, byte_count)
                   && get_binary(input_format_t::cbor, byte_count, result);
        }

        case 0x5F: // Binary data (indefinite length)
        {
            // concatenate chunks until the 0xFF break marker is read
            for (;;)
            {
                if (get() == 0xFF)
                {
                    return true;
                }

                binary_t piece;
                if (!get_cbor_binary(piece))
                {
                    return false;
                }
                result.insert(result.end(), piece.begin(), piece.end());
            }
        }

        default:
        {
            auto last_token = get_token_string();
            return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read,
                                    exception_message(input_format_t::cbor, concat("expected length specification (0x40-0x5B) or indefinite binary array type (0x5F); last byte: 0x", last_token), "binary"), nullptr));
        }
    }
}
/*!
2022-08-02 14:28:39 +02:00
@ param [ in ] len the length of the array or static_cast < std : : size_t > ( - 1 ) for an
2020-09-29 13:47:50 +02:00
array of indefinite size
@ param [ in ] tag_handler how CBOR tags should be treated
@ return whether array creation completed
*/
bool get_cbor_array ( const std : : size_t len ,
const cbor_tag_handler_t tag_handler )
{
if ( JSON_HEDLEY_UNLIKELY ( ! sax - > start_array ( len ) ) )
{
return false ;
}
2022-08-02 14:28:39 +02:00
if ( len ! = static_cast < std : : size_t > ( - 1 ) )
2020-09-29 13:47:50 +02:00
{
for ( std : : size_t i = 0 ; i < len ; + + i )
{
if ( JSON_HEDLEY_UNLIKELY ( ! parse_cbor_internal ( true , tag_handler ) ) )
{
return false ;
}
}
}
else
{
while ( get ( ) ! = 0xFF )
{
if ( JSON_HEDLEY_UNLIKELY ( ! parse_cbor_internal ( false , tag_handler ) ) )
{
return false ;
}
}
}
return sax - > end_array ( ) ;
}
/*!
2022-08-02 14:28:39 +02:00
@ param [ in ] len the length of the object or static_cast < std : : size_t > ( - 1 ) for an
2020-09-29 13:47:50 +02:00
object of indefinite size
@ param [ in ] tag_handler how CBOR tags should be treated
@ return whether object creation completed
*/
bool get_cbor_object ( const std : : size_t len ,
const cbor_tag_handler_t tag_handler )
{
if ( JSON_HEDLEY_UNLIKELY ( ! sax - > start_object ( len ) ) )
{
return false ;
}
2021-12-11 23:32:21 +01:00
if ( len ! = 0 )
2020-09-29 13:47:50 +02:00
{
2021-12-11 23:32:21 +01:00
string_t key ;
2022-08-02 14:28:39 +02:00
if ( len ! = static_cast < std : : size_t > ( - 1 ) )
2020-09-29 13:47:50 +02:00
{
2021-12-11 23:32:21 +01:00
for ( std : : size_t i = 0 ; i < len ; + + i )
2020-09-29 13:47:50 +02:00
{
2021-12-11 23:32:21 +01:00
get ( ) ;
if ( JSON_HEDLEY_UNLIKELY ( ! get_cbor_string ( key ) | | ! sax - > key ( key ) ) )
{
return false ;
}
2020-09-29 13:47:50 +02:00
2021-12-11 23:32:21 +01:00
if ( JSON_HEDLEY_UNLIKELY ( ! parse_cbor_internal ( true , tag_handler ) ) )
{
return false ;
}
key . clear ( ) ;
2020-09-29 13:47:50 +02:00
}
}
2021-12-11 23:32:21 +01:00
else
2020-09-29 13:47:50 +02:00
{
2021-12-11 23:32:21 +01:00
while ( get ( ) ! = 0xFF )
2020-09-29 13:47:50 +02:00
{
2021-12-11 23:32:21 +01:00
if ( JSON_HEDLEY_UNLIKELY ( ! get_cbor_string ( key ) | | ! sax - > key ( key ) ) )
{
return false ;
}
2020-09-29 13:47:50 +02:00
2021-12-11 23:32:21 +01:00
if ( JSON_HEDLEY_UNLIKELY ( ! parse_cbor_internal ( true , tag_handler ) ) )
{
return false ;
}
key . clear ( ) ;
2020-09-29 13:47:50 +02:00
}
}
}
return sax - > end_object ( ) ;
}
/////////////
// MsgPack //
/////////////
/*!
@ return whether a valid MessagePack value was passed to the SAX parser
*/
bool parse_msgpack_internal ( )
{
switch ( get ( ) )
{
// EOF
case std : : char_traits < char_type > : : eof ( ) :
return unexpect_eof ( input_format_t : : msgpack , " value " ) ;
// positive fixint
case 0x00 :
case 0x01 :
case 0x02 :
case 0x03 :
case 0x04 :
case 0x05 :
case 0x06 :
case 0x07 :
case 0x08 :
case 0x09 :
case 0x0A :
case 0x0B :
case 0x0C :
case 0x0D :
case 0x0E :
case 0x0F :
case 0x10 :
case 0x11 :
case 0x12 :
case 0x13 :
case 0x14 :
case 0x15 :
case 0x16 :
case 0x17 :
case 0x18 :
case 0x19 :
case 0x1A :
case 0x1B :
case 0x1C :
case 0x1D :
case 0x1E :
case 0x1F :
case 0x20 :
case 0x21 :
case 0x22 :
case 0x23 :
case 0x24 :
case 0x25 :
case 0x26 :
case 0x27 :
case 0x28 :
case 0x29 :
case 0x2A :
case 0x2B :
case 0x2C :
case 0x2D :
case 0x2E :
case 0x2F :
case 0x30 :
case 0x31 :
case 0x32 :
case 0x33 :
case 0x34 :
case 0x35 :
case 0x36 :
case 0x37 :
case 0x38 :
case 0x39 :
case 0x3A :
case 0x3B :
case 0x3C :
case 0x3D :
case 0x3E :
case 0x3F :
case 0x40 :
case 0x41 :
case 0x42 :
case 0x43 :
case 0x44 :
case 0x45 :
case 0x46 :
case 0x47 :
case 0x48 :
case 0x49 :
case 0x4A :
case 0x4B :
case 0x4C :
case 0x4D :
case 0x4E :
case 0x4F :
case 0x50 :
case 0x51 :
case 0x52 :
case 0x53 :
case 0x54 :
case 0x55 :
case 0x56 :
case 0x57 :
case 0x58 :
case 0x59 :
case 0x5A :
case 0x5B :
case 0x5C :
case 0x5D :
case 0x5E :
case 0x5F :
case 0x60 :
case 0x61 :
case 0x62 :
case 0x63 :
case 0x64 :
case 0x65 :
case 0x66 :
case 0x67 :
case 0x68 :
case 0x69 :
case 0x6A :
case 0x6B :
case 0x6C :
case 0x6D :
case 0x6E :
case 0x6F :
case 0x70 :
case 0x71 :
case 0x72 :
case 0x73 :
case 0x74 :
case 0x75 :
case 0x76 :
case 0x77 :
case 0x78 :
case 0x79 :
case 0x7A :
case 0x7B :
case 0x7C :
case 0x7D :
case 0x7E :
case 0x7F :
return sax - > number_unsigned ( static_cast < number_unsigned_t > ( current ) ) ;
// fixmap
case 0x80 :
case 0x81 :
case 0x82 :
case 0x83 :
case 0x84 :
case 0x85 :
case 0x86 :
case 0x87 :
case 0x88 :
case 0x89 :
case 0x8A :
case 0x8B :
case 0x8C :
case 0x8D :
case 0x8E :
case 0x8F :
2022-08-02 14:28:39 +02:00
return get_msgpack_object ( conditional_static_cast < std : : size_t > ( static_cast < unsigned int > ( current ) & 0x0Fu ) ) ;
2020-09-29 13:47:50 +02:00
// fixarray
case 0x90 :
case 0x91 :
case 0x92 :
case 0x93 :
case 0x94 :
case 0x95 :
case 0x96 :
case 0x97 :
case 0x98 :
case 0x99 :
case 0x9A :
case 0x9B :
case 0x9C :
case 0x9D :
case 0x9E :
case 0x9F :
2022-08-02 14:28:39 +02:00
return get_msgpack_array ( conditional_static_cast < std : : size_t > ( static_cast < unsigned int > ( current ) & 0x0Fu ) ) ;
2020-09-29 13:47:50 +02:00
// fixstr
case 0xA0 :
case 0xA1 :
case 0xA2 :
case 0xA3 :
case 0xA4 :
case 0xA5 :
case 0xA6 :
case 0xA7 :
case 0xA8 :
case 0xA9 :
case 0xAA :
case 0xAB :
case 0xAC :
case 0xAD :
case 0xAE :
case 0xAF :
case 0xB0 :
case 0xB1 :
case 0xB2 :
case 0xB3 :
case 0xB4 :
case 0xB5 :
case 0xB6 :
case 0xB7 :
case 0xB8 :
case 0xB9 :
case 0xBA :
case 0xBB :
case 0xBC :
case 0xBD :
case 0xBE :
case 0xBF :
case 0xD9 : // str 8
case 0xDA : // str 16
case 0xDB : // str 32
{
string_t s ;
return get_msgpack_string ( s ) & & sax - > string ( s ) ;
}
case 0xC0 : // nil
return sax - > null ( ) ;
case 0xC2 : // false
return sax - > boolean ( false ) ;
case 0xC3 : // true
return sax - > boolean ( true ) ;
case 0xC4 : // bin 8
case 0xC5 : // bin 16
case 0xC6 : // bin 32
case 0xC7 : // ext 8
case 0xC8 : // ext 16
case 0xC9 : // ext 32
case 0xD4 : // fixext 1
case 0xD5 : // fixext 2
case 0xD6 : // fixext 4
case 0xD7 : // fixext 8
case 0xD8 : // fixext 16
{
binary_t b ;
return get_msgpack_binary ( b ) & & sax - > binary ( b ) ;
}
case 0xCA : // float 32
{
float number { } ;
return get_number ( input_format_t : : msgpack , number ) & & sax - > number_float ( static_cast < number_float_t > ( number ) , " " ) ;
}
case 0xCB : // float 64
{
double number { } ;
return get_number ( input_format_t : : msgpack , number ) & & sax - > number_float ( static_cast < number_float_t > ( number ) , " " ) ;
}
case 0xCC : // uint 8
{
std : : uint8_t number { } ;
return get_number ( input_format_t : : msgpack , number ) & & sax - > number_unsigned ( number ) ;
}
case 0xCD : // uint 16
{
std : : uint16_t number { } ;
return get_number ( input_format_t : : msgpack , number ) & & sax - > number_unsigned ( number ) ;
}
case 0xCE : // uint 32
{
std : : uint32_t number { } ;
return get_number ( input_format_t : : msgpack , number ) & & sax - > number_unsigned ( number ) ;
}
case 0xCF : // uint 64
{
std : : uint64_t number { } ;
return get_number ( input_format_t : : msgpack , number ) & & sax - > number_unsigned ( number ) ;
}
case 0xD0 : // int 8
{
std : : int8_t number { } ;
return get_number ( input_format_t : : msgpack , number ) & & sax - > number_integer ( number ) ;
}
case 0xD1 : // int 16
{
std : : int16_t number { } ;
return get_number ( input_format_t : : msgpack , number ) & & sax - > number_integer ( number ) ;
}
case 0xD2 : // int 32
{
std : : int32_t number { } ;
return get_number ( input_format_t : : msgpack , number ) & & sax - > number_integer ( number ) ;
}
case 0xD3 : // int 64
{
std : : int64_t number { } ;
return get_number ( input_format_t : : msgpack , number ) & & sax - > number_integer ( number ) ;
}
case 0xDC : // array 16
{
std : : uint16_t len { } ;
return get_number ( input_format_t : : msgpack , len ) & & get_msgpack_array ( static_cast < std : : size_t > ( len ) ) ;
}
case 0xDD : // array 32
{
std : : uint32_t len { } ;
2022-08-02 14:28:39 +02:00
return get_number ( input_format_t : : msgpack , len ) & & get_msgpack_array ( conditional_static_cast < std : : size_t > ( len ) ) ;
2020-09-29 13:47:50 +02:00
}
case 0xDE : // map 16
{
std : : uint16_t len { } ;
return get_number ( input_format_t : : msgpack , len ) & & get_msgpack_object ( static_cast < std : : size_t > ( len ) ) ;
}
case 0xDF : // map 32
{
std : : uint32_t len { } ;
2022-08-02 14:28:39 +02:00
return get_number ( input_format_t : : msgpack , len ) & & get_msgpack_object ( conditional_static_cast < std : : size_t > ( len ) ) ;
2020-09-29 13:47:50 +02:00
}
// negative fixint
case 0xE0 :
case 0xE1 :
case 0xE2 :
case 0xE3 :
case 0xE4 :
case 0xE5 :
case 0xE6 :
case 0xE7 :
case 0xE8 :
case 0xE9 :
case 0xEA :
case 0xEB :
case 0xEC :
case 0xED :
case 0xEE :
case 0xEF :
case 0xF0 :
case 0xF1 :
case 0xF2 :
case 0xF3 :
case 0xF4 :
case 0xF5 :
case 0xF6 :
case 0xF7 :
case 0xF8 :
case 0xF9 :
case 0xFA :
case 0xFB :
case 0xFC :
case 0xFD :
case 0xFE :
case 0xFF :
return sax - > number_integer ( static_cast < std : : int8_t > ( current ) ) ;
default : // anything else
{
auto last_token = get_token_string ( ) ;
2022-08-02 14:28:39 +02:00
return sax - > parse_error ( chars_read , last_token , parse_error : : create ( 112 , chars_read ,
exception_message ( input_format_t : : msgpack , concat ( " invalid byte: 0x " , last_token ) , " value " ) , nullptr ) ) ;
2020-09-29 13:47:50 +02:00
}
}
}
/*!
@brief reads a MessagePack string

The marker in `current` determines how the string length is encoded; once the
length is known, that many bytes are copied into @a result.

@param[out] result  created string
@return whether string creation completed
*/
bool get_msgpack_string(string_t& result)
{
    if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::msgpack, "string")))
    {
        return false;
    }

    // fixstr (0xA0-0xBF): the length is stored in the low five bits of the marker
    if (current >= 0xA0 && current <= 0xBF)
    {
        return get_string(input_format_t::msgpack, static_cast<unsigned int>(current) & 0x1Fu, result);
    }

    switch (current)
    {
        case 0xD9: // str 8
        {
            std::uint8_t len{};
            if (!get_number(input_format_t::msgpack, len))
            {
                return false;
            }
            return get_string(input_format_t::msgpack, len, result);
        }

        case 0xDA: // str 16
        {
            std::uint16_t len{};
            if (!get_number(input_format_t::msgpack, len))
            {
                return false;
            }
            return get_string(input_format_t::msgpack, len, result);
        }

        case 0xDB: // str 32
        {
            std::uint32_t len{};
            if (!get_number(input_format_t::msgpack, len))
            {
                return false;
            }
            return get_string(input_format_t::msgpack, len, result);
        }

        default:
        {
            auto last_token = get_token_string();
            return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read,
                                    exception_message(input_format_t::msgpack, concat("expected length specification (0xA0-0xBF, 0xD9-0xDB); last byte: 0x", last_token), "string"), nullptr));
        }
    }
}
/*!
@brief reads a MessagePack byte array

The marker in `current` selects the layout: bin 8/16/32 carry a length followed
by the payload; ext 8/16/32 carry a length, a signed type byte and the payload;
fixext 1/2/4/8/16 carry a signed type byte and a payload of fixed length. For
ext and fixext, the type byte is stored as the subtype of @a result.

@param[out] result  created byte array
@return whether byte array creation completed
*/
bool get_msgpack_binary(binary_t& result)
{
    // records the ext type as subtype; always yields true so it can close an && chain
    const auto store_subtype = [&result](std::int8_t subtype)
    {
        result.set_subtype(static_cast<std::uint8_t>(subtype));
        return true;
    };

    // fixext N: type byte first, then a payload read with length N
    const auto read_fixext = [this, &result, &store_subtype](const int payload_size) -> bool
    {
        std::int8_t subtype{};
        return get_number(input_format_t::msgpack, subtype) &&
               get_binary(input_format_t::msgpack, payload_size, result) &&
               store_subtype(subtype);
    };

    switch (current)
    {
        case 0xC4: // bin 8
        {
            std::uint8_t len{};
            if (!get_number(input_format_t::msgpack, len))
            {
                return false;
            }
            return get_binary(input_format_t::msgpack, len, result);
        }

        case 0xC5: // bin 16
        {
            std::uint16_t len{};
            if (!get_number(input_format_t::msgpack, len))
            {
                return false;
            }
            return get_binary(input_format_t::msgpack, len, result);
        }

        case 0xC6: // bin 32
        {
            std::uint32_t len{};
            if (!get_number(input_format_t::msgpack, len))
            {
                return false;
            }
            return get_binary(input_format_t::msgpack, len, result);
        }

        case 0xC7: // ext 8
        {
            std::uint8_t len{};
            std::int8_t subtype{};
            return get_number(input_format_t::msgpack, len) &&
                   get_number(input_format_t::msgpack, subtype) &&
                   get_binary(input_format_t::msgpack, len, result) &&
                   store_subtype(subtype);
        }

        case 0xC8: // ext 16
        {
            std::uint16_t len{};
            std::int8_t subtype{};
            return get_number(input_format_t::msgpack, len) &&
                   get_number(input_format_t::msgpack, subtype) &&
                   get_binary(input_format_t::msgpack, len, result) &&
                   store_subtype(subtype);
        }

        case 0xC9: // ext 32
        {
            std::uint32_t len{};
            std::int8_t subtype{};
            return get_number(input_format_t::msgpack, len) &&
                   get_number(input_format_t::msgpack, subtype) &&
                   get_binary(input_format_t::msgpack, len, result) &&
                   store_subtype(subtype);
        }

        case 0xD4: // fixext 1
            return read_fixext(1);

        case 0xD5: // fixext 2
            return read_fixext(2);

        case 0xD6: // fixext 4
            return read_fixext(4);

        case 0xD7: // fixext 8
            return read_fixext(8);

        case 0xD8: // fixext 16
            return read_fixext(16);

        default:           // LCOV_EXCL_LINE
            return false;  // LCOV_EXCL_LINE
    }
}
/*!
@brief read @a len MessagePack values as the elements of an array

@param[in] len  the length of the array
@return whether array creation completed
*/
bool get_msgpack_array(const std::size_t len)
{
    if (JSON_HEDLEY_UNLIKELY(!sax->start_array(len)))
    {
        return false;
    }

    std::size_t parsed = 0;
    while (parsed < len)
    {
        if (JSON_HEDLEY_UNLIKELY(!parse_msgpack_internal()))
        {
            return false;
        }
        ++parsed;
    }

    return sax->end_array();
}
/*!
@brief read @a len key/value pairs as the members of an object

@param[in] len  the length of the object
@return whether object creation completed
*/
bool get_msgpack_object(const std::size_t len)
{
    if (JSON_HEDLEY_UNLIKELY(!sax->start_object(len)))
    {
        return false;
    }

    // one buffer is reused for all keys
    string_t key;
    for (std::size_t remaining = len; remaining != 0; --remaining)
    {
        // fetch the marker byte of the key before decoding it
        get();
        if (JSON_HEDLEY_UNLIKELY(!get_msgpack_string(key)))
        {
            return false;
        }
        if (JSON_HEDLEY_UNLIKELY(!sax->key(key)))
        {
            return false;
        }

        if (JSON_HEDLEY_UNLIKELY(!parse_msgpack_internal()))
        {
            return false;
        }
        key.clear();
    }

    return sax->end_object();
}
////////////
// UBJSON //
////////////
/*!
@brief read one UBJSON value and report it to the SAX consumer

@param[in] get_char  whether a new character should be retrieved from the
                     input (true, default) or whether the last read
                     character should be considered instead
@return whether a valid UBJSON value was passed to the SAX parser
*/
bool parse_ubjson_internal(const bool get_char = true)
{
    if (get_char)
    {
        // fetch a fresh type marker from the input
        return get_ubjson_value(get_ignore_noop());
    }

    // reuse the byte that was read last
    return get_ubjson_value(current);
}
/*!
@brief reads a UBJSON string

This function is either called after reading the 'S' byte explicitly
indicating a string, or in case of an object key where the 'S' byte can be
left out.

The byte in `current` is the type marker of the length; the markers 'u', 'm'
and 'M' are only accepted when the input format is BJData.

@param[out] result   created string
@param[in] get_char  whether a new character should be retrieved from the
                     input (true, default) or whether the last read
                     character should be considered instead
@return whether string creation completed
*/
bool get_ubjson_string(string_t& result, const bool get_char = true)
{
    if (get_char)
    {
        get();  // TODO(niels): may we ignore N here?
    }

    if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format, "value")))
    {
        return false;
    }

    const bool is_bjdata = (input_format == input_format_t::bjdata);

    switch (current)
    {
        case 'U':
        {
            std::uint8_t len{};
            if (!get_number(input_format, len))
            {
                return false;
            }
            return get_string(input_format, len, result);
        }

        case 'i':
        {
            std::int8_t len{};
            if (!get_number(input_format, len))
            {
                return false;
            }
            return get_string(input_format, len, result);
        }

        case 'I':
        {
            std::int16_t len{};
            if (!get_number(input_format, len))
            {
                return false;
            }
            return get_string(input_format, len, result);
        }

        case 'l':
        {
            std::int32_t len{};
            if (!get_number(input_format, len))
            {
                return false;
            }
            return get_string(input_format, len, result);
        }

        case 'L':
        {
            std::int64_t len{};
            if (!get_number(input_format, len))
            {
                return false;
            }
            return get_string(input_format, len, result);
        }

        case 'u':
        {
            if (!is_bjdata)
            {
                break;
            }
            std::uint16_t len{};
            if (!get_number(input_format, len))
            {
                return false;
            }
            return get_string(input_format, len, result);
        }

        case 'm':
        {
            if (!is_bjdata)
            {
                break;
            }
            std::uint32_t len{};
            if (!get_number(input_format, len))
            {
                return false;
            }
            return get_string(input_format, len, result);
        }

        case 'M':
        {
            if (!is_bjdata)
            {
                break;
            }
            std::uint64_t len{};
            if (!get_number(input_format, len))
            {
                return false;
            }
            return get_string(input_format, len, result);
        }

        default:
            break;
    }

    // no accepted length marker: report the marker set valid for the format
    auto last_token = get_token_string();
    std::string message = is_bjdata
                          ? "expected length type specification (U, i, u, I, m, l, M, L); last byte: 0x"
                          : "expected length type specification (U, i, I, l, L); last byte: 0x";
    message += last_token;
    return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format, message, "string"), nullptr));
}
/*!
@ param [ out ] dim an integer vector storing the ND array dimensions
@ return whether reading ND array size vector is successful
*/
bool get_ubjson_ndarray_size ( std : : vector < size_t > & dim )
{
std : : pair < std : : size_t , char_int_type > size_and_type ;
size_t dimlen = 0 ;
bool no_ndarray = true ;
if ( JSON_HEDLEY_UNLIKELY ( ! get_ubjson_size_type ( size_and_type , no_ndarray ) ) )
{
return false ;
}
if ( size_and_type . first ! = string_t : : npos )
{
if ( size_and_type . second ! = 0 )
{
if ( size_and_type . second ! = ' N ' )
{
for ( std : : size_t i = 0 ; i < size_and_type . first ; + + i )
{
if ( JSON_HEDLEY_UNLIKELY ( ! get_ubjson_size_value ( dimlen , no_ndarray , size_and_type . second ) ) )
{
return false ;
}
dim . push_back ( dimlen ) ;
}
}
}
else
{
for ( std : : size_t i = 0 ; i < size_and_type . first ; + + i )
{
if ( JSON_HEDLEY_UNLIKELY ( ! get_ubjson_size_value ( dimlen , no_ndarray ) ) )
{
return false ;
}
dim . push_back ( dimlen ) ;
}
}
}
else
{
while ( current ! = ' ] ' )
{
if ( JSON_HEDLEY_UNLIKELY ( ! get_ubjson_size_value ( dimlen , no_ndarray , current ) ) )
{
return false ;
}
dim . push_back ( dimlen ) ;
get_ignore_noop ( ) ;
}
}
return true ;
2020-09-29 13:47:50 +02:00
}
/*!
@ param [ out ] result determined size
2022-08-02 14:28:39 +02:00
@ param [ in , out ] is_ndarray for input , ` true ` means already inside an ndarray vector
or ndarray dimension is not allowed ; ` false ` means ndarray
is allowed ; for output , ` true ` means an ndarray is found ;
is_ndarray can only return ` true ` when its initial value
is ` false `
@ param [ in ] prefix type marker if already read , otherwise set to 0
2020-09-29 13:47:50 +02:00
@ return whether size determination completed
*/
2022-08-02 14:28:39 +02:00
bool get_ubjson_size_value ( std : : size_t & result , bool & is_ndarray , char_int_type prefix = 0 )
2020-09-29 13:47:50 +02:00
{
2022-08-02 14:28:39 +02:00
if ( prefix = = 0 )
{
prefix = get_ignore_noop ( ) ;
}
switch ( prefix )
2020-09-29 13:47:50 +02:00
{
case ' U ' :
{
std : : uint8_t number { } ;
2022-08-02 14:28:39 +02:00
if ( JSON_HEDLEY_UNLIKELY ( ! get_number ( input_format , number ) ) )
2020-09-29 13:47:50 +02:00
{
return false ;
}
result = static_cast < std : : size_t > ( number ) ;
return true ;
}
case ' i ' :
{
std : : int8_t number { } ;
2022-08-02 14:28:39 +02:00
if ( JSON_HEDLEY_UNLIKELY ( ! get_number ( input_format , number ) ) )
2020-09-29 13:47:50 +02:00
{
return false ;
}
2022-08-02 14:28:39 +02:00
if ( number < 0 )
{
return sax - > parse_error ( chars_read , get_token_string ( ) , parse_error : : create ( 113 , chars_read ,
exception_message ( input_format , " count in an optimized container must be positive " , " size " ) , nullptr ) ) ;
}
2021-12-11 23:32:21 +01:00
result = static_cast < std : : size_t > ( number ) ; // NOLINT(bugprone-signed-char-misuse,cert-str34-c): number is not a char
2020-09-29 13:47:50 +02:00
return true ;
}
case ' I ' :
{
std : : int16_t number { } ;
2022-08-02 14:28:39 +02:00
if ( JSON_HEDLEY_UNLIKELY ( ! get_number ( input_format , number ) ) )
2020-09-29 13:47:50 +02:00
{
return false ;
}
2022-08-02 14:28:39 +02:00
if ( number < 0 )
{
return sax - > parse_error ( chars_read , get_token_string ( ) , parse_error : : create ( 113 , chars_read ,
exception_message ( input_format , " count in an optimized container must be positive " , " size " ) , nullptr ) ) ;
}
2020-09-29 13:47:50 +02:00
result = static_cast < std : : size_t > ( number ) ;
return true ;
}
case ' l ' :
{
std : : int32_t number { } ;
2022-08-02 14:28:39 +02:00
if ( JSON_HEDLEY_UNLIKELY ( ! get_number ( input_format , number ) ) )
2020-09-29 13:47:50 +02:00
{
return false ;
}
2022-08-02 14:28:39 +02:00
if ( number < 0 )
{
return sax - > parse_error ( chars_read , get_token_string ( ) , parse_error : : create ( 113 , chars_read ,
exception_message ( input_format , " count in an optimized container must be positive " , " size " ) , nullptr ) ) ;
}
2020-09-29 13:47:50 +02:00
result = static_cast < std : : size_t > ( number ) ;
return true ;
}
case ' L ' :
{
std : : int64_t number { } ;
2022-08-02 14:28:39 +02:00
if ( JSON_HEDLEY_UNLIKELY ( ! get_number ( input_format , number ) ) )
2020-09-29 13:47:50 +02:00
{
return false ;
}
2022-08-02 14:28:39 +02:00
if ( number < 0 )
{
return sax - > parse_error ( chars_read , get_token_string ( ) , parse_error : : create ( 113 , chars_read ,
exception_message ( input_format , " count in an optimized container must be positive " , " size " ) , nullptr ) ) ;
}
if ( ! value_in_range_of < std : : size_t > ( number ) )
{
return sax - > parse_error ( chars_read , get_token_string ( ) , out_of_range : : create ( 408 ,
exception_message ( input_format , " integer value overflow " , " size " ) , nullptr ) ) ;
}
2020-09-29 13:47:50 +02:00
result = static_cast < std : : size_t > ( number ) ;
return true ;
}
2022-08-02 14:28:39 +02:00
case ' u ' :
2020-09-29 13:47:50 +02:00
{
2022-08-02 14:28:39 +02:00
if ( input_format ! = input_format_t : : bjdata )
{
break ;
}
std : : uint16_t number { } ;
if ( JSON_HEDLEY_UNLIKELY ( ! get_number ( input_format , number ) ) )
{
return false ;
}
result = static_cast < std : : size_t > ( number ) ;
return true ;
}
case ' m ' :
{
if ( input_format ! = input_format_t : : bjdata )
{
break ;
}
std : : uint32_t number { } ;
if ( JSON_HEDLEY_UNLIKELY ( ! get_number ( input_format , number ) ) )
{
return false ;
}
result = conditional_static_cast < std : : size_t > ( number ) ;
return true ;
}
case ' M ' :
{
if ( input_format ! = input_format_t : : bjdata )
{
break ;
}
std : : uint64_t number { } ;
if ( JSON_HEDLEY_UNLIKELY ( ! get_number ( input_format , number ) ) )
{
return false ;
}
if ( ! value_in_range_of < std : : size_t > ( number ) )
{
return sax - > parse_error ( chars_read , get_token_string ( ) , out_of_range : : create ( 408 ,
exception_message ( input_format , " integer value overflow " , " size " ) , nullptr ) ) ;
}
result = detail : : conditional_static_cast < std : : size_t > ( number ) ;
return true ;
}
case ' [ ' :
{
if ( input_format ! = input_format_t : : bjdata )
{
break ;
}
if ( is_ndarray ) // ndarray dimensional vector can only contain integers, and can not embed another array
{
return sax - > parse_error ( chars_read , get_token_string ( ) , parse_error : : create ( 113 , chars_read , exception_message ( input_format , " ndarray dimentional vector is not allowed " , " size " ) , nullptr ) ) ;
}
std : : vector < size_t > dim ;
if ( JSON_HEDLEY_UNLIKELY ( ! get_ubjson_ndarray_size ( dim ) ) )
{
return false ;
}
if ( dim . size ( ) = = 1 | | ( dim . size ( ) = = 2 & & dim . at ( 0 ) = = 1 ) ) // return normal array size if 1D row vector
{
result = dim . at ( dim . size ( ) - 1 ) ;
return true ;
}
if ( ! dim . empty ( ) ) // if ndarray, convert to an object in JData annotated array format
{
for ( auto i : dim ) // test if any dimension in an ndarray is 0, if so, return a 1D empty container
{
if ( i = = 0 )
{
result = 0 ;
return true ;
}
}
string_t key = " _ArraySize_ " ;
if ( JSON_HEDLEY_UNLIKELY ( ! sax - > start_object ( 3 ) | | ! sax - > key ( key ) | | ! sax - > start_array ( dim . size ( ) ) ) )
{
return false ;
}
result = 1 ;
for ( auto i : dim )
{
result * = i ;
if ( result = = 0 | | result = = string_t : : npos ) // because dim elements shall not have zeros, result = 0 means overflow happened; it also can't be string_t::npos as it is used to initialize size in get_ubjson_size_type()
{
return sax - > parse_error ( chars_read , get_token_string ( ) , out_of_range : : create ( 408 , exception_message ( input_format , " excessive ndarray size caused overflow " , " size " ) , nullptr ) ) ;
}
if ( JSON_HEDLEY_UNLIKELY ( ! sax - > number_unsigned ( static_cast < number_unsigned_t > ( i ) ) ) )
{
return false ;
}
}
is_ndarray = true ;
return sax - > end_array ( ) ;
}
result = 0 ;
return true ;
2020-09-29 13:47:50 +02:00
}
2022-08-02 14:28:39 +02:00
default :
break ;
2020-09-29 13:47:50 +02:00
}
2022-08-02 14:28:39 +02:00
auto last_token = get_token_string ( ) ;
std : : string message ;
if ( input_format ! = input_format_t : : bjdata )
{
message = " expected length type specification (U, i, I, l, L) after '#'; last byte: 0x " + last_token ;
}
else
{
message = " expected length type specification (U, i, u, I, m, l, M, L) after '#'; last byte: 0x " + last_token ;
}
return sax - > parse_error ( chars_read , last_token , parse_error : : create ( 113 , chars_read , exception_message ( input_format , message , " size " ) , nullptr ) ) ;
2020-09-29 13:47:50 +02:00
}
/*!
@ brief determine the type and size for a container
In the optimized UBJSON format , a type and a size can be provided to allow
for a more compact representation .
@ param [ out ] result pair of the size and the type
2022-08-02 14:28:39 +02:00
@ param [ in ] inside_ndarray whether the parser is parsing an ND array dimensional vector
2020-09-29 13:47:50 +02:00
@ return whether pair creation completed
*/
2022-08-02 14:28:39 +02:00
bool get_ubjson_size_type ( std : : pair < std : : size_t , char_int_type > & result , bool inside_ndarray = false )
2020-09-29 13:47:50 +02:00
{
result . first = string_t : : npos ; // size
result . second = 0 ; // type
2022-08-02 14:28:39 +02:00
bool is_ndarray = false ;
2020-09-29 13:47:50 +02:00
get_ignore_noop ( ) ;
if ( current = = ' $ ' )
{
2022-08-02 14:28:39 +02:00
std : : vector < char_int_type > bjdx = { ' [ ' , ' { ' , ' S ' , ' H ' , ' T ' , ' F ' , ' N ' , ' Z ' } ; // excluded markers in bjdata optimized type
2020-09-29 13:47:50 +02:00
result . second = get ( ) ; // must not ignore 'N', because 'N' maybe the type
2022-08-02 14:28:39 +02:00
if ( JSON_HEDLEY_UNLIKELY ( input_format = = input_format_t : : bjdata & & std : : find ( bjdx . begin ( ) , bjdx . end ( ) , result . second ) ! = bjdx . end ( ) ) )
{
auto last_token = get_token_string ( ) ;
return sax - > parse_error ( chars_read , last_token , parse_error : : create ( 112 , chars_read ,
exception_message ( input_format , concat ( " marker 0x " , last_token , " is not a permitted optimized array type " ) , " type " ) , nullptr ) ) ;
}
if ( JSON_HEDLEY_UNLIKELY ( ! unexpect_eof ( input_format , " type " ) ) )
2020-09-29 13:47:50 +02:00
{
return false ;
}
get_ignore_noop ( ) ;
if ( JSON_HEDLEY_UNLIKELY ( current ! = ' # ' ) )
{
2022-08-02 14:28:39 +02:00
if ( JSON_HEDLEY_UNLIKELY ( ! unexpect_eof ( input_format , " value " ) ) )
2020-09-29 13:47:50 +02:00
{
return false ;
}
auto last_token = get_token_string ( ) ;
2022-08-02 14:28:39 +02:00
return sax - > parse_error ( chars_read , last_token , parse_error : : create ( 112 , chars_read ,
exception_message ( input_format , concat ( " expected '#' after type information; last byte: 0x " , last_token ) , " size " ) , nullptr ) ) ;
2020-09-29 13:47:50 +02:00
}
2022-08-02 14:28:39 +02:00
bool is_error = get_ubjson_size_value ( result . first , is_ndarray ) ;
if ( input_format = = input_format_t : : bjdata & & is_ndarray )
{
if ( inside_ndarray )
{
return sax - > parse_error ( chars_read , get_token_string ( ) , parse_error : : create ( 112 , chars_read ,
exception_message ( input_format , " ndarray can not be recursive " , " size " ) , nullptr ) ) ;
}
result . second | = ( 1 < < 8 ) ; // use bit 8 to indicate ndarray, all UBJSON and BJData markers should be ASCII letters
}
return is_error ;
2020-09-29 13:47:50 +02:00
}
if ( current = = ' # ' )
{
2022-08-02 14:28:39 +02:00
bool is_error = get_ubjson_size_value ( result . first , is_ndarray ) ;
if ( input_format = = input_format_t : : bjdata & & is_ndarray )
{
return sax - > parse_error ( chars_read , get_token_string ( ) , parse_error : : create ( 112 , chars_read ,
exception_message ( input_format , " ndarray requires both type and size " , " size " ) , nullptr ) ) ;
}
return is_error ;
2020-09-29 13:47:50 +02:00
}
return true ;
}
/*!
@ param prefix the previously read or set type prefix
@ return whether value creation completed
*/
bool get_ubjson_value ( const char_int_type prefix )
{
switch ( prefix )
{
case std : : char_traits < char_type > : : eof ( ) : // EOF
2022-08-02 14:28:39 +02:00
return unexpect_eof ( input_format , " value " ) ;
2020-09-29 13:47:50 +02:00
case ' T ' : // true
return sax - > boolean ( true ) ;
case ' F ' : // false
return sax - > boolean ( false ) ;
case ' Z ' : // null
return sax - > null ( ) ;
case ' U ' :
{
std : : uint8_t number { } ;
2022-08-02 14:28:39 +02:00
return get_number ( input_format , number ) & & sax - > number_unsigned ( number ) ;
2020-09-29 13:47:50 +02:00
}
case ' i ' :
{
std : : int8_t number { } ;
2022-08-02 14:28:39 +02:00
return get_number ( input_format , number ) & & sax - > number_integer ( number ) ;
2020-09-29 13:47:50 +02:00
}
case ' I ' :
{
std : : int16_t number { } ;
2022-08-02 14:28:39 +02:00
return get_number ( input_format , number ) & & sax - > number_integer ( number ) ;
2020-09-29 13:47:50 +02:00
}
case ' l ' :
{
std : : int32_t number { } ;
2022-08-02 14:28:39 +02:00
return get_number ( input_format , number ) & & sax - > number_integer ( number ) ;
2020-09-29 13:47:50 +02:00
}
case ' L ' :
{
std : : int64_t number { } ;
2022-08-02 14:28:39 +02:00
return get_number ( input_format , number ) & & sax - > number_integer ( number ) ;
}
case ' u ' :
{
if ( input_format ! = input_format_t : : bjdata )
{
break ;
}
std : : uint16_t number { } ;
return get_number ( input_format , number ) & & sax - > number_unsigned ( number ) ;
}
case ' m ' :
{
if ( input_format ! = input_format_t : : bjdata )
{
break ;
}
std : : uint32_t number { } ;
return get_number ( input_format , number ) & & sax - > number_unsigned ( number ) ;
}
case ' M ' :
{
if ( input_format ! = input_format_t : : bjdata )
{
break ;
}
std : : uint64_t number { } ;
return get_number ( input_format , number ) & & sax - > number_unsigned ( number ) ;
}
case ' h ' :
{
if ( input_format ! = input_format_t : : bjdata )
{
break ;
}
const auto byte1_raw = get ( ) ;
if ( JSON_HEDLEY_UNLIKELY ( ! unexpect_eof ( input_format , " number " ) ) )
{
return false ;
}
const auto byte2_raw = get ( ) ;
if ( JSON_HEDLEY_UNLIKELY ( ! unexpect_eof ( input_format , " number " ) ) )
{
return false ;
}
const auto byte1 = static_cast < unsigned char > ( byte1_raw ) ;
const auto byte2 = static_cast < unsigned char > ( byte2_raw ) ;
// code from RFC 7049, Appendix D, Figure 3:
// As half-precision floating-point numbers were only added
// to IEEE 754 in 2008, today's programming platforms often
// still only have limited support for them. It is very
// easy to include at least decoding support for them even
// without such support. An example of a small decoder for
// half-precision floating-point numbers in the C language
// is shown in Fig. 3.
const auto half = static_cast < unsigned int > ( ( byte2 < < 8u ) + byte1 ) ;
const double val = [ & half ]
{
const int exp = ( half > > 10u ) & 0x1Fu ;
const unsigned int mant = half & 0x3FFu ;
JSON_ASSERT ( 0 < = exp & & exp < = 32 ) ;
JSON_ASSERT ( mant < = 1024 ) ;
switch ( exp )
{
case 0 :
return std : : ldexp ( mant , - 24 ) ;
case 31 :
return ( mant = = 0 )
? std : : numeric_limits < double > : : infinity ( )
: std : : numeric_limits < double > : : quiet_NaN ( ) ;
default :
return std : : ldexp ( mant + 1024 , exp - 25 ) ;
}
} ( ) ;
return sax - > number_float ( ( half & 0x8000u ) ! = 0
? static_cast < number_float_t > ( - val )
: static_cast < number_float_t > ( val ) , " " ) ;
2020-09-29 13:47:50 +02:00
}
case ' d ' :
{
float number { } ;
2022-08-02 14:28:39 +02:00
return get_number ( input_format , number ) & & sax - > number_float ( static_cast < number_float_t > ( number ) , " " ) ;
2020-09-29 13:47:50 +02:00
}
case ' D ' :
{
double number { } ;
2022-08-02 14:28:39 +02:00
return get_number ( input_format , number ) & & sax - > number_float ( static_cast < number_float_t > ( number ) , " " ) ;
2020-09-29 13:47:50 +02:00
}
case ' H ' :
{
return get_ubjson_high_precision_number ( ) ;
}
case ' C ' : // char
{
get ( ) ;
2022-08-02 14:28:39 +02:00
if ( JSON_HEDLEY_UNLIKELY ( ! unexpect_eof ( input_format , " char " ) ) )
2020-09-29 13:47:50 +02:00
{
return false ;
}
if ( JSON_HEDLEY_UNLIKELY ( current > 127 ) )
{
auto last_token = get_token_string ( ) ;
2022-08-02 14:28:39 +02:00
return sax - > parse_error ( chars_read , last_token , parse_error : : create ( 113 , chars_read ,
exception_message ( input_format , concat ( " byte after 'C' must be in range 0x00..0x7F; last byte: 0x " , last_token ) , " char " ) , nullptr ) ) ;
2020-09-29 13:47:50 +02:00
}
string_t s ( 1 , static_cast < typename string_t : : value_type > ( current ) ) ;
return sax - > string ( s ) ;
}
case ' S ' : // string
{
string_t s ;
return get_ubjson_string ( s ) & & sax - > string ( s ) ;
}
case ' [ ' : // array
return get_ubjson_array ( ) ;
case ' { ' : // object
return get_ubjson_object ( ) ;
default : // anything else
2022-08-02 14:28:39 +02:00
break ;
2020-09-29 13:47:50 +02:00
}
2022-08-02 14:28:39 +02:00
auto last_token = get_token_string ( ) ;
return sax - > parse_error ( chars_read , last_token , parse_error : : create ( 112 , chars_read , exception_message ( input_format , " invalid byte: 0x " + last_token , " value " ) , nullptr ) ) ;
2020-09-29 13:47:50 +02:00
}
/*!
@ return whether array creation completed
*/
bool get_ubjson_array ( )
{
std : : pair < std : : size_t , char_int_type > size_and_type ;
if ( JSON_HEDLEY_UNLIKELY ( ! get_ubjson_size_type ( size_and_type ) ) )
{
return false ;
}
2022-08-02 14:28:39 +02:00
// if bit-8 of size_and_type.second is set to 1, encode bjdata ndarray as an object in JData annotated array format (https://github.com/NeuroJSON/jdata):
// {"_ArrayType_" : "typeid", "_ArraySize_" : [n1, n2, ...], "_ArrayData_" : [v1, v2, ...]}
if ( input_format = = input_format_t : : bjdata & & size_and_type . first ! = string_t : : npos & & ( size_and_type . second & ( 1 < < 8 ) ) ! = 0 )
{
std : : map < char_int_type , string_t > bjdtype = { { ' U ' , " uint8 " } , { ' i ' , " int8 " } , { ' u ' , " uint16 " } , { ' I ' , " int16 " } ,
{ ' m ' , " uint32 " } , { ' l ' , " int32 " } , { ' M ' , " uint64 " } , { ' L ' , " int64 " } , { ' d ' , " single " } , { ' D ' , " double " } , { ' C ' , " char " }
} ;
size_and_type . second & = ~ ( static_cast < char_int_type > ( 1 ) < < 8 ) ; // use bit 8 to indicate ndarray, here we remove the bit to restore the type marker
string_t key = " _ArrayType_ " ;
if ( JSON_HEDLEY_UNLIKELY ( bjdtype . count ( size_and_type . second ) = = 0 ) )
{
auto last_token = get_token_string ( ) ;
return sax - > parse_error ( chars_read , last_token , parse_error : : create ( 112 , chars_read ,
exception_message ( input_format , " invalid byte: 0x " + last_token , " type " ) , nullptr ) ) ;
}
if ( JSON_HEDLEY_UNLIKELY ( ! sax - > key ( key ) | | ! sax - > string ( bjdtype [ size_and_type . second ] ) ) )
{
return false ;
}
if ( size_and_type . second = = ' C ' )
{
size_and_type . second = ' U ' ;
}
key = " _ArrayData_ " ;
if ( JSON_HEDLEY_UNLIKELY ( ! sax - > key ( key ) | | ! sax - > start_array ( size_and_type . first ) ) )
{
return false ;
}
for ( std : : size_t i = 0 ; i < size_and_type . first ; + + i )
{
if ( JSON_HEDLEY_UNLIKELY ( ! get_ubjson_value ( size_and_type . second ) ) )
{
return false ;
}
}
return ( sax - > end_array ( ) & & sax - > end_object ( ) ) ;
}
2020-09-29 13:47:50 +02:00
if ( size_and_type . first ! = string_t : : npos )
{
if ( JSON_HEDLEY_UNLIKELY ( ! sax - > start_array ( size_and_type . first ) ) )
{
return false ;
}
if ( size_and_type . second ! = 0 )
{
if ( size_and_type . second ! = ' N ' )
{
for ( std : : size_t i = 0 ; i < size_and_type . first ; + + i )
{
if ( JSON_HEDLEY_UNLIKELY ( ! get_ubjson_value ( size_and_type . second ) ) )
{
return false ;
}
}
}
}
else
{
for ( std : : size_t i = 0 ; i < size_and_type . first ; + + i )
{
if ( JSON_HEDLEY_UNLIKELY ( ! parse_ubjson_internal ( ) ) )
{
return false ;
}
}
}
}
else
{
2022-08-02 14:28:39 +02:00
if ( JSON_HEDLEY_UNLIKELY ( ! sax - > start_array ( static_cast < std : : size_t > ( - 1 ) ) ) )
2020-09-29 13:47:50 +02:00
{
return false ;
}
while ( current ! = ' ] ' )
{
if ( JSON_HEDLEY_UNLIKELY ( ! parse_ubjson_internal ( false ) ) )
{
return false ;
}
get_ignore_noop ( ) ;
}
}
return sax - > end_array ( ) ;
}
/*!
@ return whether object creation completed
*/
bool get_ubjson_object ( )
{
std : : pair < std : : size_t , char_int_type > size_and_type ;
if ( JSON_HEDLEY_UNLIKELY ( ! get_ubjson_size_type ( size_and_type ) ) )
{
return false ;
}
2022-08-02 14:28:39 +02:00
// do not accept ND-array size in objects in BJData
if ( input_format = = input_format_t : : bjdata & & size_and_type . first ! = string_t : : npos & & ( size_and_type . second & ( 1 < < 8 ) ) ! = 0 )
{
auto last_token = get_token_string ( ) ;
return sax - > parse_error ( chars_read , last_token , parse_error : : create ( 112 , chars_read ,
exception_message ( input_format , " BJData object does not support ND-array size in optimized format " , " object " ) , nullptr ) ) ;
}
2020-09-29 13:47:50 +02:00
string_t key ;
if ( size_and_type . first ! = string_t : : npos )
{
if ( JSON_HEDLEY_UNLIKELY ( ! sax - > start_object ( size_and_type . first ) ) )
{
return false ;
}
if ( size_and_type . second ! = 0 )
{
for ( std : : size_t i = 0 ; i < size_and_type . first ; + + i )
{
if ( JSON_HEDLEY_UNLIKELY ( ! get_ubjson_string ( key ) | | ! sax - > key ( key ) ) )
{
return false ;
}
if ( JSON_HEDLEY_UNLIKELY ( ! get_ubjson_value ( size_and_type . second ) ) )
{
return false ;
}
key . clear ( ) ;
}
}
else
{
for ( std : : size_t i = 0 ; i < size_and_type . first ; + + i )
{
if ( JSON_HEDLEY_UNLIKELY ( ! get_ubjson_string ( key ) | | ! sax - > key ( key ) ) )
{
return false ;
}
if ( JSON_HEDLEY_UNLIKELY ( ! parse_ubjson_internal ( ) ) )
{
return false ;
}
key . clear ( ) ;
}
}
}
else
{
2022-08-02 14:28:39 +02:00
if ( JSON_HEDLEY_UNLIKELY ( ! sax - > start_object ( static_cast < std : : size_t > ( - 1 ) ) ) )
2020-09-29 13:47:50 +02:00
{
return false ;
}
while ( current ! = ' } ' )
{
if ( JSON_HEDLEY_UNLIKELY ( ! get_ubjson_string ( key , false ) | | ! sax - > key ( key ) ) )
{
return false ;
}
if ( JSON_HEDLEY_UNLIKELY ( ! parse_ubjson_internal ( ) ) )
{
return false ;
}
get_ignore_noop ( ) ;
key . clear ( ) ;
}
}
return sax - > end_object ( ) ;
}
// Note, no reader for UBJSON binary types is implemented because they do
// not exist
bool get_ubjson_high_precision_number ( )
{
// get size of following number string
std : : size_t size { } ;
2022-08-02 14:28:39 +02:00
bool no_ndarray = true ;
auto res = get_ubjson_size_value ( size , no_ndarray ) ;
2020-09-29 13:47:50 +02:00
if ( JSON_HEDLEY_UNLIKELY ( ! res ) )
{
return res ;
}
// get number string
std : : vector < char > number_vector ;
for ( std : : size_t i = 0 ; i < size ; + + i )
{
get ( ) ;
2022-08-02 14:28:39 +02:00
if ( JSON_HEDLEY_UNLIKELY ( ! unexpect_eof ( input_format , " number " ) ) )
2020-09-29 13:47:50 +02:00
{
return false ;
}
number_vector . push_back ( static_cast < char > ( current ) ) ;
}
// parse number string
2021-12-11 23:32:21 +01:00
using ia_type = decltype ( detail : : input_adapter ( number_vector ) ) ;
auto number_lexer = detail : : lexer < BasicJsonType , ia_type > ( detail : : input_adapter ( number_vector ) , false ) ;
2020-09-29 13:47:50 +02:00
const auto result_number = number_lexer . scan ( ) ;
const auto number_string = number_lexer . get_token_string ( ) ;
const auto result_remainder = number_lexer . scan ( ) ;
using token_type = typename detail : : lexer_base < BasicJsonType > : : token_type ;
if ( JSON_HEDLEY_UNLIKELY ( result_remainder ! = token_type : : end_of_input ) )
{
2022-08-02 14:28:39 +02:00
return sax - > parse_error ( chars_read , number_string , parse_error : : create ( 115 , chars_read ,
exception_message ( input_format , concat ( " invalid number text: " , number_lexer . get_token_string ( ) ) , " high-precision number " ) , nullptr ) ) ;
2020-09-29 13:47:50 +02:00
}
switch ( result_number )
{
case token_type : : value_integer :
return sax - > number_integer ( number_lexer . get_number_integer ( ) ) ;
case token_type : : value_unsigned :
return sax - > number_unsigned ( number_lexer . get_number_unsigned ( ) ) ;
case token_type : : value_float :
return sax - > number_float ( number_lexer . get_number_float ( ) , std : : move ( number_string ) ) ;
2021-12-11 23:32:21 +01:00
case token_type : : uninitialized :
case token_type : : literal_true :
case token_type : : literal_false :
case token_type : : literal_null :
case token_type : : value_string :
case token_type : : begin_array :
case token_type : : begin_object :
case token_type : : end_array :
case token_type : : end_object :
case token_type : : name_separator :
case token_type : : value_separator :
case token_type : : parse_error :
case token_type : : end_of_input :
case token_type : : literal_or_value :
2020-09-29 13:47:50 +02:00
default :
2022-08-02 14:28:39 +02:00
return sax - > parse_error ( chars_read , number_string , parse_error : : create ( 115 , chars_read ,
exception_message ( input_format , concat ( " invalid number text: " , number_lexer . get_token_string ( ) ) , " high-precision number " ) , nullptr ) ) ;
2020-09-29 13:47:50 +02:00
}
}
///////////////////////
// Utility functions //
///////////////////////
/*!
@brief get next character from the input

This function provides the interface to the used input adapter. It does
not throw in case the input reached EOF, but returns the negative-valued
`std::char_traits<char_type>::eof()` in that case.

@return character read from the input
*/
char_int_type get()
{
    // count the read attempt first, then remember what the adapter delivered
    ++chars_read;
    current = ia.get_character();
    return current;
}
/*!
@ return character read from the input after ignoring all ' N ' entries
*/
char_int_type get_ignore_noop()
{
    // always consume one character, then keep reading while it is a no-op 'N'
    get();
    while (current == 'N')
    {
        get();
    }
    return current;
}
/*!
@brief read a number from the input

@tparam NumberType the type of the number
@tparam InputIsLittleEndian whether the input bytes are stored in little
        endian order
@param[in] format the current format (for diagnostics)
@param[out] result number of type @a NumberType

@return whether conversion completed

@note This function needs to respect the system's endianness, because
      bytes in CBOR, MessagePack, and UBJSON are stored in network order
      (big endian) and therefore need reordering on little endian systems.
      On the other hand, BSON and BJData use little endian and should reorder
      on big endian systems.
*/
template < typename NumberType , bool InputIsLittleEndian = false >
bool get_number ( const input_format_t format , NumberType & result )
{
// step 1: read input into array with system's byte order
2021-12-11 23:32:21 +01:00
std : : array < std : : uint8_t , sizeof ( NumberType ) > vec { } ;
2020-09-29 13:47:50 +02:00
for ( std : : size_t i = 0 ; i < sizeof ( NumberType ) ; + + i )
{
get ( ) ;
if ( JSON_HEDLEY_UNLIKELY ( ! unexpect_eof ( format , " number " ) ) )
{
return false ;
}
// reverse byte order prior to conversion if necessary
2022-08-02 14:28:39 +02:00
if ( is_little_endian ! = ( InputIsLittleEndian | | format = = input_format_t : : bjdata ) )
2020-09-29 13:47:50 +02:00
{
vec [ sizeof ( NumberType ) - i - 1 ] = static_cast < std : : uint8_t > ( current ) ;
}
else
{
vec [ i ] = static_cast < std : : uint8_t > ( current ) ; // LCOV_EXCL_LINE
}
}
// step 2: convert array into number of type T and return
std : : memcpy ( & result , vec . data ( ) , sizeof ( NumberType ) ) ;
return true ;
}
/*!
@ brief create a string by reading characters from the input
@ tparam NumberType the type of the number
@ param [ in ] format the current format ( for diagnostics )
@ param [ in ] len number of characters to read
@ param [ out ] result string created by reading @ a len bytes
@ return whether string creation completed
@ note We can not reserve @ a len bytes for the result , because @ a len
may be too large . Usually , @ ref unexpect_eof ( ) detects the end of
the input before we run out of string memory .
*/
template < typename NumberType >
bool get_string ( const input_format_t format ,
const NumberType len ,
string_t & result )
{
bool success = true ;
for ( NumberType i = 0 ; i < len ; i + + )
{
get ( ) ;
if ( JSON_HEDLEY_UNLIKELY ( ! unexpect_eof ( format , " string " ) ) )
{
success = false ;
break ;
}
result . push_back ( static_cast < typename string_t : : value_type > ( current ) ) ;
2021-12-11 23:32:21 +01:00
}
2020-09-29 13:47:50 +02:00
return success ;
}
/*!
@ brief create a byte array by reading bytes from the input
@ tparam NumberType the type of the number
@ param [ in ] format the current format ( for diagnostics )
@ param [ in ] len number of bytes to read
@ param [ out ] result byte array created by reading @ a len bytes
@ return whether byte array creation completed
@ note We can not reserve @ a len bytes for the result , because @ a len
may be too large . Usually , @ ref unexpect_eof ( ) detects the end of
the input before we run out of memory .
*/
template < typename NumberType >
bool get_binary ( const input_format_t format ,
const NumberType len ,
binary_t & result )
{
bool success = true ;
for ( NumberType i = 0 ; i < len ; i + + )
{
get ( ) ;
if ( JSON_HEDLEY_UNLIKELY ( ! unexpect_eof ( format , " binary " ) ) )
{
success = false ;
break ;
}
result . push_back ( static_cast < std : : uint8_t > ( current ) ) ;
}
return success ;
}
/*!
@ param [ in ] format the current format ( for diagnostics )
@ param [ in ] context further context information ( for diagnostics )
@ return whether the last read character is not EOF
*/
JSON_HEDLEY_NON_NULL(3)
bool unexpect_eof(const input_format_t format, const char* context) const
{
    const auto eof_marker = std::char_traits<char_type>::eof();
    if (JSON_HEDLEY_UNLIKELY(current == eof_marker))
    {
        // report parse error 110; the result of the SAX handler is returned
        const auto message = exception_message(format, " unexpected end of input ", context);
        return sax->parse_error(chars_read, " <end of file> ",
                                parse_error::create(110, chars_read, message, nullptr));
    }
    return true;
}
/*!
@ return a string representation of the last read byte
*/
std : : string get_token_string ( ) const
{
std : : array < char , 3 > cr { { } } ;
2022-08-02 14:28:39 +02:00
static_cast < void > ( ( std : : snprintf ) ( cr . data ( ) , cr . size ( ) , " %.2hhX " , static_cast < unsigned char > ( current ) ) ) ; // NOLINT(cppcoreguidelines-pro-type-vararg,hicpp-vararg)
2020-09-29 13:47:50 +02:00
return std : : string { cr . data ( ) } ;
}
/*!
@ param [ in ] format the current format
@ param [ in ] detail a detailed error message
@ param [ in ] context further context information
@ return a message string to use in the parse_error exceptions
*/
std : : string exception_message ( const input_format_t format ,
const std : : string & detail ,
const std : : string & context ) const
{
std : : string error_msg = " syntax error while parsing " ;
switch ( format )
{
case input_format_t : : cbor :
error_msg + = " CBOR " ;
break ;
case input_format_t : : msgpack :
error_msg + = " MessagePack " ;
break ;
case input_format_t : : ubjson :
error_msg + = " UBJSON " ;
break ;
case input_format_t : : bson :
error_msg + = " BSON " ;
break ;
2022-08-02 14:28:39 +02:00
case input_format_t : : bjdata :
error_msg + = " BJData " ;
break ;
2021-12-11 23:32:21 +01:00
case input_format_t : : json : // LCOV_EXCL_LINE
2020-09-29 13:47:50 +02:00
default : // LCOV_EXCL_LINE
2021-12-11 23:32:21 +01:00
JSON_ASSERT ( false ) ; // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE
2020-09-29 13:47:50 +02:00
}
2022-08-02 14:28:39 +02:00
return concat ( error_msg , ' ' , context , " : " , detail ) ;
2020-09-29 13:47:50 +02:00
}
private :
/// input adapter
InputAdapterType ia ;
/// the current character
char_int_type current = std : : char_traits < char_type > : : eof ( ) ;
/// the number of characters read
std : : size_t chars_read = 0 ;
2022-08-02 14:28:39 +02:00
/// whether we can assume little endianness
const bool is_little_endian = little_endianness ( ) ;
/// input format
const input_format_t input_format = input_format_t : : json ;
2020-09-29 13:47:50 +02:00
/// the SAX parser
json_sax_t * sax = nullptr ;
} ;
2022-08-02 14:28:39 +02:00
2020-09-29 13:47:50 +02:00
} // namespace detail
2022-08-02 14:28:39 +02:00
NLOHMANN_JSON_NAMESPACE_END