ujson
Complete and simple JSON reader and writer written in C
All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros
ujson_utf.h
Go to the documentation of this file.
1 // SPDX-License-Identifier: LGPL-2.1-or-later
2 /*
3  * Copyright (C) 2022-2024 Cyril Hrubis <metan@ucw.cz>
4  */
5 
11 #ifndef UJSON_UTF_H
12 #define UJSON_UTF_H
13 
14 #include <stdint.h>
15 #include <stddef.h>
16 
/* True if ch is a single-byte (ASCII) character, i.e. the top bit is clear. */
#define UJSON_UTF8_IS_ASCII(ch) (!((ch) & 0x80))
/* True if ch is a continuation byte, i.e. has the form 10xxxxxx. */
#define UJSON_UTF8_IS_NBYTE(ch) (((ch) & 0xc0) == 0x80)
/* True if ch is the lead byte of a two-byte sequence, i.e. 110xxxxx. */
#define UJSON_UTF8_IS_2BYTE(ch) (((ch) & 0xe0) == 0xc0)
/* True if ch is the lead byte of a three-byte sequence, i.e. 1110xxxx. */
#define UJSON_UTF8_IS_3BYTE(ch) (((ch) & 0xf0) == 0xe0)
/* True if ch is the lead byte of a four-byte sequence, i.e. 11110xxx. */
#define UJSON_UTF8_IS_4BYTE(ch) (((ch) & 0xf8) == 0xf0)

/* Mask selecting the six payload bits of a continuation byte. */
#define UJSON_UTF8_NBYTE_MASK 0x3f

/**
 * @brief Parses next unicode character in UTF-8 string.
 *
 * Decodes one character starting at *str and advances *str past every byte
 * that was consumed. Each continuation byte is validated before the pointer
 * is moved over it, so decoding stops at the first byte that is not a
 * continuation byte, which includes the NUL terminator.
 *
 * @param str Pointer to the current position in the string, updated in place.
 * @return The decoded code point, or 0 when a continuation byte is missing
 *         or the lead byte is not a valid 1-4 byte lead. Note that 0 is also
 *         returned when the byte at *str is the NUL terminator; in that case
 *         *str has been advanced past it.
 */
static inline uint32_t ujson_utf8_next(const char **str)
{
	/* Read through unsigned char so bytes >= 0x80 never sign-extend. */
	uint32_t s0 = (unsigned char)**str;

	(*str)++;

	if (UJSON_UTF8_IS_ASCII(s0))
		return s0;

	uint32_t s1 = (unsigned char)**str;

	if (!UJSON_UTF8_IS_NBYTE(s1))
		return 0;

	s1 &= UJSON_UTF8_NBYTE_MASK;

	(*str)++;

	if (UJSON_UTF8_IS_2BYTE(s0))
		return (s0 & 0x1f)<<6 | s1;

	uint32_t s2 = (unsigned char)**str;

	if (!UJSON_UTF8_IS_NBYTE(s2))
		return 0;

	s2 &= UJSON_UTF8_NBYTE_MASK;

	(*str)++;

	if (UJSON_UTF8_IS_3BYTE(s0))
		return (s0 & 0x0f)<<12 | s1<<6 | s2;

	/*
	 * Read and validate the fourth byte before stepping over it, so a
	 * truncated sequence never moves the pointer past the terminator.
	 */
	uint32_t s3 = (unsigned char)**str;

	if (!UJSON_UTF8_IS_NBYTE(s3))
		return 0;

	s3 &= UJSON_UTF8_NBYTE_MASK;

	(*str)++;

	if (UJSON_UTF8_IS_4BYTE(s0))
		return (s0 & 0x07)<<18 | s1<<12 | s2<<6 | s3;

	/* s0 was a stray continuation byte or an invalid lead (>= 0xf8). */
	return 0;
}
82 
/*
 * Returns number of bytes next character is occupying in an UTF-8 string.
 *
 * NOTE(review): off is presumably a byte offset into str, and the signed
 * return type suggests a non-positive value signals an error or the end of
 * the string; the definition is not visible here, confirm against it.
 */
90 int8_t ujson_utf8_next_chsz(const char *str, size_t off);
91 
/*
 * Returns number of bytes previous character is occupying in an UTF-8 string.
 *
 * NOTE(review): off is presumably a byte offset into str; confirm against
 * the definition, which is not visible here.
 */
99 int8_t ujson_utf8_prev_chsz(const char *str, size_t off);
100 
/*
 * Returns a number of characters in UTF-8 string.
 *
 * NOTE(review): presumably counts decoded characters rather than bytes of a
 * NUL-terminated string; confirm against the definition.
 */
110 size_t ujson_utf8_strlen(const char *str);
111 
/**
 * @brief Returns a number of bytes needed to store unicode character into UTF-8.
 *
 * The encoded length grows by one byte at each of the thresholds 0x80, 0x800
 * and 0x10000; every value at or above 0x10000 is reported as four bytes.
 *
 * @param unicode A unicode code point.
 * @return Number of bytes, in the range 1 to 4.
 */
static inline unsigned int ujson_utf8_bytes(uint32_t unicode)
{
	/* Each comparison yields 0 or 1, so the sum counts thresholds crossed. */
	return 1u + (unicode >= 0x0080) + (unicode >= 0x0800) + (unicode >= 0x10000);
}
131 
/**
 * @brief Writes an unicode character into a UTF-8 buffer.
 *
 * No terminating NUL is written. Values at or above 0x10000 are always
 * emitted as four bytes, keeping only the low 21 bits.
 *
 * @param unicode A unicode code point.
 * @param buf Destination buffer; up to four bytes are written.
 * @return Number of bytes written, in the range 1 to 4.
 */
static inline int ujson_to_utf8(uint32_t unicode, char *buf)
{
	int len;
	uint32_t lead_prefix;
	uint32_t lead_mask;

	if (unicode < 0x0080) {
		buf[0] = unicode & 0x007f;
		return 1;
	}

	/* Pick the sequence length together with the lead byte layout. */
	if (unicode < 0x0800) {
		len = 2;
		lead_prefix = 0xc0;
		lead_mask = 0x1f;
	} else if (unicode < 0x10000) {
		len = 3;
		lead_prefix = 0xe0;
		lead_mask = 0x0f;
	} else {
		len = 4;
		lead_prefix = 0xf0;
		lead_mask = 0x07;
	}

	/* Fill continuation bytes from the end, six payload bits at a time. */
	for (int pos = len - 1; pos > 0; pos--) {
		buf[pos] = 0x80 | (0x3f & unicode);
		unicode >>= 6;
	}

	/* Whatever bits remain belong to the lead byte. */
	buf[0] = lead_prefix | (lead_mask & unicode);

	return len;
}
167 
168 #endif /* UJSON_UTF_H */
size_t ujson_utf8_strlen(const char *str)
Returns the number of characters in a UTF-8 string.
#define UJSON_UTF8_IS_2BYTE(ch)
Definition: ujson_utf.h:22
static uint32_t ujson_utf8_next(const char **str)
Parses the next Unicode character in a UTF-8 string.
Definition: ujson_utf.h:35
#define UJSON_UTF8_IS_ASCII(ch)
Definition: ujson_utf.h:18
static int ujson_to_utf8(uint32_t unicode, char *buf)
Writes a Unicode character into a UTF-8 buffer.
Definition: ujson_utf.h:141
int8_t ujson_utf8_next_chsz(const char *str, size_t off)
Returns the number of bytes the next character occupies in a UTF-8 string.
static unsigned int ujson_utf8_bytes(uint32_t unicode)
Returns the number of bytes needed to store a Unicode character in UTF-8.
Definition: ujson_utf.h:118
#define UJSON_UTF8_IS_3BYTE(ch)
Definition: ujson_utf.h:24
int8_t ujson_utf8_prev_chsz(const char *str, size_t off)
Returns the number of bytes the previous character occupies in a UTF-8 string.
#define UJSON_UTF8_IS_4BYTE(ch)
Definition: ujson_utf.h:26
#define UJSON_UTF8_IS_NBYTE(ch)
Definition: ujson_utf.h:20