Libparserutils
utf8.c
Go to the documentation of this file.
1 /*
2  * This file is part of LibParserUtils.
3  * Licensed under the MIT License,
4  * http://www.opensource.org/licenses/mit-license.php
5  * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
6  */
7 
12 #include <stdbool.h>
13 #include <stdlib.h>
14 #include <string.h>
15 
18 
/**
 * Number of continuation bytes for a given start byte.
 *
 * The table has one entry per possible byte value (256 entries) and is
 * indexed by the byte itself. Values as laid out below:
 *   0x00-0xBF -> 0
 *   0xC0-0xDF -> 1
 *   0xE0-0xEF -> 2
 *   0xF0-0xF7 -> 3
 *   0xF8-0xFB -> 4
 *   0xFC-0xFF -> 5
 *
 * NOTE(review): the entries of 4 and 5 match the 5- and 6-byte forms of the
 * original UTF-8 definition (RFC 2279), which RFC 3629 no longer permits.
 * How the users of this table treat those lead bytes is not visible in this
 * file -- confirm against the implementation macros.
 *
 * The table is not declared static, so it has external linkage.
 */
20 const uint8_t numContinuations[256] = {
21  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x0F */
22  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1F */
23  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20-0x2F */
24  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x30-0x3F */
25  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40-0x4F */
26  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x50-0x5F */
27  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60-0x6F */
28  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x70-0x7F */
29  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80-0x8F */
30  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x90-0x9F */
31  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xA0-0xAF */
32  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xB0-0xBF */
33  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xC0-0xCF */
34  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xD0-0xDF */
35  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0xE0-0xEF */
36  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, /* 0xF0-0xFF */
37 };
38 
/**
 * Convert a UTF-8 multibyte sequence into a single UCS-4 character.
 *
 * NOTE(review): listing line 51 -- the first line of this definition, with
 * the return type, function name and leading parameters -- is absent from
 * this listing. The cross-reference index of this page gives the prototype:
 *   parserutils_error parserutils_charset_utf8_to_ucs4(const uint8_t *s,
 *           size_t len, uint32_t *ucs4, size_t *clen)
 *
 * The function is a thin wrapper: all work is done by the UTF8_TO_UCS4
 * macro (utf8impl.h), and the macro's status is returned unchanged.
 *
 * Parameter meanings below are inferred from names only -- TODO confirm
 * against utf8impl.h:
 *   s     presumably the input UTF-8 bytes
 *   len   presumably the number of bytes available at s
 *   ucs4  presumably receives the decoded character
 *   clen  presumably receives the byte length of the decoded sequence
 */
52  uint32_t *ucs4, size_t *clen)
53 {
54  parserutils_error error;
55 
	/* `error` is uninitialised here and is passed to the macro by name;
	 * the macro is expected to assign it before the return below. */
56  UTF8_TO_UCS4(s, len, ucs4, clen, error);
57 
58  return error;
59 }
60 
/**
 * Convert a single UCS-4 character into a UTF-8 multibyte sequence.
 *
 * NOTE(review): listing line 72 -- the first line of this definition, with
 * the return type, function name and leading parameter -- is absent from
 * this listing. The cross-reference index of this page gives the prototype:
 *   parserutils_error parserutils_charset_utf8_from_ucs4(uint32_t ucs4,
 *           uint8_t **s, size_t *len)
 *
 * The function is a thin wrapper: all work is done by the UTF8_FROM_UCS4
 * macro (utf8impl.h), and the macro's status is returned unchanged.
 *
 * Parameter meanings below are inferred from names only -- TODO confirm
 * against utf8impl.h:
 *   ucs4  presumably the character to encode
 *   s     pointer to the output buffer pointer; whether the macro advances
 *         *s is not visible here
 *   len   whether this is an in/out "space remaining" value or a pure
 *         output is not visible here
 */
73  uint8_t **s, size_t *len)
74 {
75  parserutils_error error;
76 
	/* `error` is uninitialised here and is passed to the macro by name;
	 * the macro is expected to assign it before the return below. */
77  UTF8_FROM_UCS4(ucs4, s, len, error);
78 
79  return error;
80 }
81 
/**
 * Calculate the length (in characters) of a bounded UTF-8 string.
 *
 * NOTE(review): listing line 90 -- the first line of this definition, with
 * the return type, function name and leading parameters -- is absent from
 * this listing. The cross-reference index of this page gives the prototype:
 *   parserutils_error parserutils_charset_utf8_length(const uint8_t *s,
 *           size_t max, size_t *len)
 *
 * The function is a thin wrapper: all work is done by the UTF8_LENGTH
 * macro (utf8impl.h), and the macro's status is returned unchanged.
 *
 * Parameter meanings below are inferred from names only -- TODO confirm
 * against utf8impl.h:
 *   s    presumably the string to measure
 *   max  presumably the bound on the number of bytes examined
 *   len  presumably receives the character count
 *
 * NOTE(review): this page's index also lists a `max(a, b)` macro from
 * utils.h; if that header is in scope here it shares a name with the `max`
 * parameter -- confirm there is no unintended interaction.
 */
91  size_t *len)
92 {
93  parserutils_error error;
94 
	/* `error` is uninitialised here and is passed to the macro by name;
	 * the macro is expected to assign it before the return below. */
95  UTF8_LENGTH(s, max, len, error);
96 
97  return error;
98 }
99 
/**
 * Calculate the length (in bytes) of a UTF-8 character.
 *
 * NOTE(review): listing line 107 -- the first line of this definition, with
 * the return type, function name and leading parameter -- is absent from
 * this listing. The cross-reference index of this page gives the prototype:
 *   parserutils_error parserutils_charset_utf8_char_byte_length(
 *           const uint8_t *s, size_t *len)
 *
 * The function is a thin wrapper: all work is done by the
 * UTF8_CHAR_BYTE_LENGTH macro (utf8impl.h), and the macro's status is
 * returned unchanged.
 *
 * Parameter meanings below are inferred from names only -- TODO confirm
 * against utf8impl.h:
 *   s    presumably points at the first byte of the character
 *   len  presumably receives the byte length
 *
 * No buffer bound is passed to the macro, so how many bytes at `s` it reads
 * cannot be determined from this file.
 */
108  size_t *len)
109 {
110  parserutils_error error;
111 
	/* `error` is uninitialised here and is passed to the macro by name;
	 * the macro is expected to assign it before the return below. */
112  UTF8_CHAR_BYTE_LENGTH(s, len, error);
113 
114  return error;
115 }
116 
/**
 * Find previous legal UTF-8 char in string.
 *
 * The function is a thin wrapper: all work is done by the UTF8_PREV macro
 * (utf8impl.h), and the macro's status is returned unchanged.
 *
 * Parameter meanings below are inferred from names only -- TODO confirm
 * against utf8impl.h:
 *   s        presumably the string to search
 *   off      presumably the byte offset in s to search backwards from
 *   prevoff  presumably receives the offset of the previous character
 *
 * \return the parserutils_error value produced by UTF8_PREV
 */
126 parserutils_error parserutils_charset_utf8_prev(const uint8_t *s, uint32_t off,
127  uint32_t *prevoff)
128 {
129  parserutils_error error;
130 
	/* `error` is uninitialised here and is passed to the macro by name;
	 * the macro is expected to assign it before the return below. */
131  UTF8_PREV(s, off, prevoff, error);
132 
133  return error;
134 }
135 
/**
 * Find next legal UTF-8 char in string.
 *
 * NOTE(review): listing line 146 -- the first line of this definition, with
 * the return type, function name and leading parameters -- is absent from
 * this listing. The cross-reference index of this page gives the prototype:
 *   parserutils_error parserutils_charset_utf8_next(const uint8_t *s,
 *           uint32_t len, uint32_t off, uint32_t *nextoff)
 *
 * The function is a thin wrapper: all work is done by the UTF8_NEXT macro
 * (utf8impl.h), and the macro's status is returned unchanged.
 *
 * Parameter meanings below are inferred from names only -- TODO confirm
 * against utf8impl.h:
 *   s        presumably the string to search
 *   len      presumably the length of s in bytes
 *   off      presumably the byte offset in s to search forwards from
 *   nextoff  presumably receives the offset of the next character
 */
147  uint32_t off, uint32_t *nextoff)
148 {
149  parserutils_error error;
150 
	/* `error` is uninitialised here and is passed to the macro by name;
	 * the macro is expected to assign it before the return below. */
151  UTF8_NEXT(s, len, off, nextoff, error);
152 
153  return error;
154 }
155 
/**
 * Find next legal UTF-8 char in string.
 *
 * NOTE(review): listing line 166 -- the first line of this definition, with
 * the return type, function name and leading parameter -- is absent from
 * this listing. The cross-reference index of this page gives the prototype:
 *   parserutils_error parserutils_charset_utf8_next_paranoid(
 *           const uint8_t *s, uint32_t len, uint32_t off,
 *           uint32_t *nextoff)
 *
 * The function is a thin wrapper: all work is done by the
 * UTF8_NEXT_PARANOID macro (utf8impl.h), which this page's index describes
 * as "Skip to start of next sequence in UTF-8 input". The macro's status is
 * returned unchanged. How this differs from parserutils_charset_utf8_next
 * is decided entirely by that macro and is not visible in this file.
 *
 * Parameter meanings below are inferred from names only -- TODO confirm
 * against utf8impl.h:
 *   s        presumably the string to search
 *   len      presumably the length of s in bytes
 *   off      presumably the byte offset in s to search forwards from
 *   nextoff  presumably receives the offset of the next character
 */
167  uint32_t len, uint32_t off, uint32_t *nextoff)
168 {
169  parserutils_error error;
170 
	/* `error` is uninitialised here and is passed to the macro by name;
	 * the macro is expected to assign it before the return below. */
171  UTF8_NEXT_PARANOID(s, len, off, nextoff, error);
172 
173  return error;
174 }
175 
size_t len
Definition: codec_8859.c:23
parserutils_error
Definition: errors.h:18
parserutils_error parserutils_charset_utf8_length(const uint8_t *s, size_t max, size_t *len)
Calculate the length (in characters) of a bounded UTF-8 string.
Definition: utf8.c:90
parserutils_error parserutils_charset_utf8_from_ucs4(uint32_t ucs4, uint8_t **s, size_t *len)
Convert a single UCS-4 character into a UTF-8 multibyte sequence.
Definition: utf8.c:72
parserutils_error parserutils_charset_utf8_next_paranoid(const uint8_t *s, uint32_t len, uint32_t off, uint32_t *nextoff)
Find next legal UTF-8 char in string.
Definition: utf8.c:166
parserutils_error parserutils_charset_utf8_to_ucs4(const uint8_t *s, size_t len, uint32_t *ucs4, size_t *clen)
Convert a UTF-8 multibyte sequence into a single UCS-4 character.
Definition: utf8.c:51
const uint8_t numContinuations[256]
Number of continuation bytes for a given start byte.
Definition: utf8.c:20
parserutils_error parserutils_charset_utf8_next(const uint8_t *s, uint32_t len, uint32_t off, uint32_t *nextoff)
Find next legal UTF-8 char in string.
Definition: utf8.c:146
parserutils_error parserutils_charset_utf8_prev(const uint8_t *s, uint32_t off, uint32_t *prevoff)
Find previous legal UTF-8 char in string.
Definition: utf8.c:126
parserutils_error parserutils_charset_utf8_char_byte_length(const uint8_t *s, size_t *len)
Calculate the length (in bytes) of a UTF-8 character.
Definition: utf8.c:107
UTF-8 manipulation functions (interface).
UTF-8 manipulation macros (implementation).
#define UTF8_TO_UCS4(s, len, ucs4, clen, error)
Convert a UTF-8 multibyte sequence into a single UCS-4 character.
Definition: utf8impl.h:34
#define UTF8_PREV(s, off, prevoff, error)
Find previous legal UTF-8 char in string.
Definition: utf8impl.h:249
#define UTF8_FROM_UCS4(ucs4, s, len, error)
Convert a single UCS-4 character into a UTF-8 multibyte sequence.
Definition: utf8impl.h:123
#define UTF8_NEXT(s, len, off, nextoff, error)
Find next legal UTF-8 char in string.
Definition: utf8impl.h:274
#define UTF8_LENGTH(s, max, len, error)
Calculate the length (in characters) of a bounded UTF-8 string.
Definition: utf8impl.h:182
#define UTF8_CHAR_BYTE_LENGTH(s, len, error)
Calculate the length (in bytes) of a UTF-8 character.
Definition: utf8impl.h:228
#define UTF8_NEXT_PARANOID(s, len, off, nextoff, error)
Skip to start of next sequence in UTF-8 input.
Definition: utf8impl.h:303
#define max(a, b)
Definition: utils.h:12