-
Notifications
You must be signed in to change notification settings - Fork 10
Expand file tree
/
Copy pathutfconvertor.h
More file actions
157 lines (133 loc) · 5.89 KB
/
Copy pathutfconvertor.h
File metadata and controls
157 lines (133 loc) · 5.89 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
#pragma once
#include <cpputils/utfcvutils.h>
#include <tuple>
namespace {
/* helpers in this anonymous namespace map a byte-size onto the character
 * types declared by utfcvutils.h.
 * chartypes<N>::CHAR is the utf character type that is N bytes wide;
 * sizes other than 1, 2 and 4 have no CHAR member. */
template<int NBYTES>
struct chartypes {
};

template<>
struct chartypes<1> {
    using CHAR = utf8char_t;
};

template<>
struct chartypes<2> {
    using CHAR = utf16char_t;
};

template<>
struct chartypes<4> {
    using CHAR = utf32char_t;
};
/* unsignedforsize<N>::type is the fixed-width unsigned integer that is N bytes wide.
 * Only N = 1, 2, 4 and 8 are provided; any other size has no 'type' member. */
template<int NBYTES>
struct unsignedforsize {
};

template<>
struct unsignedforsize<1> {
    using type = uint8_t;
};

template<>
struct unsignedforsize<2> {
    using type = uint16_t;
};

template<>
struct unsignedforsize<4> {
    using type = uint32_t;
};

template<>
struct unsignedforsize<8> {
    using type = uint64_t;
};
/* plain element-wise copy from [src, send) into [dst, dend).
 *
 * Copying stops as soon as either the source or the destination range is
 * exhausted. A NUL element gets no special treatment: it is copied like any
 * other element, and none is appended.
 *
 * Returns a tuple (src, dst) holding the positions reached in both ranges,
 * so the caller can tell how many items were consumed and produced. */
template<typename D, typename S>
auto identityconvert(S src, S send, D dst, D dend)
{
    for ( ; src < send && dst < dend; ++src, ++dst)
        *dst = *src;

    return std::make_tuple(src, dst);
}
}
/* char_cast reinterprets a pointer to any type 'T' as a pointer to the char type
 * of the same byte-size (chartypes<sizeof(T)>::CHAR), which is the pointer type
 * the utfcvutils functions work with.
 * This overload keeps the pointee const. */
template<typename T>
const typename chartypes<sizeof(T)>::CHAR* char_cast(const T *p)
{
    return reinterpret_cast<const typename chartypes<sizeof(T)>::CHAR*>(p);
}
/* mutable-pointer overload of char_cast: reinterprets 'p' as a pointer to the
 * char type that has the same byte-size as 'T' (chartypes<sizeof(T)>::CHAR). */
template<typename T>
typename chartypes<sizeof(T)>::CHAR* char_cast(T *p)
{
    return reinterpret_cast<typename chartypes<sizeof(T)>::CHAR*>(p);
}
/* cast_to_char reinterprets 'p' as a const pointer to the char type that is
 * 'TO' bytes wide (chartypes<TO>::CHAR). Unlike char_cast, the target width is
 * given explicitly by the caller instead of being taken from sizeof(T). */
template<size_t TO, typename T>
const typename chartypes<TO>::CHAR* cast_to_char(const T *p)
{
    return reinterpret_cast<const typename chartypes<TO>::CHAR*>(p);
}
/* utfconvertor template converts from utf<FROM> to utf<TO> encoding,
 * where FROM and TO are the code unit sizes in bytes ( 1, 2 or 4 ).
 *
 * This primary template is intentionally empty; the functionality lives in the
 * explicit specializations for each supported <FROM, TO> pair. Each of those
 * provides an enum with the FROM and TO values, and three static functions:
 *
 *    convert     : converts utf<FROM> codeunits from the range [src, send) into
 *                  utf<TO> codeunits in the range [dst, dend).
 *                  The result is whatever the underlying utfcvutils function
 *                  returns; for FROM == TO it is a tuple with the positions
 *                  reached in <src> and <dst>.
 *    countneeded : the result of the matching utfcvutils '...needed' function
 *                  for the range [src, end); for FROM == TO it is the length
 *                  of the range.
 *    maxsize     : gives a quick calculation of the maximum possible number of codeunits required
 *                  for converting <from> utf<FROM> codeunits.
 */
template<int FROM, int TO>
struct utfconvertor {
// members provided by every specialization:
//    [ sused, dused ] convert(src, send, dst, dend);
//    countneeded(src, end)
//    size_t maxsize(size_t from)
};
/* utf-8 -> utf-16 : thin forwarding layer over utf8toutf16 / utf8toutf16needed */
template<>
struct utfconvertor<1,2> {
    enum { FROM=1, TO=2 };

    // converts [src, send) into [dst, dend); returns what utf8toutf16 returns
    template<typename DstIt, typename SrcIt>
    static auto convert(SrcIt src, SrcIt send, DstIt dst, DstIt dend)
    {
        return utf8toutf16(src, send, dst, dend);
    }

    // returns what utf8toutf16needed reports for [src, end)
    template<typename SrcIt>
    static auto countneeded(SrcIt src, SrcIt end)
    {
        return utf8toutf16needed(src, end);
    }

    // a utf-8 sequence never produces more utf-16 codeunits than it has bytes,
    // so the input length is already an upper bound
    static size_t maxsize(size_t from)
    {
        return from;
    }
};
/* utf-16 -> utf-8 : thin forwarding layer over utf16toutf8 / utf16toutf8needed */
template<>
struct utfconvertor<2,1> {
    enum { FROM=2, TO=1 };

    // converts [src, send) into [dst, dend); returns what utf16toutf8 returns
    template<typename DstIt, typename SrcIt>
    static auto convert(SrcIt src, SrcIt send, DstIt dst, DstIt dend)
    {
        return utf16toutf8(src, send, dst, dend);
    }

    // returns what utf16toutf8needed reports for [src, end)
    template<typename SrcIt>
    static auto countneeded(SrcIt src, SrcIt end)
    {
        return utf16toutf8needed(src, end);
    }

    // worst case: values between 0x800 and 0xffff take a single utf-16 codeunit,
    // but need 3 bytes in utf-8
    static size_t maxsize(size_t from)
    {
        return 3*from;
    }
};
/* utf-8 -> utf-32 : thin forwarding layer over utf8toutf32 / utf8toutf32needed */
template<>
struct utfconvertor<1,4> {
    enum { FROM=1, TO=4 };

    // converts [src, send) into [dst, dend); returns what utf8toutf32 returns
    template<typename DstIt, typename SrcIt>
    static auto convert(SrcIt src, SrcIt send, DstIt dst, DstIt dend)
    {
        return utf8toutf32(src, send, dst, dend);
    }

    // returns what utf8toutf32needed reports for [src, end)
    template<typename SrcIt>
    static auto countneeded(SrcIt src, SrcIt end)
    {
        return utf8toutf32needed(src, end);
    }

    // worst case: values below 0x80 map one utf-8 byte onto one utf-32 codeunit
    // ( 4 times the byte size, but the same number of codeunits )
    static size_t maxsize(size_t from)
    {
        return from;
    }
};
/* utf-32 -> utf-8 : thin forwarding layer over utf32toutf8 / utf32toutf8needed */
template<>
struct utfconvertor<4,1> {
    enum { FROM=4, TO=1 };

    // converts [src, send) into [dst, dend); returns what utf32toutf8 returns
    template<typename DstIt, typename SrcIt>
    static auto convert(SrcIt src, SrcIt send, DstIt dst, DstIt dend)
    {
        return utf32toutf8(src, send, dst, dend);
    }

    // returns what utf32toutf8needed reports for [src, end)
    template<typename SrcIt>
    static auto countneeded(SrcIt src, SrcIt end)
    {
        return utf32toutf8needed(src, end);
    }

    // worst case: values from 0x10000 upwards require 4 utf-8 bytes each
    static size_t maxsize(size_t from)
    {
        return 4*from;
    }
};
/* utf-32 -> utf-16 : thin forwarding layer over utf32toutf16 / utf32toutf16needed */
template<>
struct utfconvertor<4,2> {
    enum { FROM=4, TO=2 };

    // converts [src, send) into [dst, dend); returns what utf32toutf16 returns
    template<typename DstIt, typename SrcIt>
    static auto convert(SrcIt src, SrcIt send, DstIt dst, DstIt dend)
    {
        return utf32toutf16(src, send, dst, dend);
    }

    // returns what utf32toutf16needed reports for [src, end)
    template<typename SrcIt>
    static auto countneeded(SrcIt src, SrcIt end)
    {
        return utf32toutf16needed(src, end);
    }

    // worst case: values from 0x10000 upwards require 2 utf-16 codeunits each
    static size_t maxsize(size_t from)
    {
        return 2*from;
    }
};
/* utf-16 -> utf-32 : thin forwarding layer over utf16toutf32 / utf16toutf32needed */
template<>
struct utfconvertor<2,4> {
    enum { FROM=2, TO=4 };

    // converts [src, send) into [dst, dend); returns what utf16toutf32 returns
    template<typename DstIt, typename SrcIt>
    static auto convert(SrcIt src, SrcIt send, DstIt dst, DstIt dend)
    {
        return utf16toutf32(src, send, dst, dend);
    }

    // returns what utf16toutf32needed reports for [src, end)
    template<typename SrcIt>
    static auto countneeded(SrcIt src, SrcIt end)
    {
        return utf16toutf32needed(src, end);
    }

    // every utf-16 codeunit yields at most one utf-32 codeunit
    static size_t maxsize(size_t from)
    {
        return from;
    }
};
/* utf-8 -> utf-8 : no re-encoding needed, the codeunits are copied unchanged */
template<>
struct utfconvertor<1,1> {
    enum { FROM=1, TO=1 };

    // copies [src, send) into [dst, dend) via identityconvert
    template<typename DstIt, typename SrcIt>
    static auto convert(SrcIt src, SrcIt send, DstIt dst, DstIt dend)
    {
        return identityconvert(src, send, dst, dend);
    }

    // output needs exactly as many codeunits as the input range holds
    template<typename SrcIt>
    static auto countneeded(SrcIt src, SrcIt end)
    {
        return std::distance(src, end);
    }

    // same encoding on both sides, so the size does not change
    static size_t maxsize(size_t from)
    {
        return from;
    }
};
/* utf-16 -> utf-16 : no re-encoding needed, the codeunits are copied unchanged */
template<>
struct utfconvertor<2,2> {
    enum { FROM=2, TO=2 };

    // copies [src, send) into [dst, dend) via identityconvert
    template<typename DstIt, typename SrcIt>
    static auto convert(SrcIt src, SrcIt send, DstIt dst, DstIt dend)
    {
        return identityconvert(src, send, dst, dend);
    }

    // output needs exactly as many codeunits as the input range holds
    template<typename SrcIt>
    static auto countneeded(SrcIt src, SrcIt end)
    {
        return std::distance(src, end);
    }

    // same encoding on both sides, so the size does not change
    static size_t maxsize(size_t from)
    {
        return from;
    }
};
/* utf-32 -> utf-32 : no re-encoding needed, the codeunits are copied unchanged */
template<>
struct utfconvertor<4,4> {
    enum { FROM=4, TO=4 };

    // copies [src, send) into [dst, dend) via identityconvert
    template<typename DstIt, typename SrcIt>
    static auto convert(SrcIt src, SrcIt send, DstIt dst, DstIt dend)
    {
        return identityconvert(src, send, dst, dend);
    }

    // output needs exactly as many codeunits as the input range holds
    template<typename SrcIt>
    static auto countneeded(SrcIt src, SrcIt end)
    {
        return std::distance(src, end);
    }

    // same encoding on both sides, so the size does not change
    static size_t maxsize(size_t from)
    {
        return from;
    }
};