1/*
2 * LegacyClonk
3 *
4 * Copyright (c) 2024, The LegacyClonk Team and contributors
5 *
6 * Distributed under the terms of the ISC license; see accompanying file
7 * "COPYING" for details.
8 *
9 * "Clonk" is a registered trademark of Matthes Bender, used with permission.
10 * See accompanying file "TRADEMARK" for details.
11 *
12 * To redistribute this file separately, substitute the full license texts
13 * for the above references.
14 */
15
16#pragma once
17
18#include <cstring>
19#include <string>
20#include <string_view>
21
22#ifdef _WIN32
23#include "StdStringEncodingConverter.h"
24#elif defined(HAVE_ICONV)
25#include <functional>
26#include <mutex>
27#include <utility>
28
29#include <iconv.h>
30#endif
31
// Identifies a text encoding.
// - Ansi / Clonk: the same enumerator value (Clonk is an alias of Ansi).
// - Utf8: UTF-8.
// - Utf16: only available when building for Windows.
// - System: alias of Ansi on Windows, alias of Utf8 everywhere else.
enum class C4TextEncoding
{
	Ansi,
	Clonk = Ansi,
	Utf8,
#ifdef _WIN32
	Utf16,
	System = Ansi,
#else
	System = Utf8,
#endif
};
46
47
48class C4TextEncodingConverter
49{
50#ifdef HAVE_ICONV
51private:
52 class IconvPtr
53 {
54 public:
55 IconvPtr() = default;
56 IconvPtr(iconv_t ptr) noexcept : ptr{ptr} {}
57
58 IconvPtr(const IconvPtr &) = delete;
59 IconvPtr &operator=(const IconvPtr &ptr) = delete;
60 IconvPtr(IconvPtr &&other) noexcept : ptr{std::exchange(obj&: other.ptr, new_val: Invalid())} {}
61 IconvPtr &operator=(IconvPtr &&other) noexcept
62 {
63 reset();
64 ptr = std::exchange(obj&: other.ptr, new_val: Invalid());
65 return *this;
66 }
67
68 ~IconvPtr() noexcept
69 {
70 reset();
71 }
72
73 public:
74 void reset()
75 {
76 if (*this)
77 {
78 iconv_close(cd: std::exchange(obj&: ptr, new_val: Invalid()));
79 }
80 }
81
82 iconv_t get() const noexcept
83 {
84 return ptr;
85 }
86
87 explicit operator bool() const noexcept
88 {
89 return ptr != Invalid();
90 }
91
92 const iconv_t *operator &() const noexcept
93 {
94 return &ptr;
95 }
96
97 static iconv_t Invalid() noexcept
98 {
99 return reinterpret_cast<iconv_t>(-1);
100 }
101
102 private:
103 iconv_t ptr{Invalid()};
104 };
105#endif
106
107private:
108 template<typename CharType, C4TextEncoding Encoding>
109 static consteval bool IsCompatible()
110 {
111 switch (Encoding)
112 {
113 case C4TextEncoding::Ansi:
114 return std::is_same_v<CharType, char>;
115
116 case C4TextEncoding::Utf8:
117 return std::is_same_v<CharType, char> || std::is_same_v<CharType, char8_t>;
118
119#ifdef _WIN32
120 case C4TextEncoding::Utf16:
121 return std::is_same_v<CharType, wchar_t>;
122#endif
123 }
124
125 return false;
126 }
127
128 template<typename OutputString, typename InputCharType, typename InputCharTraits>
129 static OutputString CopyThrough(const std::basic_string_view<InputCharType, InputCharTraits> input)
130 {
131 using OutputCharType = typename OutputString::value_type;
132 if constexpr (std::is_same_v<InputCharType, OutputCharType>)
133 {
134 return OutputString{input};
135 }
136 else if constexpr (sizeof(InputCharType) == sizeof(OutputCharType))
137 {
138 // Easy conversion between u8string and string
139 OutputString result;
140 result.resize_and_overwrite(input.size(), [&input](OutputCharType *const ptr, const std::size_t size)
141 {
142 std::memcpy(dest: ptr, src: input.data(), n: size * sizeof(OutputCharType));
143 return size;
144 });
145
146 return result;
147 }
148 }
149
150#ifdef _WIN32
151 static consteval std::uint32_t ToCodePage(const C4TextEncoding encoding)
152 {
153 switch (encoding)
154 {
155 case C4TextEncoding::Ansi:
156 return CP_ACP;
157
158 case C4TextEncoding::Utf8:
159 return CP_UTF8;
160
161 default:
162 throw "Invalid encoding";
163 }
164 }
165#elif defined(HAVE_ICONV)
166 static consteval IconvPtr C4TextEncodingConverter::*GetConverter(const C4TextEncoding from, const C4TextEncoding to)
167 {
168 if (from == C4TextEncoding::Clonk && to == C4TextEncoding::System)
169 {
170 return &C4TextEncodingConverter::clonkToSystem;
171 }
172 else if (from == C4TextEncoding::System && to == C4TextEncoding::Clonk)
173 {
174 return &C4TextEncodingConverter::systemToClonk;
175 }
176 else if (from == C4TextEncoding::Clonk && to == C4TextEncoding::Utf8)
177 {
178 return &C4TextEncodingConverter::clonkToUtf8;
179 }
180 else if (from == C4TextEncoding::Utf8 && to == C4TextEncoding::Clonk)
181 {
182 return &C4TextEncodingConverter::utf8ToClonk;
183 }
184 else
185 {
186 throw "Invalid encoding";
187 }
188 }
189#endif
190
191public:
192 C4TextEncodingConverter() = default;
193
194public:
195 template<
196 C4TextEncoding From,
197 C4TextEncoding To,
198 typename _OutputCharType = void,
199 typename _OutputCharTraits = void,
200 typename _OutputAlloc = void,
201 typename InputCharType,
202 typename InputCharTraits
203 >
204 auto Convert(const std::basic_string<InputCharType, InputCharTraits> &input)
205 {
206 return Convert<From, To, _OutputCharType, _OutputCharTraits, _OutputAlloc>(static_cast<std::basic_string_view<InputCharType, InputCharTraits>>(input));
207 }
208
209 template<
210 C4TextEncoding From,
211 C4TextEncoding To,
212 typename _OutputCharType = void,
213 typename _OutputCharTraits = void,
214 typename _OutputAlloc = void,
215 typename InputCharType,
216 typename InputCharTraits
217 >
218 auto Convert(std::basic_string_view<InputCharType, InputCharTraits> input)
219 {
220 using OutputCharType = std::conditional_t<
221 std::is_same_v<_OutputCharType, void>,
222 std::conditional_t<
223 To == C4TextEncoding::Utf8,
224 char8_t,
225#ifdef _WIN32
226 std::conditional_t<
227 To == C4TextEncoding::Utf16,
228 wchar_t,
229 char>
230 >,
231#else
232 char>,
233#endif
234 _OutputCharType>;
235
236 using OutputCharTraits = std::conditional_t<
237 std::is_same_v<_OutputCharTraits, void>,
238 std::char_traits<OutputCharType>,
239 _OutputCharTraits>;
240
241 using OutputAlloc = std::conditional_t<
242 std::is_same_v<_OutputAlloc, void>,
243 std::allocator<OutputCharType>,
244 _OutputAlloc>;
245
246 static_assert(std::is_integral_v<InputCharType> && std::is_integral_v<OutputCharType>);
247 static_assert(IsCompatible<InputCharType, From>() && IsCompatible<OutputCharType, To>());
248
249 using OutputString = std::basic_string<OutputCharType, OutputCharTraits, OutputAlloc>;
250
251#ifndef _WIN32
252 static_assert(!std::is_same_v<InputCharType, wchar_t> && !std::is_same_v<OutputCharType, wchar_t>, "wchar_t is not supported on this platform");
253#endif
254
255 if (input.empty())
256 {
257 return OutputString{};
258 }
259
260 if constexpr (From == To)
261 {
262 return CopyThrough<OutputString>(input);
263 }
264
265#ifdef _WIN32
266 if constexpr (std::is_same_v<InputCharType, wchar_t>)
267 {
268 OutputString result;
269 result.resize_and_overwrite(StdStringEncodingConverter::WideCharToMultiByte(ToCodePage(To), input, {}), [&input](OutputCharType *const ptr, const std::size_t size)
270 {
271 return StdStringEncodingConverter::WideCharToMultiByte(ToCodePage(To), input, {reinterpret_cast<char *>(ptr), size});
272 });
273
274 return result;
275 }
276 else if constexpr (std::is_same_v<OutputCharType, wchar_t>)
277 {
278 OutputString result;
279 const std::span<const char> inputChar{reinterpret_cast<const char *>(input.data()), input.size()};
280
281 result.resize_and_overwrite(StdStringEncodingConverter::MultiByteToWideChar(ToCodePage(From), inputChar, {}), [&inputChar](OutputCharType *const ptr, const std::size_t size)
282 {
283 return StdStringEncodingConverter::MultiByteToWideChar(ToCodePage(From), inputChar, {ptr, size});
284 });
285
286 return result;
287 }
288 else
289 {
290 using TempAlloc = typename std::allocator_traits<OutputAlloc>::template rebind_alloc<wchar_t>;
291 auto temp = Convert<From, C4TextEncoding::Utf16, wchar_t, void, TempAlloc>(input);
292 return Convert<C4TextEncoding::Utf16, To, OutputCharType, OutputCharTraits, OutputAlloc>(temp);
293 }
294#elif defined(HAVE_ICONV)
295 IconvPtr &converterPtr{std::invoke(fn: GetConverter(from: From, to: To), args: this)};
296
297 const std::lock_guard lock{iconvMutex};
298
299 if (!converterPtr)
300 {
301 return CopyThrough<OutputString>(input);
302 }
303
304 const iconv_t converter{converterPtr.get()};
305
306 OutputString result;
307
308 // Reset converter
309 iconv(cd: converter, inbuf: nullptr, inbytesleft: nullptr, outbuf: nullptr, outbytesleft: nullptr);
310
311 result.resize(input.size());
312 const char *inbuf{reinterpret_cast<const char *>(input.data())};
313 std::size_t inlen{input.size()};
314 char *outbuf{reinterpret_cast<char *>(result.data())};
315 std::size_t outlen{result.size()};
316
317 while (inlen > 0)
318 {
319 if (iconv(cd: converter, inbuf: const_cast<ICONV_CONST char **>(&inbuf), inbytesleft: &inlen, outbuf: &outbuf, outbytesleft: &outlen) == static_cast<std::size_t>(-1))
320 {
321 switch (errno)
322 {
323 // There is not sufficient room at *outbuf.
324 case E2BIG:
325 {
326 const std::ptrdiff_t done{outbuf - reinterpret_cast<char *>(result.data())};
327 result.resize(result.size() + inlen * 2);
328 outbuf = reinterpret_cast<char *>(result.data()) + done;
329 outlen += inlen * 2;
330 break;
331 }
332 // An invalid multibyte sequence has been encountered in the input.
333 case EILSEQ:
334 ++inbuf;
335 --inlen;
336 break;
337 // An incomplete multibyte sequence has been encountered in the input.
338 case EINVAL:
339 default:
340 break;
341 }
342 }
343 }
344
345 if (outlen)
346 {
347 result.resize(result.size() - outlen);
348 }
349
350 return result;
351#else
352 static_assert(!std::is_same_v<std::type_identity_t<OutputCharType>, OutputCharType>, "Unsupported conversion");
353#endif
354 }
355
356 template<typename CharType = char8_t>
357 std::basic_string<CharType> ClonkToUtf8(const std::string_view input)
358 {
359 return Convert<C4TextEncoding::Clonk, C4TextEncoding::Utf8, CharType>(input);
360 }
361
362 std::string Utf8ToClonk(const std::u8string_view input)
363 {
364 return Convert<C4TextEncoding::Utf8, C4TextEncoding::Clonk>(input);
365 }
366
367 std::string Utf8ToClonk(const std::string_view input)
368 {
369 return Convert<C4TextEncoding::Utf8, C4TextEncoding::Clonk>(input);
370 }
371
372 std::string ClonkToSystem(const std::string_view input)
373 {
374 return Convert<C4TextEncoding::Clonk, C4TextEncoding::System, char>(input);
375 }
376
377 std::string SystemToClonk(const std::string_view input)
378 {
379 return Convert<C4TextEncoding::System, C4TextEncoding::Clonk>(input);
380 }
381
382
383#ifdef HAVE_ICONV
384 void CreateConverters(const char *charsetCodeName);
385#endif
386
387#ifdef HAVE_ICONV
388private:
389 std::mutex iconvMutex;
390 IconvPtr clonkToSystem;
391 IconvPtr systemToClonk;
392 IconvPtr clonkToUtf8;
393 IconvPtr utf8ToClonk;
394#endif
395};
396
// Global converter instance; only declared here, the definition is not part of this header.
extern C4TextEncodingConverter TextEncodingConverter;
398