1/*
2 * LegacyClonk
3 *
4 * Copyright (c) RedWolf Design
5 * Copyright (c) 2017-2021, The LegacyClonk Team and contributors
6 *
7 * Distributed under the terms of the ISC license; see accompanying file
8 * "COPYING" for details.
9 *
10 * "Clonk" is a registered trademark of Matthes Bender, used with permission.
11 * See accompanying file "TRADEMARK" for details.
12 *
13 * To redistribute this file separately, substitute the full license texts
14 * for the above references.
15 */
16
17#include <Standard.h>
18#include <StdBuf.h>
19#include <StdCompiler.h>
20#include <StdAdaptors.h>
21#include <StdFile.h>
22
23#include <stdarg.h>
24#include <stdio.h>
25#ifdef _WIN32
26#include <io.h>
27#else
28#define O_BINARY 0
29#define O_SEQUENTIAL 0
30#include <unistd.h>
31#include <stdlib.h>
32#endif
33#include <ctype.h>
34#include <fcntl.h>
35#include <sys/stat.h>
36
37#include <ios>
38#include <fstream>
39
40// *** StdBuf
41
42bool StdBuf::LoadFromFile(const char *szFile) try
43{
44 std::ifstream file{szFile, std::ios::binary};
45 // Create buf
46 New(inSize: FileSize(fname: szFile));
47 return file && file.read(s: static_cast<char *>(getMData()), n: getSize());
48}
49catch (const std::ios_base::failure &)
50{
51 return false;
52}
53
54bool StdBuf::SaveToFile(const char *szFile) const try
55{
56 std::ofstream file{szFile, std::ios::binary | std::ios::trunc};
57 return file && file.write(s: static_cast<const char *>(getData()), n: getSize());
58}
59catch (const std::ios_base::failure &)
60{
61 return false;
62}
63
64bool StdStrBuf::LoadFromFile(const char *szFile) try
65{
66 std::ifstream file{szFile, std::ios::binary};
67 // Create buf
68 SetLength(FileSize(fname: szFile));
69 return file && file.read(s: getMData(), n: getLength());
70}
71catch (const std::ios_base::failure &)
72{
73 return false;
74}
75
76bool StdStrBuf::SaveToFile(const char *szFile) const try
77{
78 std::ofstream file{szFile, std::ios::binary | std::ios::trunc};
79 return file && file.write(s: getData(), n: getLength());
80}
81catch (const std::ios_base::failure &)
82{
83 return false;
84}
85
86void StdBuf::CompileFunc(StdCompiler *pComp, int iType)
87{
88 // Size (guess it is a small value most of the time - if it's big, an extra byte won't hurt anyway)
89 auto tmp = static_cast<uint32_t>(iSize); pComp->Value(rStruct: mkIntPackAdapt(rVal&: tmp)); iSize = tmp;
90 pComp->Separator(eSep: StdCompiler::SEP_PART2);
91 // Read/write data
92 if (pComp->isCompiler())
93 {
94 New(inSize: iSize);
95 pComp->Raw(pData: getMData(), iSize, eType: StdCompiler::RawCompileType(iType));
96 }
97 else
98 {
99 pComp->Raw(pData: const_cast<void *>(getData()), iSize, eType: StdCompiler::RawCompileType(iType));
100 }
101}
102
// *** StdStrBuf
104
105void StdStrBuf::CompileFunc(StdCompiler *pComp, int iRawType)
106{
107 if (pComp->isCompiler())
108 {
109 std::string data;
110 pComp->String(str&: data, type: StdCompiler::RawCompileType(iRawType));
111 Copy(pnData: data.c_str(), iChars: data.size());
112 }
113 else
114 {
115 // pData is only read anyway, since it is a decompiler
116 const char *data{getData()};
117 if (!data)
118 {
119 data = "";
120 }
121 pComp->String(string: data, maxLength: getLength(), type: StdCompiler::RawCompileType(iRawType));
122 }
123}
124
125// replace all occurences of one string with another. Return number of replacements.
126int StdStrBuf::Replace(const char *szOld, const char *szNew, size_t iStartSearch)
127{
128 if (!getPtr(i: 0) || !szOld) return 0;
129 if (!szNew) szNew = "";
130 int cnt = 0;
131 size_t iOldLen = strlen(s: szOld), iNewLen = strlen(s: szNew);
132 if (iOldLen != iNewLen)
133 {
134 // count number of occurences to calculate new string length
135 size_t iResultLen = getLength();
136 const char *szPos = getPtr(i: iStartSearch);
137 while (szPos = SSearch(szString: szPos, szIndex: szOld))
138 {
139 iResultLen += iNewLen - iOldLen;
140 ++cnt;
141 }
142 if (!cnt) return 0;
143 // now construct new string by replacement
144 StdStrBuf sResult;
145 sResult.New(inSize: iResultLen + 1);
146 const char *szRPos = getPtr(i: 0), *szRNextPos;
147 char *szWrite = sResult.getMPtr(i: 0);
148 if (iStartSearch)
149 {
150 memcpy(dest: szWrite, src: szRPos, n: iStartSearch * sizeof(char));
151 szRPos += iStartSearch;
152 szWrite += iStartSearch;
153 }
154 while (szRNextPos = SSearch(szString: szRPos, szIndex: szOld))
155 {
156 memcpy(dest: szWrite, src: szRPos, n: (szRNextPos - szRPos - iOldLen) * sizeof(char));
157 szWrite += (szRNextPos - szRPos - iOldLen);
158 memcpy(dest: szWrite, src: szNew, n: iNewLen * sizeof(char));
159 szWrite += iNewLen;
160 szRPos = szRNextPos;
161 }
162 strcpy(dest: szWrite, src: szRPos);
163 Take(Buf2&: sResult);
164 }
165 else
166 {
167 // replace directly in this string
168 char *szRPos = getMPtr(i: iStartSearch);
169 while (szRPos = const_cast<char *>(SSearch(szString: szRPos, szIndex: szOld)))
170 {
171 memcpy(dest: szRPos - iOldLen, src: szNew, n: iOldLen * sizeof(char));
172 ++cnt;
173 }
174 }
175 return cnt;
176}
177
178int StdStrBuf::ReplaceChar(char cOld, char cNew, size_t iStartSearch)
179{
180 if (isNull()) return 0;
181 char *szPos = getMPtr(i: 0);
182 if (!cOld) return 0;
183 if (!cNew) cNew = '_';
184 int cnt = 0;
185 while (szPos = strchr(s: szPos, c: cOld))
186 {
187 *szPos++ = cNew;
188 ++cnt;
189 }
190 return cnt;
191}
192
193void StdStrBuf::ReplaceEnd(size_t iPos, const char *szNewEnd)
194{
195 size_t iLen = getLength();
196 assert(iPos <= iLen); if (iPos > iLen) return;
197 size_t iEndLen = strlen(s: szNewEnd);
198 if (iLen - iPos != iEndLen) SetLength(iPos + iEndLen);
199 memcpy(dest: getMPtr(i: iPos), src: szNewEnd, n: iEndLen * sizeof(char));
200}
201
202bool StdStrBuf::ValidateChars(const char *szInitialChars, const char *szMidChars)
203{
204 // only given chars may be in string
205 for (size_t i = 0; i < getLength(); ++i)
206 if (!strchr(s: i ? szMidChars : szInitialChars, c: getData()[i]))
207 return false;
208 return true;
209}
210
211bool StdStrBuf::GetSection(size_t idx, StdStrBuf *psOutSection, char cSeparator) const
212{
213 assert(psOutSection);
214 psOutSection->Clear();
215 const char *szStr = getData(), *szSepPos;
216 if (!szStr) return false; // invalid argument
217 while ((szSepPos = strchr(s: szStr, c: cSeparator)) && idx) { szStr = szSepPos + 1; --idx; }
218 if (idx) return false; // indexed section not found
219 // fill output buffer with section, if not empty
220 if (!szSepPos) szSepPos = getData() + getLength();
221 if (szSepPos != szStr) psOutSection->Copy(pnData: szStr, iChars: szSepPos - szStr);
222 // return true even if section is empty, because the section obviously exists
223 // (to enable loops like while (buf.GetSection(i++, &sect)) if (sect) ...)
224 return true;
225}
226
227void StdStrBuf::EnsureUnicode()
228{
229 bool valid = true;
230 int need_continuation_bytes = 0;
231 // Check wether valid UTF-8
232 for (size_t i = 0; i < getSize(); ++i)
233 {
234 unsigned char c = *getPtr(i);
235 // remaining of a code point started before
236 if (need_continuation_bytes)
237 {
238 --need_continuation_bytes;
239 // (10000000-10111111)
240 if (0x80 <= c && c <= 0xBF)
241 continue;
242 else
243 {
244 valid = false;
245 break;
246 }
247 }
248 // ASCII
249 if (c < 0x80)
250 continue;
251 // Two byte sequence (11000010-11011111)
252 // Note: 1100000x is an invalid overlong sequence
253 if (0xC2 <= c && c <= 0xDF)
254 {
255 need_continuation_bytes = 1;
256 continue;
257 }
258 // Three byte sequence (11100000-11101111)
259 if (0xE0 <= c && c <= 0xEF)
260 {
261 need_continuation_bytes = 2;
262 continue;
263 // FIXME: could check for UTF-16 surrogates from a broken utf-16->utf-8 converter here
264 }
265 // Four byte sequence (11110000-11110100)
266 if (0xF0 <= c && c <= 0xF4)
267 {
268 need_continuation_bytes = 3;
269 continue;
270 }
271 valid = false;
272 break;
273 }
274 if (need_continuation_bytes)
275 valid = false;
276 // assume that it's windows-1252 and convert to utf-8
277 if (!valid)
278 {
279 size_t j = 0;
280 StdStrBuf buf;
281 buf.Grow(iGrow: getLength());
282 // totally unfounded statistic: most texts have less than 20 umlauts.
283 enum { GROWSIZE = 20 };
284 for (size_t i = 0; i < getSize(); ++i)
285 {
286 unsigned char c = *getPtr(i);
287 if (c < 0x80)
288 {
289 if (j >= buf.getLength())
290 buf.Grow(iGrow: GROWSIZE);
291 *buf.getMPtr(i: j++) = c;
292 continue;
293 }
294 if (0xA0 <= c)
295 {
296 if (j + 1 >= buf.getLength())
297 buf.Grow(iGrow: GROWSIZE);
298 *buf.getMPtr(i: j++) = (0xC0 | c >> 6);
299 *buf.getMPtr(i: j++) = (0x80 | c & 0x3F);
300 continue;
301 }
302 // Extra windows-1252-characters
303 buf.SetLength(j);
304 // Let's hope that no editor mangles these UTF-8 strings...
305 static const char *extra_chars[] =
306 {
307 "€", "?", "‚", "ƒ", "„", "…", "†", "‡", "ˆ", "‰", "Š", "‹", "Œ", "?", "Ž", "?",
308 "?", "‘", "’", "“", "”", "•", "–", "—", "˜", "™", "š", "›", "œ", "?", "ž", "Ÿ"
309 };
310 buf.Append(pnData: extra_chars[c - 0x80]);
311 j += strlen(s: extra_chars[c - 0x80]);
312 }
313 buf.SetLength(j);
314 Take(Buf2&: buf);
315 }
316}
317
318bool StdStrBuf::TrimSpaces()
319{
320 // get left trim
321 size_t iSpaceLeftCount = 0, iLength = getLength();
322 if (!iLength) return false;
323 const char *szStr = getData();
324 while (iSpaceLeftCount < iLength)
325 if (isspace(static_cast<unsigned char>(szStr[iSpaceLeftCount])))
326 ++iSpaceLeftCount;
327 else
328 break;
329 // only spaces? Clear!
330 if (iSpaceLeftCount == iLength)
331 {
332 Clear();
333 return true;
334 }
335 // get right trim
336 size_t iSpaceRightCount = 0;
337 while (isspace(static_cast<unsigned char>(szStr[iLength - 1 - iSpaceRightCount]))) ++iSpaceRightCount;
338 // anything to trim?
339 if (!iSpaceLeftCount && !iSpaceRightCount) return false;
340 // only right trim? Can do this by shortening
341 if (!iSpaceLeftCount)
342 {
343 SetLength(iLength - iSpaceRightCount);
344 return true;
345 }
346 // left trim involved - move text and shorten
347 memmove(dest: getMPtr(i: 0), src: szStr + iSpaceLeftCount, n: iLength - iSpaceLeftCount - iSpaceRightCount);
348 SetLength(iLength - iSpaceLeftCount - iSpaceRightCount);
349 return true;
350}
351