| 1 | /* |
| 2 | * LegacyClonk |
| 3 | * |
| 4 | * Copyright (c) RedWolf Design |
| 5 | * Copyright (c) 2017-2021, The LegacyClonk Team and contributors |
| 6 | * |
| 7 | * Distributed under the terms of the ISC license; see accompanying file |
| 8 | * "COPYING" for details. |
| 9 | * |
| 10 | * "Clonk" is a registered trademark of Matthes Bender, used with permission. |
| 11 | * See accompanying file "TRADEMARK" for details. |
| 12 | * |
| 13 | * To redistribute this file separately, substitute the full license texts |
| 14 | * for the above references. |
| 15 | */ |
| 16 | |
| 17 | #include <Standard.h> |
| 18 | #include <StdBuf.h> |
| 19 | #include <StdCompiler.h> |
| 20 | #include <StdAdaptors.h> |
| 21 | #include <StdFile.h> |
| 22 | |
| 23 | #include <stdarg.h> |
| 24 | #include <stdio.h> |
| 25 | #ifdef _WIN32 |
| 26 | #include <io.h> |
| 27 | #else |
| 28 | #define O_BINARY 0 |
| 29 | #define O_SEQUENTIAL 0 |
| 30 | #include <unistd.h> |
| 31 | #include <stdlib.h> |
| 32 | #endif |
| 33 | #include <ctype.h> |
| 34 | #include <fcntl.h> |
| 35 | #include <sys/stat.h> |
| 36 | |
| 37 | #include <ios> |
| 38 | #include <fstream> |
| 39 | |
| 40 | // *** StdBuf |
| 41 | |
| 42 | bool StdBuf::LoadFromFile(const char *szFile) try |
| 43 | { |
| 44 | std::ifstream file{szFile, std::ios::binary}; |
| 45 | // Create buf |
| 46 | New(inSize: FileSize(fname: szFile)); |
| 47 | return file && file.read(s: static_cast<char *>(getMData()), n: getSize()); |
| 48 | } |
| 49 | catch (const std::ios_base::failure &) |
| 50 | { |
| 51 | return false; |
| 52 | } |
| 53 | |
| 54 | bool StdBuf::SaveToFile(const char *szFile) const try |
| 55 | { |
| 56 | std::ofstream file{szFile, std::ios::binary | std::ios::trunc}; |
| 57 | return file && file.write(s: static_cast<const char *>(getData()), n: getSize()); |
| 58 | } |
| 59 | catch (const std::ios_base::failure &) |
| 60 | { |
| 61 | return false; |
| 62 | } |
| 63 | |
| 64 | bool StdStrBuf::LoadFromFile(const char *szFile) try |
| 65 | { |
| 66 | std::ifstream file{szFile, std::ios::binary}; |
| 67 | // Create buf |
| 68 | SetLength(FileSize(fname: szFile)); |
| 69 | return file && file.read(s: getMData(), n: getLength()); |
| 70 | } |
| 71 | catch (const std::ios_base::failure &) |
| 72 | { |
| 73 | return false; |
| 74 | } |
| 75 | |
| 76 | bool StdStrBuf::SaveToFile(const char *szFile) const try |
| 77 | { |
| 78 | std::ofstream file{szFile, std::ios::binary | std::ios::trunc}; |
| 79 | return file && file.write(s: getData(), n: getLength()); |
| 80 | } |
| 81 | catch (const std::ios_base::failure &) |
| 82 | { |
| 83 | return false; |
| 84 | } |
| 85 | |
| 86 | void StdBuf::CompileFunc(StdCompiler *pComp, int iType) |
| 87 | { |
| 88 | // Size (guess it is a small value most of the time - if it's big, an extra byte won't hurt anyway) |
| 89 | auto tmp = static_cast<uint32_t>(iSize); pComp->Value(rStruct: mkIntPackAdapt(rVal&: tmp)); iSize = tmp; |
| 90 | pComp->Separator(eSep: StdCompiler::SEP_PART2); |
| 91 | // Read/write data |
| 92 | if (pComp->isCompiler()) |
| 93 | { |
| 94 | New(inSize: iSize); |
| 95 | pComp->Raw(pData: getMData(), iSize, eType: StdCompiler::RawCompileType(iType)); |
| 96 | } |
| 97 | else |
| 98 | { |
| 99 | pComp->Raw(pData: const_cast<void *>(getData()), iSize, eType: StdCompiler::RawCompileType(iType)); |
| 100 | } |
| 101 | } |
| 102 | |
// *** StdStrBuf
| 104 | |
| 105 | void StdStrBuf::CompileFunc(StdCompiler *pComp, int iRawType) |
| 106 | { |
| 107 | if (pComp->isCompiler()) |
| 108 | { |
| 109 | std::string data; |
| 110 | pComp->String(str&: data, type: StdCompiler::RawCompileType(iRawType)); |
| 111 | Copy(pnData: data.c_str(), iChars: data.size()); |
| 112 | } |
| 113 | else |
| 114 | { |
| 115 | // pData is only read anyway, since it is a decompiler |
| 116 | const char *data{getData()}; |
| 117 | if (!data) |
| 118 | { |
| 119 | data = "" ; |
| 120 | } |
| 121 | pComp->String(string: data, maxLength: getLength(), type: StdCompiler::RawCompileType(iRawType)); |
| 122 | } |
| 123 | } |
| 124 | |
| 125 | // replace all occurences of one string with another. Return number of replacements. |
| 126 | int StdStrBuf::Replace(const char *szOld, const char *szNew, size_t iStartSearch) |
| 127 | { |
| 128 | if (!getPtr(i: 0) || !szOld) return 0; |
| 129 | if (!szNew) szNew = "" ; |
| 130 | int cnt = 0; |
| 131 | size_t iOldLen = strlen(s: szOld), iNewLen = strlen(s: szNew); |
| 132 | if (iOldLen != iNewLen) |
| 133 | { |
| 134 | // count number of occurences to calculate new string length |
| 135 | size_t iResultLen = getLength(); |
| 136 | const char *szPos = getPtr(i: iStartSearch); |
| 137 | while (szPos = SSearch(szString: szPos, szIndex: szOld)) |
| 138 | { |
| 139 | iResultLen += iNewLen - iOldLen; |
| 140 | ++cnt; |
| 141 | } |
| 142 | if (!cnt) return 0; |
| 143 | // now construct new string by replacement |
| 144 | StdStrBuf sResult; |
| 145 | sResult.New(inSize: iResultLen + 1); |
| 146 | const char *szRPos = getPtr(i: 0), *szRNextPos; |
| 147 | char *szWrite = sResult.getMPtr(i: 0); |
| 148 | if (iStartSearch) |
| 149 | { |
| 150 | memcpy(dest: szWrite, src: szRPos, n: iStartSearch * sizeof(char)); |
| 151 | szRPos += iStartSearch; |
| 152 | szWrite += iStartSearch; |
| 153 | } |
| 154 | while (szRNextPos = SSearch(szString: szRPos, szIndex: szOld)) |
| 155 | { |
| 156 | memcpy(dest: szWrite, src: szRPos, n: (szRNextPos - szRPos - iOldLen) * sizeof(char)); |
| 157 | szWrite += (szRNextPos - szRPos - iOldLen); |
| 158 | memcpy(dest: szWrite, src: szNew, n: iNewLen * sizeof(char)); |
| 159 | szWrite += iNewLen; |
| 160 | szRPos = szRNextPos; |
| 161 | } |
| 162 | strcpy(dest: szWrite, src: szRPos); |
| 163 | Take(Buf2&: sResult); |
| 164 | } |
| 165 | else |
| 166 | { |
| 167 | // replace directly in this string |
| 168 | char *szRPos = getMPtr(i: iStartSearch); |
| 169 | while (szRPos = const_cast<char *>(SSearch(szString: szRPos, szIndex: szOld))) |
| 170 | { |
| 171 | memcpy(dest: szRPos - iOldLen, src: szNew, n: iOldLen * sizeof(char)); |
| 172 | ++cnt; |
| 173 | } |
| 174 | } |
| 175 | return cnt; |
| 176 | } |
| 177 | |
| 178 | int StdStrBuf::ReplaceChar(char cOld, char cNew, size_t iStartSearch) |
| 179 | { |
| 180 | if (isNull()) return 0; |
| 181 | char *szPos = getMPtr(i: 0); |
| 182 | if (!cOld) return 0; |
| 183 | if (!cNew) cNew = '_'; |
| 184 | int cnt = 0; |
| 185 | while (szPos = strchr(s: szPos, c: cOld)) |
| 186 | { |
| 187 | *szPos++ = cNew; |
| 188 | ++cnt; |
| 189 | } |
| 190 | return cnt; |
| 191 | } |
| 192 | |
| 193 | void StdStrBuf::ReplaceEnd(size_t iPos, const char *szNewEnd) |
| 194 | { |
| 195 | size_t iLen = getLength(); |
| 196 | assert(iPos <= iLen); if (iPos > iLen) return; |
| 197 | size_t iEndLen = strlen(s: szNewEnd); |
| 198 | if (iLen - iPos != iEndLen) SetLength(iPos + iEndLen); |
| 199 | memcpy(dest: getMPtr(i: iPos), src: szNewEnd, n: iEndLen * sizeof(char)); |
| 200 | } |
| 201 | |
| 202 | bool StdStrBuf::ValidateChars(const char *szInitialChars, const char *szMidChars) |
| 203 | { |
| 204 | // only given chars may be in string |
| 205 | for (size_t i = 0; i < getLength(); ++i) |
| 206 | if (!strchr(s: i ? szMidChars : szInitialChars, c: getData()[i])) |
| 207 | return false; |
| 208 | return true; |
| 209 | } |
| 210 | |
| 211 | bool StdStrBuf::GetSection(size_t idx, StdStrBuf *psOutSection, char cSeparator) const |
| 212 | { |
| 213 | assert(psOutSection); |
| 214 | psOutSection->Clear(); |
| 215 | const char *szStr = getData(), *szSepPos; |
| 216 | if (!szStr) return false; // invalid argument |
| 217 | while ((szSepPos = strchr(s: szStr, c: cSeparator)) && idx) { szStr = szSepPos + 1; --idx; } |
| 218 | if (idx) return false; // indexed section not found |
| 219 | // fill output buffer with section, if not empty |
| 220 | if (!szSepPos) szSepPos = getData() + getLength(); |
| 221 | if (szSepPos != szStr) psOutSection->Copy(pnData: szStr, iChars: szSepPos - szStr); |
| 222 | // return true even if section is empty, because the section obviously exists |
| 223 | // (to enable loops like while (buf.GetSection(i++, §)) if (sect) ...) |
| 224 | return true; |
| 225 | } |
| 226 | |
| 227 | void StdStrBuf::EnsureUnicode() |
| 228 | { |
| 229 | bool valid = true; |
| 230 | int need_continuation_bytes = 0; |
| 231 | // Check wether valid UTF-8 |
| 232 | for (size_t i = 0; i < getSize(); ++i) |
| 233 | { |
| 234 | unsigned char c = *getPtr(i); |
| 235 | // remaining of a code point started before |
| 236 | if (need_continuation_bytes) |
| 237 | { |
| 238 | --need_continuation_bytes; |
| 239 | // (10000000-10111111) |
| 240 | if (0x80 <= c && c <= 0xBF) |
| 241 | continue; |
| 242 | else |
| 243 | { |
| 244 | valid = false; |
| 245 | break; |
| 246 | } |
| 247 | } |
| 248 | // ASCII |
| 249 | if (c < 0x80) |
| 250 | continue; |
| 251 | // Two byte sequence (11000010-11011111) |
| 252 | // Note: 1100000x is an invalid overlong sequence |
| 253 | if (0xC2 <= c && c <= 0xDF) |
| 254 | { |
| 255 | need_continuation_bytes = 1; |
| 256 | continue; |
| 257 | } |
| 258 | // Three byte sequence (11100000-11101111) |
| 259 | if (0xE0 <= c && c <= 0xEF) |
| 260 | { |
| 261 | need_continuation_bytes = 2; |
| 262 | continue; |
| 263 | // FIXME: could check for UTF-16 surrogates from a broken utf-16->utf-8 converter here |
| 264 | } |
| 265 | // Four byte sequence (11110000-11110100) |
| 266 | if (0xF0 <= c && c <= 0xF4) |
| 267 | { |
| 268 | need_continuation_bytes = 3; |
| 269 | continue; |
| 270 | } |
| 271 | valid = false; |
| 272 | break; |
| 273 | } |
| 274 | if (need_continuation_bytes) |
| 275 | valid = false; |
| 276 | // assume that it's windows-1252 and convert to utf-8 |
| 277 | if (!valid) |
| 278 | { |
| 279 | size_t j = 0; |
| 280 | StdStrBuf buf; |
| 281 | buf.Grow(iGrow: getLength()); |
| 282 | // totally unfounded statistic: most texts have less than 20 umlauts. |
| 283 | enum { GROWSIZE = 20 }; |
| 284 | for (size_t i = 0; i < getSize(); ++i) |
| 285 | { |
| 286 | unsigned char c = *getPtr(i); |
| 287 | if (c < 0x80) |
| 288 | { |
| 289 | if (j >= buf.getLength()) |
| 290 | buf.Grow(iGrow: GROWSIZE); |
| 291 | *buf.getMPtr(i: j++) = c; |
| 292 | continue; |
| 293 | } |
| 294 | if (0xA0 <= c) |
| 295 | { |
| 296 | if (j + 1 >= buf.getLength()) |
| 297 | buf.Grow(iGrow: GROWSIZE); |
| 298 | *buf.getMPtr(i: j++) = (0xC0 | c >> 6); |
| 299 | *buf.getMPtr(i: j++) = (0x80 | c & 0x3F); |
| 300 | continue; |
| 301 | } |
| 302 | // Extra windows-1252-characters |
| 303 | buf.SetLength(j); |
| 304 | // Let's hope that no editor mangles these UTF-8 strings... |
| 305 | static const char *[] = |
| 306 | { |
| 307 | "€" , "?" , "‚" , "ƒ" , "„" , "…" , "†" , "‡" , "ˆ" , "‰" , "Š" , "‹" , "Œ" , "?" , "Ž" , "?" , |
| 308 | "?" , "‘" , "’" , "“" , "”" , "•" , "–" , "—" , "˜" , "™" , "š" , "›" , "œ" , "?" , "ž" , "Ÿ" |
| 309 | }; |
| 310 | buf.Append(pnData: extra_chars[c - 0x80]); |
| 311 | j += strlen(s: extra_chars[c - 0x80]); |
| 312 | } |
| 313 | buf.SetLength(j); |
| 314 | Take(Buf2&: buf); |
| 315 | } |
| 316 | } |
| 317 | |
| 318 | bool StdStrBuf::TrimSpaces() |
| 319 | { |
| 320 | // get left trim |
| 321 | size_t iSpaceLeftCount = 0, iLength = getLength(); |
| 322 | if (!iLength) return false; |
| 323 | const char *szStr = getData(); |
| 324 | while (iSpaceLeftCount < iLength) |
| 325 | if (isspace(static_cast<unsigned char>(szStr[iSpaceLeftCount]))) |
| 326 | ++iSpaceLeftCount; |
| 327 | else |
| 328 | break; |
| 329 | // only spaces? Clear! |
| 330 | if (iSpaceLeftCount == iLength) |
| 331 | { |
| 332 | Clear(); |
| 333 | return true; |
| 334 | } |
| 335 | // get right trim |
| 336 | size_t iSpaceRightCount = 0; |
| 337 | while (isspace(static_cast<unsigned char>(szStr[iLength - 1 - iSpaceRightCount]))) ++iSpaceRightCount; |
| 338 | // anything to trim? |
| 339 | if (!iSpaceLeftCount && !iSpaceRightCount) return false; |
| 340 | // only right trim? Can do this by shortening |
| 341 | if (!iSpaceLeftCount) |
| 342 | { |
| 343 | SetLength(iLength - iSpaceRightCount); |
| 344 | return true; |
| 345 | } |
| 346 | // left trim involved - move text and shorten |
| 347 | memmove(dest: getMPtr(i: 0), src: szStr + iSpaceLeftCount, n: iLength - iSpaceLeftCount - iSpaceRightCount); |
| 348 | SetLength(iLength - iSpaceLeftCount - iSpaceRightCount); |
| 349 | return true; |
| 350 | } |
| 351 | |