StdBuf.cpp source code [LegacyClonk/src/StdBuf.cpp]

1	/*
2	* LegacyClonk
3	*
4	* Copyright (c) RedWolf Design
5	* Copyright (c) 2017-2021, The LegacyClonk Team and contributors
6	*
7	* Distributed under the terms of the ISC license; see accompanying file
8	* "COPYING" for details.
9	*
10	* "Clonk" is a registered trademark of Matthes Bender, used with permission.
11	* See accompanying file "TRADEMARK" for details.
12	*
13	* To redistribute this file separately, substitute the full license texts
14	* for the above references.
15	*/
16
17	#include <Standard.h>
18	#include <StdBuf.h>
19	#include <StdCompiler.h>
20	#include <StdAdaptors.h>
21	#include <StdFile.h>
22
23	#include <stdarg.h>
24	#include <stdio.h>
25	#ifdef _WIN32
26	#include <io.h>
27	#else
28	#define O_BINARY 0
29	#define O_SEQUENTIAL 0
30	#include <unistd.h>
31	#include <stdlib.h>
32	#endif
33	#include <ctype.h>
34	#include <fcntl.h>
35	#include <sys/stat.h>
36
37	#include <ios>
38	#include <fstream>
39
// *** StdBuf
41
42	bool StdBuf::LoadFromFile(const char szFile) try*
43	{
44	std::ifstream file{szFile, std::ios::binary};
45	// Create buf
46	New(inSize: FileSize(fname: szFile));
47	return file && file.read(s: static_cast<char *>(getMData()), n: getSize());
48	}
49	catch (const std::ios_base::failure &)
50	{
51	return false;
52	}
53
54	bool StdBuf::SaveToFile(const char szFile) const* try
55	{
56	std::ofstream file{szFile, std::ios::binary \| std::ios::trunc};
57	return file && file.write(s: static_cast<const char *>(getData()), n: getSize());
58	}
59	catch (const std::ios_base::failure &)
60	{
61	return false;
62	}
63
64	bool StdStrBuf::LoadFromFile(const char szFile) try*
65	{
66	std::ifstream file{szFile, std::ios::binary};
67	// Create buf
68	SetLength(FileSize(fname: szFile));
69	return file && file.read(s: getMData(), n: getLength());
70	}
71	catch (const std::ios_base::failure &)
72	{
73	return false;
74	}
75
76	bool StdStrBuf::SaveToFile(const char szFile) const* try
77	{
78	std::ofstream file{szFile, std::ios::binary \| std::ios::trunc};
79	return file && file.write(s: getData(), n: getLength());
80	}
81	catch (const std::ios_base::failure &)
82	{
83	return false;
84	}
85
86	void StdBuf::CompileFunc(StdCompiler pComp, int* iType)
87	{
88	// Size (guess it is a small value most of the time - if it's big, an extra byte won't hurt anyway)
89	auto tmp = static_cast<uint32_t>(iSize); pComp->Value(rStruct: mkIntPackAdapt(rVal&: tmp)); iSize = tmp;
90	pComp->Separator(eSep: StdCompiler::SEP_PART2);
91	// Read/write data
92	if (pComp->isCompiler())
93	{
94	New(inSize: iSize);
95	pComp->Raw(pData: getMData(), iSize, eType: StdCompiler::RawCompileType(iType));
96	}
97	else
98	{
99	pComp->Raw(pData: const_cast<void *>(getData()), iSize, eType: StdCompiler::RawCompileType(iType));
100	}
101	}
102
// *** StdStringBuf
104
105	void StdStrBuf::CompileFunc(StdCompiler pComp, int* iRawType)
106	{
107	if (pComp->isCompiler())
108	{
109	std::string data;
110	pComp->String(str&: data, type: StdCompiler::RawCompileType(iRawType));
111	Copy(pnData: data.c_str(), iChars: data.size());
112	}
113	else
114	{
115	// pData is only read anyway, since it is a decompiler
116	const char *data{getData()};
117	if (!data)
118	{
119	data = "";
120	}
121	pComp->String(string: data, maxLength: getLength(), type: StdCompiler::RawCompileType(iRawType));
122	}
123	}
124
125	// replace all occurences of one string with another. Return number of replacements.
126	int StdStrBuf::Replace(const char szOld, const* char *szNew, size_t iStartSearch)
127	{
128	if (!getPtr(i: `0`) \|\| !szOld) return `0`;
129	if (!szNew) szNew = "";
130	int cnt = `0`;
131	size_t iOldLen = strlen(s: szOld), iNewLen = strlen(s: szNew);
132	if (iOldLen != iNewLen)
133	{
134	// count number of occurences to calculate new string length
135	size_t iResultLen = getLength();
136	const char *szPos = getPtr(i: iStartSearch);
137	while (szPos = SSearch(szString: szPos, szIndex: szOld))
138	{
139	iResultLen += iNewLen - iOldLen;
140	++cnt;
141	}
142	if (!cnt) return `0`;
143	// now construct new string by replacement
144	StdStrBuf sResult;
145	sResult.New(inSize: iResultLen + `1`);
146	const char szRPos = getPtr(i: `0`), szRNextPos;
147	char *szWrite = sResult.getMPtr(i: `0`);
148	if (iStartSearch)
149	{
150	memcpy(dest: szWrite, src: szRPos, n: iStartSearch * sizeof(char));
151	szRPos += iStartSearch;
152	szWrite += iStartSearch;
153	}
154	while (szRNextPos = SSearch(szString: szRPos, szIndex: szOld))
155	{
156	memcpy(dest: szWrite, src: szRPos, n: (szRNextPos - szRPos - iOldLen) * sizeof(char));
157	szWrite += (szRNextPos - szRPos - iOldLen);
158	memcpy(dest: szWrite, src: szNew, n: iNewLen * sizeof(char));
159	szWrite += iNewLen;
160	szRPos = szRNextPos;
161	}
162	strcpy(dest: szWrite, src: szRPos);
163	Take(Buf2&: sResult);
164	}
165	else
166	{
167	// replace directly in this string
168	char *szRPos = getMPtr(i: iStartSearch);
169	while (szRPos = const_cast<char *>(SSearch(szString: szRPos, szIndex: szOld)))
170	{
171	memcpy(dest: szRPos - iOldLen, src: szNew, n: iOldLen * sizeof(char));
172	++cnt;
173	}
174	}
175	return cnt;
176	}
177
178	int StdStrBuf::ReplaceChar(char cOld, char cNew, size_t iStartSearch)
179	{
180	if (isNull()) return `0`;
181	char *szPos = getMPtr(i: `0`);
182	if (!cOld) return `0`;
183	if (!cNew) cNew = `'_'`;
184	int cnt = `0`;
185	while (szPos = strchr(s: szPos, c: cOld))
186	{
187	*szPos++ = cNew;
188	++cnt;
189	}
190	return cnt;
191	}
192
193	void StdStrBuf::ReplaceEnd(size_t iPos, const char *szNewEnd)
194	{
195	size_t iLen = getLength();
196	assert(iPos <= iLen); if (iPos > iLen) return;
197	size_t iEndLen = strlen(s: szNewEnd);
198	if (iLen - iPos != iEndLen) SetLength(iPos + iEndLen);
199	memcpy(dest: getMPtr(i: iPos), src: szNewEnd, n: iEndLen * sizeof(char));
200	}
201
202	bool StdStrBuf::ValidateChars(const char szInitialChars, const* char *szMidChars)
203	{
204	// only given chars may be in string
205	for (size_t i = `0`; i < getLength(); ++i)
206	if (!strchr(s: i ? szMidChars : szInitialChars, c: getData()[i]))
207	return false;
208	return true;
209	}
210
211	bool StdStrBuf::GetSection(size_t idx, StdStrBuf psOutSection, char* cSeparator) const
212	{
213	assert(psOutSection);
214	psOutSection->Clear();
215	const char szStr = getData(), szSepPos;
216	if (!szStr) return false; // invalid argument
217	while ((szSepPos = strchr(s: szStr, c: cSeparator)) && idx) { szStr = szSepPos + `1`; --idx; }
218	if (idx) return false; // indexed section not found
219	// fill output buffer with section, if not empty
220	if (!szSepPos) szSepPos = getData() + getLength();
221	if (szSepPos != szStr) psOutSection->Copy(pnData: szStr, iChars: szSepPos - szStr);
222	// return true even if section is empty, because the section obviously exists
223	// (to enable loops like while (buf.GetSection(i++, &sect)) if (sect) ...)
224	return true;
225	}
226
227	void StdStrBuf::EnsureUnicode()
228	{
229	bool valid = true;
230	int need_continuation_bytes = `0`;
231	// Check wether valid UTF-8
232	for (size_t i = `0`; i < getSize(); ++i)
233	{
234	unsigned char c = *getPtr(i);
235	// remaining of a code point started before
236	if (need_continuation_bytes)
237	{
238	--need_continuation_bytes;
239	// (10000000-10111111)
240	if (`0x80` <= c && c <= `0xBF`)
241	continue;
242	else
243	{
244	valid = false;
245	break;
246	}
247	}
248	// ASCII
249	if (c < `0x80`)
250	continue;
251	// Two byte sequence (11000010-11011111)
252	// Note: 1100000x is an invalid overlong sequence
253	if (`0xC2` <= c && c <= `0xDF`)
254	{
255	need_continuation_bytes = `1`;
256	continue;
257	}
258	// Three byte sequence (11100000-11101111)
259	if (`0xE0` <= c && c <= `0xEF`)
260	{
261	need_continuation_bytes = `2`;
262	continue;
263	// FIXME: could check for UTF-16 surrogates from a broken utf-16->utf-8 converter here
264	}
265	// Four byte sequence (11110000-11110100)
266	if (`0xF0` <= c && c <= `0xF4`)
267	{
268	need_continuation_bytes = `3`;
269	continue;
270	}
271	valid = false;
272	break;
273	}
274	if (need_continuation_bytes)
275	valid = false;
276	// assume that it's windows-1252 and convert to utf-8
277	if (!valid)
278	{
279	size_t j = `0`;
280	StdStrBuf buf;
281	buf.Grow(iGrow: getLength());
282	// totally unfounded statistic: most texts have less than 20 umlauts.
283	enum { GROWSIZE = `20` };
284	for (size_t i = `0`; i < getSize(); ++i)
285	{
286	unsigned char c = *getPtr(i);
287	if (c < `0x80`)
288	{
289	if (j >= buf.getLength())
290	buf.Grow(iGrow: GROWSIZE);
291	*buf.getMPtr(i: j++) = c;
292	continue;
293	}
294	if (`0xA0` <= c)
295	{
296	if (j + `1` >= buf.getLength())
297	buf.Grow(iGrow: GROWSIZE);
298	*buf.getMPtr(i: j++) = (`0xC0` \| c >> `6`);
299	*buf.getMPtr(i: j++) = (`0x80` \| c & `0x3F`);
300	continue;
301	}
302	// Extra windows-1252-characters
303	buf.SetLength(j);
304	// Let's hope that no editor mangles these UTF-8 strings...
305	static const char *extra_chars[] =
306	{
307	"€", "?", "‚", "ƒ", "„", "…", "†", "‡", "ˆ", "‰", "Š", "‹", "Œ", "?", "Ž", "?",
308	"?", "‘", "’", "“", "”", "•", "–", "—", "˜", "™", "š", "›", "œ", "?", "ž", "Ÿ"
309	};
310	buf.Append(pnData: extra_chars[c - `0x80`]);
311	j += strlen(s: extra_chars[c - `0x80`]);
312	}
313	buf.SetLength(j);
314	Take(Buf2&: buf);
315	}
316	}
317
318	bool StdStrBuf::TrimSpaces()
319	{
320	// get left trim
321	size_t iSpaceLeftCount = `0`, iLength = getLength();
322	if (!iLength) return false;
323	const char *szStr = getData();
324	while (iSpaceLeftCount < iLength)
325	if (isspace(static_cast<unsigned char>(szStr[iSpaceLeftCount])))
326	++iSpaceLeftCount;
327	else
328	break;
329	// only spaces? Clear!
330	if (iSpaceLeftCount == iLength)
331	{
332	Clear();
333	return true;
334	}
335	// get right trim
336	size_t iSpaceRightCount = `0`;
337	while (isspace(static_cast<unsigned char>(szStr[iLength - `1` - iSpaceRightCount]))) ++iSpaceRightCount;
338	// anything to trim?
339	if (!iSpaceLeftCount && !iSpaceRightCount) return false;
340	// only right trim? Can do this by shortening
341	if (!iSpaceLeftCount)
342	{
343	SetLength(iLength - iSpaceRightCount);
344	return true;
345	}
346	// left trim involved - move text and shorten
347	memmove(dest: getMPtr(i: `0`), src: szStr + iSpaceLeftCount, n: iLength - iSpaceLeftCount - iSpaceRightCount);
348	SetLength(iLength - iSpaceLeftCount - iSpaceRightCount);
349	return true;
350	}
351

Browse the source code of LegacyClonk/src/StdBuf.cpp