1/*
2 * LegacyClonk
3 *
4 * Copyright (c) RedWolf Design
5 * Copyright (c) 2005, Sven2
6 * Copyright (c) 2017-2022, The LegacyClonk Team and contributors
7 *
8 * Distributed under the terms of the ISC license; see accompanying file
9 * "COPYING" for details.
10 *
11 * "Clonk" is a registered trademark of Matthes Bender, used with permission.
12 * See accompanying file "TRADEMARK" for details.
13 *
14 * To redistribute this file separately, substitute the full license texts
15 * for the above references.
16 */
17
18// RTF file parsing functionality
19
#include "C4Include.h"
#include "C4RTF.h"

#include <algorithm>
#include <climits>
#include <cstdlib>
24
25extern C4RTFFile::KeywordTableEntry RTFKeywordTable[];
26
27C4RTFFile::C4RTFFile() : pState(nullptr) {}
28
// Destructor: frees any property states that are still linked from pState.
C4RTFFile::~C4RTFFile()
{
	ClearState();
}
33
34void C4RTFFile::ClearState()
35{
36 PropertyState *psNext = pState, *ps;
37 while (ps = psNext)
38 {
39 psNext = ps->pNext;
40 delete ps;
41 }
42 pState = nullptr;
43 fSkipDestIfUnknownKeyword = false;
44}
45
46void C4RTFFile::AssertNoEOF(size_t iPos)
47{
48 if (iPos >= sRTF.getSize()) throw ParserError("Unexpected end of file");
49}
50
51void C4RTFFile::ChangeDest(StdStrBuf &sResult, int iDest)
52{
53 // nothing to do if text is already skipped
54 if (pState->dest == dsSkip) return;
55 // otherwise, set new dest (always skip)
56 pState->dest = static_cast<DestState>(iDest);
57}
58
59void C4RTFFile::SpecialKeyword(StdStrBuf &sResult, int iKeyw, int iParam)
60{
61 switch (iKeyw)
62 {
63 case specBin:
64 if (iParam > 0)
65 {
66 pState->eState = psBinary;
67 pState->iHexBinCnt = iParam;
68 }
69 break;
70 case specHex:
71 pState->eState = psHex;
72 pState->iHexBinCnt = 2;
73 break;
74 case specSkipDest:
75 fSkipDestIfUnknownKeyword = true;
76 break;
77 };
78}
79
80void C4RTFFile::TranslateKeyword(StdStrBuf &sResult, const char *szKeyword, int iParam, bool fHasIntParam)
81{
82 // get keyword from table
83 KeywordTableEntry *pKw = RTFKeywordTable;
84 while (pKw->szKeyword) if (!strcmp(s1: szKeyword, s2: pKw->szKeyword)) break; else ++pKw;
85 // no found?
86 if (!pKw->szKeyword)
87 {
88 // unknown destination: Skip
89 if (fSkipDestIfUnknownKeyword)
90 {
91 pState->dest = dsSkip;
92 fSkipDestIfUnknownKeyword = false;
93 }
94 return;
95 }
96 fSkipDestIfUnknownKeyword = false;
97 // keyword known - handle it
98 switch (pKw->eType)
99 {
100 case KeywordTableEntry::kwdProp:
101 // property: Use default param if none given or forced
102 ApplyPropChange(iProp: pKw->idx, iParam: (pKw->fForceDefaultParam || !fHasIntParam) ? pKw->iDefaultParam : iParam);
103 break;
104 case KeywordTableEntry::kwdChars:
105 // direct chars
106 ParseChars(sResult, szChars: pKw->szChars);
107 break;
108 case KeywordTableEntry::kwdDest:
109 ChangeDest(sResult, iDest: pKw->idx);
110 break;
111 case KeywordTableEntry::kwdSpec:
112 SpecialKeyword(sResult, iKeyw: pKw->idx, iParam);
113 break;
114 }
115}
116
117void C4RTFFile::ParseKeyword(StdStrBuf &sResult, size_t &iPos)
118{
119 bool fHasIntParam = false;
120 int iSign = +1;
121 int iParamInt = 0; // parameter as integer
122 char szKeyword[30 + 1]; *szKeyword = 0;
123 char szParameter[20 + 1]; *szParameter = 0;
124
125 AssertNoEOF(iPos);
126 const auto rtfString = static_cast<const char *>(sRTF.getData());
127 char c = rtfString[iPos++];
128 if (!isalpha(static_cast<unsigned char>(c)))
129 {
130 // parse direct control symbol
131 szKeyword[0] = c;
132 szKeyword[1] = 0;
133 }
134 else
135 {
136 // get keyword string
137 char *szWrite = szKeyword;
138 do
139 {
140 *szWrite = c;
141 // do not overflow buffer - longer keywords will be read, not recognized and silently discarded
142 if (szWrite - szKeyword < 30) ++szWrite;
143 // do not go past rtf file
144 if (iPos >= sRTF.getSize()) break;
145 // next char
146 c = rtfString[iPos++];
147 } while (isalpha(static_cast<unsigned char>(c)));
148 *szWrite = 0;
149 // parameter is a negative number?
150 if (c == '-')
151 {
152 iSign = -1;
153 if (iPos < sRTF.getSize()) c = rtfString[iPos++];
154 }
155 if (isdigit(static_cast<unsigned char>(c)))
156 {
157 // get parameter as number
158 char *szWrite = szParameter;
159 do
160 {
161 *szWrite = c;
162 // do not overflow buffer - longer parameters will be read, not recognized and silently discarded
163 if (szWrite - szParameter < 20) ++szWrite;
164 // do not go past rtf file
165 if (iPos >= sRTF.getSize()) break;
166 // next char
167 c = rtfString[iPos++];
168 } while (isdigit(static_cast<unsigned char>(c)));
169 *szWrite = 0;
170 iParamInt = atoi(nptr: szParameter) * iSign;
171 fHasIntParam = true;
172 }
173 // if next char is not a spacing for the command, it does not belong to the keyword and must be re-parsed
174 if (c != ' ') --iPos;
175 }
176 // execute keyword action
177 TranslateKeyword(sResult, szKeyword, iParam: iParamInt, fHasIntParam);
178}
179
180void C4RTFFile::ParseChar(StdStrBuf &sResult, char c)
181{
182 // parse as 1-char-string
183 char buf[2];
184 buf[0] = c; buf[1] = '\0';
185 ParseChars(sResult, szChars: buf);
186}
187
188void C4RTFFile::ParseChars(StdStrBuf &sResult, const char *szChars)
189{
190 // route the characters to the appropriate destination stream.
191 switch (pState->dest)
192 {
193 case dsNormal:
194 // process characters: Append to result buffer
195 sResult.Append(pnData: szChars);
196 break;
197
198 case dsSkip:
199 // skip character
200 break;
201 }
202}
203
204void C4RTFFile::ParseHexChar(StdStrBuf &sResult, char c)
205{
206 pState->bHex = pState->bHex << 4;
207 if (isdigit(static_cast<unsigned char>(c)))
208 pState->bHex += c - '0';
209 else if (Inside<char>(ival: c, lbound: 'a', rbound: 'f'))
210 pState->bHex += c - 'a' + 10;
211 else if (Inside<char>(ival: c, lbound: 'A', rbound: 'F'))
212 pState->bHex += c - 'A' + 10;
213 else
214 throw ParserError("Invalid hex character");
215 if (!--pState->iHexBinCnt)
216 {
217 pState->eState = psNormal;
218 ParseChar(sResult, c: pState->bHex);
219 }
220}
221
222void C4RTFFile::PushState()
223{
224 // store current state to new
225 PropertyState *pNew = new PropertyState(*pState);
226 pNew->pNext = pState;
227 // update current state to new; beginning in default parser mode
228 pState = pNew;
229 pState->eState = psNormal;
230}
231
232void C4RTFFile::PopState()
233{
234 if (!pState->pNext) throw ParserError("Too many brackets closed");
235 // if the destination ends, finish it
236 if (pState->dest != pState->pNext->dest) EndGroupAction();
237 // return to last state
238 PropertyState *pKill = pState;
239 pState = pState->pNext;
240 delete pKill;
241 pState->eState = psNormal;
242}
243
244StdStrBuf C4RTFFile::GetPlainText()
245{
246 // clear any previous crap
247 ClearState();
248 // start with a fresh state
249 pState = new PropertyState();
250 pState->eState = psNormal;
251 StdStrBuf sResult;
252 // nothing to do for empty RTFs
253 if (sRTF.getSize() <= 0) return sResult;
254 // parse through all chars
255 try
256 {
257 char c; size_t iPos = 0;
258 while (iPos < sRTF.getSize())
259 {
260 c = reinterpret_cast<const char *>(sRTF.getData())[iPos++];
261 // binary parsing?
262 if (pState->eState == psBinary)
263 {
264 if (!--pState->iHexBinCnt) pState->eState = psNormal;
265 ParseChar(sResult, c);
266 continue;
267 }
268 // normal parsing: Handle state blocks
269 switch (c)
270 {
271 case '{': PushState(); break;
272 case '}': PopState(); break;
273 case '\\':
274 ParseKeyword(sResult, iPos);
275 break;
276 case 0x0d: case 0x0a: // ignored chars
277 break;
278 default:
279 // regular char parsing
280 if (pState->eState == psNormal)
281 // normal mode
282 ParseChar(sResult, c);
283 else if (pState->eState == psHex)
284 ParseHexChar(sResult, c);
285 else
286 throw ParserError("Invalid State");
287 break;
288 }
289 // next char
290 }
291 // all states must be closed in the end
292 if (pState->pNext) throw ParserError("Block not closed");
293 }
294 catch (const ParserError &pe)
295 {
296 // invalid RTF file: Display error message instead
297 sResult = "Invalid RTF file: ";
298 sResult.Append(Buf2: pe.ErrorText);
299 }
300 // cleanup
301 ClearState();
302 // return result
303 return sResult;
304}
305
306#define kwdChars C4RTFFile::KeywordTableEntry::kwdChars
307#define kwdSpec C4RTFFile::KeywordTableEntry::kwdSpec
308#define kwdDest C4RTFFile::KeywordTableEntry::kwdDest
309
310// Keyword descriptions
311C4RTFFile::KeywordTableEntry RTFKeywordTable[] =
312{
313 // keyword iDefaultPar fForceDef eType idx
314 { .szKeyword: "par", .iDefaultParam: 0, .fForceDefaultParam: false, kwdChars, .szChars: "\n", .idx: 0 },
315 { .szKeyword: "\0x0a", .iDefaultParam: 0, .fForceDefaultParam: false, kwdChars, .szChars: "\n", .idx: 0 },
316 { .szKeyword: "\0x0d", .iDefaultParam: 0, .fForceDefaultParam: false, kwdChars, .szChars: "\n", .idx: 0 },
317 { .szKeyword: "tab", .iDefaultParam: 0, .fForceDefaultParam: false, kwdChars, .szChars: "\n", .idx: 0 },
318 { .szKeyword: "ldblquote", .iDefaultParam: 0, .fForceDefaultParam: false, kwdChars, .szChars: "\"", .idx: 0 },
319 { .szKeyword: "rdblquote", .iDefaultParam: 0, .fForceDefaultParam: false, kwdChars, .szChars: "\"", .idx: 0 },
320 { .szKeyword: "lquote", .iDefaultParam: 0, .fForceDefaultParam: false, kwdChars, .szChars: "'", .idx: 0 },
321 { .szKeyword: "rquote", .iDefaultParam: 0, .fForceDefaultParam: false, kwdChars, .szChars: "'", .idx: 0 },
322 { .szKeyword: "bin", .iDefaultParam: 0, .fForceDefaultParam: false, kwdSpec, .szChars: nullptr, .idx: C4RTFFile::specBin },
323 { .szKeyword: "*", .iDefaultParam: 0, .fForceDefaultParam: false, kwdSpec, .szChars: nullptr, .idx: C4RTFFile::specSkipDest },
324 { .szKeyword: "'", .iDefaultParam: 0, .fForceDefaultParam: false, kwdSpec, .szChars: nullptr, .idx: C4RTFFile::specHex },
325 { .szKeyword: "author", .iDefaultParam: 0, .fForceDefaultParam: false, kwdDest, .szChars: nullptr, .idx: C4RTFFile::dsSkip },
326 { .szKeyword: "buptim", .iDefaultParam: 0, .fForceDefaultParam: false, kwdDest, .szChars: nullptr, .idx: C4RTFFile::dsSkip },
327 { .szKeyword: "colortbl", .iDefaultParam: 0, .fForceDefaultParam: false, kwdDest, .szChars: nullptr, .idx: C4RTFFile::dsSkip },
328 { .szKeyword: "comment", .iDefaultParam: 0, .fForceDefaultParam: false, kwdDest, .szChars: nullptr, .idx: C4RTFFile::dsSkip },
329 { .szKeyword: "creatim", .iDefaultParam: 0, .fForceDefaultParam: false, kwdDest, .szChars: nullptr, .idx: C4RTFFile::dsSkip },
330 { .szKeyword: "doccomm", .iDefaultParam: 0, .fForceDefaultParam: false, kwdDest, .szChars: nullptr, .idx: C4RTFFile::dsSkip },
331 { .szKeyword: "fonttbl", .iDefaultParam: 0, .fForceDefaultParam: false, kwdDest, .szChars: nullptr, .idx: C4RTFFile::dsSkip },
332 { .szKeyword: "footer", .iDefaultParam: 0, .fForceDefaultParam: false, kwdDest, .szChars: nullptr, .idx: C4RTFFile::dsSkip },
333 { .szKeyword: "footerf", .iDefaultParam: 0, .fForceDefaultParam: false, kwdDest, .szChars: nullptr, .idx: C4RTFFile::dsSkip },
334 { .szKeyword: "footerl", .iDefaultParam: 0, .fForceDefaultParam: false, kwdDest, .szChars: nullptr, .idx: C4RTFFile::dsSkip },
335 { .szKeyword: "footerr", .iDefaultParam: 0, .fForceDefaultParam: false, kwdDest, .szChars: nullptr, .idx: C4RTFFile::dsSkip },
336 { .szKeyword: "footnote", .iDefaultParam: 0, .fForceDefaultParam: false, kwdDest, .szChars: nullptr, .idx: C4RTFFile::dsSkip },
337 { .szKeyword: "ftncn", .iDefaultParam: 0, .fForceDefaultParam: false, kwdDest, .szChars: nullptr, .idx: C4RTFFile::dsSkip },
338 { .szKeyword: "ftnsep", .iDefaultParam: 0, .fForceDefaultParam: false, kwdDest, .szChars: nullptr, .idx: C4RTFFile::dsSkip },
339 { .szKeyword: "ftnsepc", .iDefaultParam: 0, .fForceDefaultParam: false, kwdDest, .szChars: nullptr, .idx: C4RTFFile::dsSkip },
340 { .szKeyword: "header", .iDefaultParam: 0, .fForceDefaultParam: false, kwdDest, .szChars: nullptr, .idx: C4RTFFile::dsSkip },
341 { .szKeyword: "headerf", .iDefaultParam: 0, .fForceDefaultParam: false, kwdDest, .szChars: nullptr, .idx: C4RTFFile::dsSkip },
342 { .szKeyword: "headerl", .iDefaultParam: 0, .fForceDefaultParam: false, kwdDest, .szChars: nullptr, .idx: C4RTFFile::dsSkip },
343 { .szKeyword: "headerr", .iDefaultParam: 0, .fForceDefaultParam: false, kwdDest, .szChars: nullptr, .idx: C4RTFFile::dsSkip },
344 { .szKeyword: "info", .iDefaultParam: 0, .fForceDefaultParam: false, kwdDest, .szChars: nullptr, .idx: C4RTFFile::dsSkip },
345 { .szKeyword: "keywords", .iDefaultParam: 0, .fForceDefaultParam: false, kwdDest, .szChars: nullptr, .idx: C4RTFFile::dsSkip },
346 { .szKeyword: "operator", .iDefaultParam: 0, .fForceDefaultParam: false, kwdDest, .szChars: nullptr, .idx: C4RTFFile::dsSkip },
347 { .szKeyword: "pict", .iDefaultParam: 0, .fForceDefaultParam: false, kwdDest, .szChars: nullptr, .idx: C4RTFFile::dsSkip },
348 { .szKeyword: "printim", .iDefaultParam: 0, .fForceDefaultParam: false, kwdDest, .szChars: nullptr, .idx: C4RTFFile::dsSkip },
349 { .szKeyword: "private1", .iDefaultParam: 0, .fForceDefaultParam: false, kwdDest, .szChars: nullptr, .idx: C4RTFFile::dsSkip },
350 { .szKeyword: "revtim", .iDefaultParam: 0, .fForceDefaultParam: false, kwdDest, .szChars: nullptr, .idx: C4RTFFile::dsSkip },
351 { .szKeyword: "rxe", .iDefaultParam: 0, .fForceDefaultParam: false, kwdDest, .szChars: nullptr, .idx: C4RTFFile::dsSkip },
352 { .szKeyword: "stylesheet", .iDefaultParam: 0, .fForceDefaultParam: false, kwdDest, .szChars: nullptr, .idx: C4RTFFile::dsSkip },
353 { .szKeyword: "subject", .iDefaultParam: 0, .fForceDefaultParam: false, kwdDest, .szChars: nullptr, .idx: C4RTFFile::dsSkip },
354 { .szKeyword: "tc", .iDefaultParam: 0, .fForceDefaultParam: false, kwdDest, .szChars: nullptr, .idx: C4RTFFile::dsSkip },
355 { .szKeyword: "title", .iDefaultParam: 0, .fForceDefaultParam: false, kwdDest, .szChars: nullptr, .idx: C4RTFFile::dsSkip },
356 { .szKeyword: "txe", .iDefaultParam: 0, .fForceDefaultParam: false, kwdDest, .szChars: nullptr, .idx: C4RTFFile::dsSkip },
357 { .szKeyword: "xe", .iDefaultParam: 0, .fForceDefaultParam: false, kwdDest, .szChars: nullptr, .idx: C4RTFFile::dsSkip },
358 { .szKeyword: "{", .iDefaultParam: 0, .fForceDefaultParam: false, kwdChars, .szChars: "{", .idx: 0 },
359 { .szKeyword: "}", .iDefaultParam: 0, .fForceDefaultParam: false, kwdChars, .szChars: "}", .idx: 0 },
360 { .szKeyword: "\\", .iDefaultParam: 0, .fForceDefaultParam: false, kwdChars, .szChars: "\\", .idx: 0 },
361 { .szKeyword: nullptr, .iDefaultParam: 0, .fForceDefaultParam: false, kwdChars, .szChars: nullptr, .idx: 0 }
362};
363
// Returns a copy of plainText in which every backslash, '{' and '}' is
// preceded by a backslash, so that the text can be embedded in RTF.
// All other characters are copied unchanged.
std::string RtfEscape(std::string_view plainText)
{
	static constexpr std::string_view needEscapeChars{"\\{}"};

	// first pass: count the characters that need an extra backslash,
	// so that the result can be allocated in one go
	std::size_t escapeCount = 0;
	for (const char c : plainText)
	{
		if (needEscapeChars.find(c) != std::string_view::npos) ++escapeCount;
	}

	std::string result;
	result.reserve(plainText.size() + escapeCount);

	// second pass: copy, inserting the escape character where required
	for (const char c : plainText)
	{
		if (needEscapeChars.find(c) != std::string_view::npos)
		{
			result.push_back('\\');
		}
		result.push_back(c);
	}

	return result;
}
389