BodytextParser.php 10.2 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
<?php
/**
 * Created by PhpStorm.
 * User: crose
 * Date: 3/18/16
 * Time: 5:43 PM
 */

namespace qfq;

11
12
13
// Internal placeholders which stand in for the nesting delimiters while a bodytext is processed.
// Both placeholders are exactly NESTING_TOKEN_LENGTH characters long; the length is used to skip over them.
const NESTING_TOKEN_OPEN = '#&nesting-open-&#';
const NESTING_TOKEN_CLOSE = '#&nesting-close&#';
const NESTING_TOKEN_LENGTH = 17;
14
15


16
17
18
19
/**
 * Class BodytextParser
 * @package qfq
 */
20
class BodytextParser {
21

22
    /**
     * Convert a raw bodytext into its flat, un-nested representation.
     *
     * The steps are executed strictly in this order:
     *  1. Trim every line, drop empty and comment lines, determine the nesting delimiter pair.
     *  2. Hide double curly braces (Support::encryptDoubleCurlyBraces) so that e.g. 'form = {{form}}' is not
     *     mistaken for nesting.
     *  3. Join continuation lines.
     *  4. Replace the nesting delimiters by internal tokens and un-nest all levels.
     *  5. Clean up once more and restore the double curly braces.
     *
     * @param string $bodyText
     *
     * @return mixed|string
     * @throws UserFormException  if an open token is still present after un-nesting (close delimiter missing).
     */
    public function process($bodyText) {

        // Both are filled in (by reference) during the first clean-up pass and reused by every later step.
        $open = '';
        $close = '';

        $text = $this->trimAndRemoveCommentAndEmptyLine($bodyText, $open, $close);

        // Hide double curly braces first, so 'form = {{form}}' is not mistaken for nesting.
        $text = Support::encryptDoubleCurlyBraces($text);
        $text = $this->joinLine($text, $open, $close);

        $text = $this->encryptNestingDelimeter($text, $open, $close);
        $text = $this->unNest($text, $open, $close);

        $text = $this->trimAndRemoveCommentAndEmptyLine($text, $open, $close);
        $text = Support::decryptDoubleCurlyBraces($text);

        // No open token left over: every level has been resolved.
        if (false === strpos($text, NESTING_TOKEN_OPEN)) {
            return $text;
        }

        $message = json_encode([ERROR_MESSAGE_TO_USER => 'Report: Missing close delimiter', ERROR_MESSAGE_TO_DEVELOPER => $text]);

        throw new UserFormException($message, ERROR_MISSING_CLOSE_DELIMITER);
    }

    /**
     * Trim all lines, remove all empty lines and all lines which start with '#'.
     *
     * As a side effect the (trimmed) first line of $bodytext is handed to setNestingToken(), which fills
     * $nestingOpen / $nestingClose if they are still empty.
     *
     * @param string $bodytext
     * @param string $nestingOpen   by reference: opening nesting delimiter, set if it was ''
     * @param string $nestingClose  by reference: closing nesting delimiter, set together with $nestingOpen
     *
     * @return string  the remaining lines, joined by PHP_EOL
     */
    private function trimAndRemoveCommentAndEmptyLine($bodytext, &$nestingOpen, &$nestingClose) {

        // explode() with the non-empty separator PHP_EOL always returns an array with at least one element,
        // therefore no check for a 'false' result is needed.
        $src = explode(PHP_EOL, $bodytext);

        // Remember the first line before comments are dropped: it may define the nesting token.
        $firstLine = trim($src[0]);

        $data = array();
        foreach ($src as $row) {
            $row = trim($row);

            // Skip empty lines and comment lines.
            if ($row === '' || $row[0] === '#') {
                continue;
            }
            $data[] = $row;
        }

        $this->setNestingToken($firstLine, $nestingOpen, $nestingClose);

        return implode(PHP_EOL, $data);
    }

86
    /**
     * Set the nesting token for this tt-content record. Valid tokens are {}, <>, [], ().
     *
     * If the first line of bodytext is a comment line (starts with '#') and the last char of that line is one of
     * the OPENING characters '<', '[' or '(': set that pair.
     * In every other case: set {} as nesting token.
     * If $nestingOpen is already set (not ''), nothing is changed.
     *
     * Example:
     *   # Some nice text       - no token found, take {}
     *   # [                    - []
     *   # Powerful QFQ: <      - <>
     *   # ]                    - closing characters are not recognised, take {}
     *
     * @param string $firstLine     trimmed first line of the bodytext
     * @param string $nestingOpen   by reference: receives the opening delimiter
     * @param string $nestingClose  by reference: receives the closing delimiter
     */
    private function setNestingToken($firstLine, &$nestingOpen, &$nestingClose) {

        if ($nestingOpen !== '') {
            return;  // tokens already set or not bodytext: do not change.
        }

        // Default, used whenever no other valid token is defined.
        $nestingOpen = '{';
        $nestingClose = '}';

        // A definition is only accepted on a comment line.
        if ($firstLine === '' || $firstLine[0] !== '#') {
            return;
        }

        // The last char of the comment line selects the token pair.
        switch (substr($firstLine, -1)) {
            case '<':
                $nestingOpen = '<';
                $nestingClose = '>';
                break;
            case '[':
                $nestingOpen = '[';
                $nestingClose = ']';
                break;
            case '(':
                $nestingOpen = '(';
                $nestingClose = ')';
                break;
            default:
                break;
        }
    }
139
140

    /**
     * Join lines. Nesting isn't changed.
     *
     * Iterates over all lines.
     *   Is a line a 'new line'?
     *    no: concat it to the last one.
     *    yes: flush the buffer, start a new 'new line'
     *
     * New Line Trigger:
     * a: {
     * b: }
     * c: 20
     * d: 20.30
     *
     * e: 5 {
     * f: 5.10 {
     *
     * g: head =
     * h: 10.20.head =
     *
     *  a,b:     the line is exactly $nestingOpen or $nestingClose
     *  c,d,e,f: ^\d+(\.\d+)*(\s*{)?$         ('{' stands for $nestingOpen)
     *  g,h:     ^(\d+\.)*(TOKEN_VALID_LIST)\s*=   (e.g. sql, head)
     *
     * Continuation lines are appended with a single space. If a line ends with '\', the backslash is removed
     * and the following continuation line is appended without a space.
     *
     * @param string $bodyText
     * @param string $nestingOpen
     * @param string $nestingClose
     *
     * @return string
     */
    private function joinLine($bodyText, $nestingOpen, $nestingClose) {
        $data = array();

        // Patterns do not depend on the row: build them once. preg_quote() escapes any of the possible delimiters.
        $levelPattern = '/^\d+(\.\d+)*(\s*' . preg_quote($nestingOpen, '/') . ')?$/';
        $tokenPattern = '/^(\d+\.)*(' . TOKEN_VALID_LIST . ')\s*=/';

        $full = '';
        $joinDelimiter = ' ';
        foreach (explode(PHP_EOL, $bodyText) as $row) {

            // Line end with '\'?
            if (substr($row, -1) === '\\') {
                $row = trim(substr($row, 0, -1)); // remove last char and trim
                $joinDelimiterNext = '';
            } else {
                $joinDelimiterNext = ' ';
            }

            $isNewLine = ($row === $nestingOpen || $row === $nestingClose)
                || (1 === preg_match($levelPattern, $row))
                || (1 === preg_match($tokenPattern, $row));

            if ($isNewLine) {
                // if there is already something: save this.
                if ($full !== '') {
                    $data[] = $full;
                }

                // start new line
                $full = $row;

            } elseif ($full === '') {
                // Nothing buffered yet: take the row as it is, do not prepend a join delimiter.
                $full = $row;

            } else {
                // continue row: concat - the space is necessary to join SQL statements correctly: 'SELECT ... FROM ... WHERE ... AND\np.id=...'  - here a 'AND' and 'p.id' need a space.
                $full .= $joinDelimiter . $row;
            }

            $joinDelimiter = $joinDelimiterNext;
        }

        // Save last line
        if ($full !== '') {
            $data[] = $full;
        }

        return implode(PHP_EOL, $data);
    }

218
219
220
221
222
223
224
    /**
     * Encrypt $nestingOpen and $nestingClose by a more complex token. This makes it easy to search later for '}' or '{'
     *
     * Only delimiters which end a line (open) or form a complete line (close) are replaced:
     *
     * Valid open (complete line): {, 10 {, 10.20 {
     * Valid close (complete line): }
     *
     * A level number in front of the open delimiter is kept in front of the token. As '\s*' also matches a line
     * break, a level number on one line followed by the open delimiter on the next line is matched as well.
     *
     * @param string $bodytext
     * @param string $nestingOpen
     * @param string $nestingClose
     *
     * @return mixed  result of preg_replace()
     */
    private function encryptNestingDelimeter($bodytext, $nestingOpen, $nestingClose) {

        // Escape the delimiters, whichever pair is in use, for the use inside of a regular expression.
        $open = preg_quote($nestingOpen, '/');
        $close = preg_quote($nestingClose, '/');

        // '$1': keep an optional level number (incl. whitespace) in front of the token.
        $bodytext = preg_replace('/^((\d+)(\.\d+)*\s*)?(' . $open . ')$/m', '$1' . NESTING_TOKEN_OPEN, $bodytext);

        // The close pattern has no capture group: the whole line is replaced by the token.
        $bodytext = preg_replace('/^' . $close . '$/m', NESTING_TOKEN_CLOSE, $bodytext);

        return $bodytext;
    }
242

Carsten  Rose's avatar
Carsten Rose committed
243
    /**
     * Unnest all level.
     *
     * Expects that the nesting delimiters are already replaced by NESTING_TOKEN_OPEN / NESTING_TOKEN_CLOSE.
     * Repeatedly takes the first close token, looks up the last open token in front of it (the innermost level),
     * and prefixes every line in between with the level found in front of the open token.
     *
     * Input:
     * 10 {
     *   sql = SELECT
     *   20.sql = INSERT ..
     *   30 {
     *      sql = DELETE
     *   }
     * }
     *
     * Output:
     * 10.sql = SELECT
     * 10.20.sql = INSERT ..
     * 10.30.sql = DELETE
     *
     * @param string $bodytext
     * @param string $nestingOpen   only used to render the bodytext in the error message
     * @param string $nestingClose  only used to render the bodytext in the error message
     *
     * @return mixed|string
     * @throws UserFormException  if a close token has no open token in front of it
     */
    private function unNest($bodytext, $nestingOpen, $nestingClose) {

        $result = $bodytext;
        $posFirstClose = strpos($result, NESTING_TOKEN_CLOSE);

        while ($posFirstClose !== false) {
            // Last open token in front of the first close token: the innermost level.
            $posMatchOpen = strrpos(substr($result, 0, $posFirstClose), NESTING_TOKEN_OPEN);

            if ($posMatchOpen === false) {
                $result = $this->decryptNestingDelimeter($result, $nestingOpen, $nestingClose);
                throw new UserFormException(
                    json_encode([ERROR_MESSAGE_TO_USER => 'Missing open delimiter', ERROR_MESSAGE_TO_DEVELOPER => "Missing open delimiter: $result"]),
                    ERROR_MISSING_OPEN_DELIMITER);
            }

            // substr() might return false on older PHP versions: normalize to ''.
            $pre = substr($result, 0, $posMatchOpen);
            if ($pre === false) {
                $pre = '';
            }

            $post = substr($result, $posFirstClose + NESTING_TOKEN_LENGTH);
            if ($post === false) {
                $post = '';
            }

            // trim also removes '\n'
            $match = trim(substr($result, $posMatchOpen + NESTING_TOKEN_LENGTH, $posFirstClose - $posMatchOpen - NESTING_TOKEN_LENGTH));

            // "10.sql = SELECT...\n20 {\n
            // Only trim on the right side: the position found is used as an offset into the untrimmed $pre.
            // Trimming the left side as well would shift the offset if $pre starts with whitespace.
            $levelStartPos = strrpos(rtrim($pre), PHP_EOL);
            $levelStartPos = ($levelStartPos === false) ? 0 : $levelStartPos + 1;  // Skip PHP_EOL

            // NOTE(review): for an open delimiter without a level in front, $level is '' and the nested lines
            // get a leading '.' - confirm whether this is intended.
            $level = trim(substr($pre, $levelStartPos));

            // remove 'level' from last line
            $pre = substr($pre, 0, $levelStartPos);

            // Split nested content in single rows
            $lines = explode(PHP_EOL, $match);
            foreach ($lines as $line) {
                if ($line !== '') {
                    $pre .= $level . '.' . $line . PHP_EOL;
                }
            }

            $result = $pre . $post;
            $posFirstClose = strpos($result, NESTING_TOKEN_CLOSE);
        }

        return $result;
    }

329
330
331
332
    /**
     * Turn the internal nesting tokens back into the given delimiters, each followed by a newline ("\n").
     *
     * @param string $bodytext
     * @param string $nestingOpen   replacement for NESTING_TOKEN_OPEN
     * @param string $nestingClose  replacement for NESTING_TOKEN_CLOSE
     *
     * @return mixed  result of str_replace()
     */
    private function decryptNestingDelimeter($bodytext, $nestingOpen, $nestingClose) {

        // Pairs are applied one after the other: first all open tokens, then all close tokens.
        $tokens = [NESTING_TOKEN_OPEN, NESTING_TOKEN_CLOSE];
        $delimiters = [$nestingOpen . "\n", $nestingClose . "\n"];

        return str_replace($tokens, $delimiters, $bodytext);
    }

346
}