<?php
/**
 * Created by PhpStorm.
 * User: crose
 * Date: 4/17/17
 * Time: 10:17 PM
 *
 * Check: CODING.md > 'Print' and 'Download'
 *
 */

namespace qfq;

require_once(__DIR__ . '/../store/Config.php');
require_once(__DIR__ . '/../store/Session.php');
require_once(__DIR__ . '/../store/Sip.php');
require_once(__DIR__ . '/../Constants.php');
require_once(__DIR__ . '/../helper/KeyValueStringParser.php');
require_once(__DIR__ . '/../helper/SessionCookie.php');


/**
 * Renders a web page to a PDF file by calling the external `wkhtmltopdf` binary.
 *
 * The page is addressed either by a full URL or by URL parameters which are appended to the configured
 * SYSTEM_BASE_URL_PRINT. Parameters starting with '-' are handed to wkhtmltopdf as command line options.
 */
class Html2Pdf {

    /**
     * @var array QFQ configuration. Only SYSTEM_BASE_URL_PRINT and SYSTEM_WKHTMLTOPDF are read in this class.
     */
    private $config = array();

    /**
     * @var \qfq\Session
     */
    private $session = null;

    /**
     * @var \qfq\SessionCookie
     */
    private $sessionCookie = null;

    /**
     * @var \qfq\Sip
     */
    private $sip = null;

    /**
     * Read the QFQ config (unless one is given), verify that SYSTEM_BASE_URL_PRINT and SYSTEM_WKHTMLTOPDF are usable
     * and set up the session, session cookie and SIP helpers.
     *
     * @param array $config  Configuration array. If empty, the configuration is read via Config::readConfig().
     * @param bool $phpUnit  Passed through to Session::getInstance() and Sip.
     * @throws UserFormException
     * @throws \Exception If a required config value is missing or wkhtmltopdf is not executable.
     */
    public function __construct(array $config = array(), $phpUnit = false) {

        if (count($config) == 0) {
            $cfg = new Config();

            $config = $cfg->readConfig('');
        }

        $this->config = $config;

        if (!isset($config[SYSTEM_BASE_URL_PRINT]) || $config[SYSTEM_BASE_URL_PRINT] == '') {
            throw new \Exception(CONFIG_INI . ' - Missing ' . SYSTEM_BASE_URL_PRINT);
        }

        if (!isset($config[SYSTEM_WKHTMLTOPDF]) || $config[SYSTEM_WKHTMLTOPDF] == '') {
            throw new \Exception(CONFIG_INI . ' - Missing ' . SYSTEM_WKHTMLTOPDF);
        }

        if (!is_executable($config[SYSTEM_WKHTMLTOPDF])) {
            throw new \Exception(CONFIG_INI . ' - ' . SYSTEM_WKHTMLTOPDF . '=' . $config[SYSTEM_WKHTMLTOPDF] . ' - not found or not executable.');
        }

        // parse_url() returns false on seriously malformed URLs and omits 'host' / 'path' when they are not part of
        // the URL (e.g. a base URL without scheme or without path). Read both defensively to avoid notices; a missing
        // component is passed on as null.
        $urlParts = parse_url($config[SYSTEM_BASE_URL_PRINT]);
        if (!is_array($urlParts)) {
            $urlParts = array();
        }
        $cookieHost = isset($urlParts['host']) ? $urlParts['host'] : null;
        $cookiePath = isset($urlParts['path']) ? $urlParts['path'] : null;

        $this->session = Session::getInstance($phpUnit);
        $this->sessionCookie = new SessionCookie($cookieHost, $cookiePath);
        $this->sip = new Sip($phpUnit);
    }

    /**
     * Return those elements of $get whose key and value are 'clean', i.e. consist only of the characters
     * `- _ . , ; : / a-z A-Z 0-9`. Everything else is dropped silently.
     *
     * Array values (e.g. from '?a[]=1') are dropped as well: they can not be matched as a string.
     *
     * @param array $get Typically $_GET.
     * @return array The clean subset of $get, keys preserved.
     */
    private function readCleanGetParam(array $get) {

        $param = array();
        // ':alnum:' does not work here in FF.
        // Modifier 'D': '$' matches only at the very end of the subject - without it a trailing newline would pass.
        $pattern = '/^[\-_\.,;:\/a-zA-Z0-9]*$/D';

        foreach ($get as $key => $value) {
            if (is_array($value)) {
                continue;
            }

            // The key ends up in the generated URL (and, if it starts with '-', on the wkhtmltopdf command line) just
            // like the value - so it has to be clean too.
            if (preg_match($pattern, (string)$key) !== 1) {
                continue;
            }

            if (preg_match($pattern, $value) === 1) {
                $param[$key] = $value;
            }
        }

        return $param;
    }

    /**
     * Send the HTTP headers which announce an inline PDF document.
     *
     * @param string $filename Filename offered to the browser. Double quotes and control characters are removed,
     *                         because the name is embedded in a quoted header value.
     */
    private function setHeader($filename) {

        $filename = preg_replace('/["\x00-\x1F\x7F]/', '', (string)$filename);

        header("Content-Disposition: inline; filename=\"$filename\"");
        header("Content-Type: application/pdf");
        header("Content-Transfer-Encoding: binary");
    }

    /**
     * Split the given parameter string into:
     * - general URL parameters for the specific website (the page which will be converted to PDF) - returned.
     * - wkhtmltopdf parameters. Those always start with '-' and control how wkhtmltopdf renders the PDF. They are
     *   collected in $rcArgs.
     * - DOWNLOAD_SIP_ENCODE_PARAMETER - sets $rcSipEncode to true, to request that the URL parameters will be SIP
     *   encoded. Any other parameter starting with '_' is dropped.
     *
     * @param string $urlParamString Parameters as 'key=value&key=value'.
     * @param array $rcArgs          Return: wkhtmltopdf options (name => value) are added here.
     * @param bool $rcSipEncode      Return: set to true if DOWNLOAD_SIP_ENCODE_PARAMETER is present.
     * @return array The remaining 'real' URL parameter to call the T3 page.
     * @throws UserFormException
     * @throws \Exception If a wkhtmltopdf option name contains unexpected characters.
     */
    private function splitParam($urlParamString, array &$rcArgs, &$rcSipEncode) {
        $urlParamNew = array();

        // NOTE(review): KVP_IF_VALUE_EMPTY_COPY_KEY presumably copies the key into an empty value - confirm in
        // KeyValueStringParser.
        $urlParam = KeyValueStringParser::parse($urlParamString, '=', '&', KVP_IF_VALUE_EMPTY_COPY_KEY);
        foreach ($urlParam as $key => $value) {
            $name = (string)$key;

            switch (substr($name, 0, 1)) {
                case '-':
                    // Option names become part of a shell command line. In page2pdf() only the array handed to
                    // OnArray::arrayEscapeshellarg() is escaped - whether that covers the names is not visible here,
                    // so accept only names which look like real options: '-x', '--some-option'.
                    if (preg_match('/^--?[a-zA-Z0-9][a-zA-Z0-9\-]*$/D', $name) !== 1) {
                        throw new \Exception("Invalid wkhtmltopdf option name: '$name'");
                    }
                    $rcArgs[$key] = $value;
                    break;
                case '_':
                    if ($key == DOWNLOAD_SIP_ENCODE_PARAMETER) {
                        $rcSipEncode = true;
                    }
                    break;
                default:
                    $urlParamNew[$key] = $value;
                    break;
            }
        }

        return $urlParamNew;
    }

    /**
     * Converts a Webpage (URL) to a PDF file.
     * The URL might be a local Typo3 page (without hostname, starting with the parameter) or a full URL.
     *
     * For any other $token than TOKEN_URL / TOKEN_URL_PARAM, $url is used as it is (prefixed by 'http://' if needed).
     *
     * @param string $token TOKEN_URL | TOKEN_URL_PARAM
     * @param string $url id=exportPage&r=123, www.nzz.ch/issue?id=456
     * @return string        rendered file - please delete later
     * @throws \Exception If the temporary file can't be created or wkhtmltopdf returns a non-zero exit code.
     */
    public function page2pdf($token, $url) {
        $rcArgs = array();
        $urlParamString = '';
        $host = '';

        switch ($token) {
            case TOKEN_URL:
                $arr = explode('?', $url, 2);
                $host = $arr[0];
                $urlParamString = empty($arr[1]) ? '' : $arr[1];
                break;
            case TOKEN_URL_PARAM:
                $host = $this->config[SYSTEM_BASE_URL_PRINT];
                $urlParamString = $url;
                break;
            default:
                break;
        }

        $rcSipEncode = false;
        $urlParam = $this->splitParam($urlParamString, $rcArgs, $rcSipEncode);

        $rcArgs = OnArray::arrayEscapeshellarg($rcArgs);
        $options = KeyValueStringParser::unparse($rcArgs, ' ', ' ');

        $urlParamString = KeyValueStringParser::unparse($urlParam, '=', '&');
        if ($rcSipEncode) {
            $urlParamString = $this->sip->queryStringToSip($urlParamString, RETURN_URL);
        }

        if ($urlParamString != '') {
            $url = Support::mergeUrlComponents('', $host, $urlParamString);
        } elseif ($host != '') {
            // All given parameters were wkhtmltopdf options or control parameters: nothing is left to append. Use
            // the plain host - otherwise the raw option string would remain in (or even be) the URL to render.
            $url = $host;
        }

        // Only a real scheme counts: a hostname like 'httpbin.org' starts with 'http' too, but still needs a scheme.
        if (preg_match('#^https?://#i', $url) !== 1) {
            $url = 'http://' . $url;
        }

        $urlPrint = escapeshellarg($url);
        // The constructor verified that this is the path of one executable file - so it can be quoted as a whole.
        $wkhtmlToPdf = escapeshellarg($this->config[SYSTEM_WKHTMLTOPDF]);

        $filename = tempnam(sys_get_temp_dir(), DOWNLOAD_FILE_PREFIX);
        if ($filename === false) {
            throw new \Exception('Failed to create a temporary file in ' . sys_get_temp_dir());
        }
        $filenameEscape = escapeshellarg($filename);

        // The User-Agent header is not always present (CLI, some HTTP clients).
        $userAgent = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : '';

        $cookieOptions = '--cookie-jar ' . escapeshellarg($this->sessionCookie->getFile());
        $customHeader = '--custom-header User-Agent ' . escapeshellarg($userAgent) . ' --custom-header-propagation'; // By default 'Typo3' expects the same User-Agent for the FE-Session

        // Very important: The current lock on session SESSION_NAME has to be freed, cause wkhtmltopdf will use the same
        // session in a few moments and this script remains active all the time.
        $this->session->close();
        $cmd = "$wkhtmlToPdf $customHeader $cookieOptions $options $urlPrint $filenameEscape";

        // exec() instead of system(): system() passes the stdout of the command straight to the HTTP response, which
        // would corrupt the PDF (and its headers) delivered afterwards.
        $rc = 0;
        $output = array();
        $line = exec($cmd, $output, $rc);

        if ($rc != 0) {
            // Nobody will pick up the (empty or broken) result: remove it.
            if (is_file($filename)) {
                unlink($filename);
            }
            throw new \Exception("Error [RC=$rc] $line: $cmd");
        }

        return $filename;
    }

    /**
     * Render the page addressed by the clean $_GET parameters to PDF, send it to the client and terminate the script.
     *
     * The PDF is offered as 'print.<pageId>.pdf', where <pageId> is the GET parameter HTML2PDF_PAGEID (default: 0).
     * The temporary PDF file is removed after delivery.
     *
     * @throws \Exception
     */
    public function outputHtml2Pdf() {

        $get = $this->readCleanGetParam($_GET);
        // Build the parameter string before the pageId default is applied.
        // NOTE(review): Support::setIfNotSet() might write the default into $get - confirm.
        $urlParam = KeyValueStringParser::unparse($get, '=', '&');
        $pageId = Support::setIfNotSet($get, HTML2PDF_PAGEID, 0);

        $filename = $this->page2pdf(TOKEN_URL_PARAM, $urlParam);

        $this->setHeader('print.' . $pageId . '.pdf');
        // Best effort: the headers are already sent, there is no way left to report a problem to the client.
        @readfile($filename);
        @unlink($filename);

        exit; // Do an extremely hard exit here to make sure there are no more additional bytes sent (makes the delivered PDF unusable).

    }

}