'Print' and 'Download'
 *
 */

namespace qfq;

require_once(__DIR__ . '/../store/Config.php');
require_once(__DIR__ . '/../store/Session.php');
require_once(__DIR__ . '/../store/Sip.php');
require_once(__DIR__ . '/../Constants.php');
require_once(__DIR__ . '/../helper/KeyValueStringParser.php');
require_once(__DIR__ . '/../helper/SessionCookie.php');

/**
 * Class Html2Pdf
 *
 * Renders a web page to a PDF file by calling the external binary configured in SYSTEM_WKHTMLTOPDF.
 * The binary is started with a cookie jar supplied by SessionCookie and with the User-Agent of the
 * current request.
 *
 * @package qfq
 */
class Html2Pdf {

    /**
     * @var array
     */
    private $config = array();

    /**
     * @var \qfq\Session
     */
    private $session = null;

    /**
     * @var \qfq\SessionCookie
     */
    private $sessionCookie = null;

    /**
     * @var \qfq\Sip
     */
    private $sip = null;

    /**
     * Read QFQ config. Only SYSTEM_BASE_URL_PRINT and SYSTEM_WKHTMLTOPDF will be used.
     * Both entries have to be set, and SYSTEM_WKHTMLTOPDF has to point to an executable file.
     *
     * @param array $config If empty, the configuration is loaded via Config::readConfig().
     * @param bool $phpUnit Forwarded to Session::getInstance() and to the Sip constructor.
     * @throws UserFormException
     * @throws \exception If a required config entry is missing or the binary is not executable.
     */
    public function __construct(array $config = array(), $phpUnit = false) {

        if (count($config) == 0) {
            $cfg = new Config();
            $config = $cfg->readConfig('');
        }

        $this->config = $config;

        if (!isset($config[SYSTEM_BASE_URL_PRINT]) || $config[SYSTEM_BASE_URL_PRINT] == '') {
            throw new \exception(CONFIG_INI . ' - Missing ' . SYSTEM_BASE_URL_PRINT);
        }

        if (!isset($config[SYSTEM_WKHTMLTOPDF]) || $config[SYSTEM_WKHTMLTOPDF] == '') {
            throw new \exception(CONFIG_INI . ' - Missing ' . SYSTEM_WKHTMLTOPDF);
        }

        if (!is_executable($config[SYSTEM_WKHTMLTOPDF])) {
            throw new \exception(CONFIG_INI . ' - ' . SYSTEM_WKHTMLTOPDF . '=' . $config[SYSTEM_WKHTMLTOPDF] . ' - not found or not executable.');
        }

        // parse_url() returns false on seriously malformed URLs and omits components which are not part of the
        // URL (e.g. no 'host' when the scheme is missing, no 'path' for 'http://host'). Missing components are
        // handed over as null - the value the former unchecked array access produced - but without raising a notice.
        $urlParts = parse_url($config[SYSTEM_BASE_URL_PRINT]);
        if (!is_array($urlParts)) {
            $urlParts = array();
        }
        $host = isset($urlParts['host']) ? $urlParts['host'] : null;
        $path = isset($urlParts['path']) ? $urlParts['path'] : null;

        $this->session = Session::getInstance($phpUnit);
        $this->sessionCookie = new SessionCookie($host, $path);
        $this->sip = new Sip($phpUnit);
    }

    /**
     * Return an array with GET params who are clean - they do not violate $pattern.
     * Only the values are checked. Array values (e.g. from 'a[]=1') are dropped.
     *
     * NOTE(review): the keys are not validated here, although splitParam() turns keys starting with '-' into
     * wkhtmltopdf options - confirm that OnArray::arrayEscapeshellarg() protects the keys as well.
     *
     * @param array $get Typically $_GET.
     * @return array All key/value pairs of $get whose value matches the pattern.
     */
    private function readCleanGetParam(array $get) {

        $param = array();

        // ':alnum:' does not work here in FF.
        // '\z' instead of '$': a plain '$' also matches in front of a trailing newline.
        $pattern = '/^[\-_\.,;:\/a-zA-Z0-9]*\z/';

        foreach ($get as $key => $value) {
            // Nested GET parameter arrive as array: preg_match() can't handle them (TypeError since PHP 8).
            if (is_array($value)) {
                continue;
            }

            if (preg_match($pattern, $value) === 1) {
                $param[$key] = $value;
            }
        }

        return $param;
    }

    /**
     * Send the HTTP headers which announce an inline PDF with the given filename.
     *
     * @param string $filename
     */
    private function setHeader($filename) {

        header("Content-Disposition: inline; filename=\"$filename\"");
        header("Content-Type: application/pdf");
        header("Content-Transfer-Encoding: binary");
    }

    /**
     * Split the wkhtml Parameter between:
     * - general 'urlParam' for the specific website (page which will be converted to PDF).
     * - `wkhtml` Parameter. Those will always start with '-' and control wkhtml how to render the PDF.
     * - '_sip' - to activate that `urlParam` parameters will be SIP encoded.
     *
     * Any other parameter starting with '_' is silently dropped.
     *
     * @param string $urlParamString
     * @param array $rcArgs Receives all parameter whose key starts with '-'.
     * @param bool $rcSipEncode Set to true if DOWNLOAD_SIP_ENCODE_PARAMETER is part of the parameter.
     * @return array The remaining 'real' URL parameter to call the T3 page.
     * @throws UserFormException
     */
    private function splitParam($urlParamString, array &$rcArgs, &$rcSipEncode) {

        $urlParamNew = array();

        $urlParam = KeyValueStringParser::parse($urlParamString, '=', '&', KVP_IF_VALUE_EMPTY_COPY_KEY);

        foreach ($urlParam as $key => $value) {
            switch (substr($key, 0, 1)) {
                case '-':
                    $rcArgs[$key] = $value;
                    break;
                case '_':
                    if ($key == DOWNLOAD_SIP_ENCODE_PARAMETER) {
                        $rcSipEncode = true;
                    }
                    break;
                default:
                    $urlParamNew[$key] = $value;
                    break;
            }
        }

        return $urlParamNew;
    }

    /**
     * Converts a Webpage (URL) to a PDF file.
     * The URL might be a local Typo3 page (without hostname, starting with the parameter) or a full URL.
     *
     * @param string $token TOKEN_URL | TOKEN_URL_PARAM
     * @param string $url id=exportPage&r=123, www.nzz.ch/issue?id=456
     * @return string rendered file - please delete later
     * @throws \exception If the temporary file can't be created or the converter returns a non zero exit code.
     */
    public function page2pdf($token, $url) {

        $rcArgs = array();
        $urlParamString = '';
        $host = '';

        switch ($token) {
            case TOKEN_URL:
                $arr = explode('?', $url, 2);
                $host = $arr[0];
                $urlParamString = empty($arr[1]) ? '' : $arr[1];
                break;
            case TOKEN_URL_PARAM:
                $host = $this->config[SYSTEM_BASE_URL_PRINT];
                $urlParamString = $url;
                break;
            default:
                break;
        }

        $rcSipEncode = false;
        $urlParam = $this->splitParam($urlParamString, $rcArgs, $rcSipEncode);

        $rcArgs = OnArray::arrayEscapeshellarg($rcArgs);
        $options = KeyValueStringParser::unparse($rcArgs, ' ', ' ');
        $urlParamString = KeyValueStringParser::unparse($urlParam, '=', '&');

        if ($rcSipEncode) {
            $urlParamString = $this->sip->queryStringToSip($urlParamString, RETURN_URL);
        }

        if ($urlParamString != '') {
            $url = Support::mergeUrlComponents('', $host, $urlParamString);
        } elseif ($host != '') {
            // No 'real' URL parameter left (none given or all of them were wkhtml options): call the plain host.
            // Without this, $url would still carry the wkhtml options resp. would be empty.
            $url = $host;
        }

        if (substr($url, 0, 4) != 'http') {
            $url = 'http://' . $url;
        }

        $urlPrint = escapeshellarg($url);

        // The path has been verified by is_executable() in the constructor: it's a plain filename and can be quoted.
        $wkhtmlToPdf = escapeshellarg($this->config[SYSTEM_WKHTMLTOPDF]);

        $filename = tempnam(sys_get_temp_dir(), DOWNLOAD_FILE_PREFIX);
        if ($filename === false) {
            throw new \exception('Failed to create a temporary file in: ' . sys_get_temp_dir());
        }
        $filenameEscape = escapeshellarg($filename);

        $cookieOptions = '--cookie-jar ' . escapeshellarg($this->sessionCookie->getFile());

        // By default 'Typo3' expects the same User-Agent for the FE-Session.
        // The header is not set on every request (CLI, some bots).
        $userAgent = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : '';
        $customHeader = '--custom-header User-Agent ' . escapeshellarg($userAgent) . ' --custom-header-propagation';

        // Very important: The current lock on session SESSION_NAME has to be freed, cause wkhtmltopdf will use the same
        // session in a few moments and this script remains active all the time.
        $this->session->close();

        $cmd = "$wkhtmlToPdf $customHeader $cookieOptions $options $urlPrint $filenameEscape";

        $rc = 0;
        $line = system($cmd, $rc);

        if ($rc != 0) {
            // The caller never gets the filename in case of an error: remove the file here.
            if (is_file($filename)) {
                unlink($filename);
            }
            throw new \exception("Error [RC=$rc] $line: $cmd");
        }

        return $filename;
    }

    /**
     * Renders the page described by the clean GET parameter of the current request to PDF, sends the PDF to the
     * client, removes the temporary file and terminates the script.
     *
     * @throws \exception
     */
    public function outputHtml2Pdf() {

        $get = $this->readCleanGetParam($_GET);

        $urlParam = KeyValueStringParser::unparse($get, '=', '&');

        $pageId = Support::setIfNotSet($get, HTML2PDF_PAGEID, 0);

        $filename = $this->page2pdf(TOKEN_URL_PARAM, $urlParam);

        $this->setHeader('print.' . $pageId . '.pdf');
        @readfile($filename);
        @unlink($filename);

        // Do an extremely hard exit here to make sure there are no more additional bytes sent (makes the delivered PDF unusable).
        exit;
    }
}