Commit 3160bd47 authored by Carsten  Rose's avatar Carsten Rose
Browse files

#Refs 13722: Detect and fix pdfunite problems "Gen inside xref table too large...

#Refs 13722: Detect and fix pdfunite problems "Gen inside xref table too large (bigger than INT_MAX)" via pdf2ps, ps2pdf
parent aa8cac92
Pipeline #7095 passed with stage
in 1 minute and 55 seconds
......@@ -46,6 +46,7 @@ The following features are only tested / supported on linux hosts:
* Convert of images to PDF files - command `img2pdf`.
* PDF decrypt (used for merge with pdfunite) - command `qpdf`.
* PDF decrypt (used for merge with pdfunite) - command `gs` - in case `qpdf` is not successful.
* PDF fix (used for merge with pdfunite) - command `pdf2ps` and `ps2pdf` - in case `qpdf` is not successful.
* Mime type detection for uploads - command `file`.
* Split PDF into JPG - command `convert`.
* Repair PDF - command `pdftocairo`.
......@@ -63,7 +64,8 @@ To normalize UTF8 input, *php-intl* package is needed by
* normalizer::normalize()
For the :ref:`download` function, the programs `img2pdf`, `pdfunite`, `qpdf`, `gs` and `file` are necessary to concatenate PDF files.
For the :ref:`download` function, the programs `img2pdf`, `pdfunite`, `qpdf`, `gs`, `pdf2ps`, `ps2pdf` and `file` are
necessary to concatenate PDF files.
Preparation for Ubuntu::
......
......@@ -679,7 +679,8 @@ const SYSTEM_CMD_GS = 'cmdGs';
const SYSTEM_CMD_PDFUNITE = 'cmdPdfunite';
const SYSTEM_CMD_IMG2PDF = 'cmdImg2pdf';
const SYSTEM_CMD_HEIF_CONVERT = 'cmdHeifConvert';
const SYSTEM_CMD_PDF2PS = 'cmdPdf2ps';
const SYSTEM_CMD_PS2PDF = 'cmdPs2pdf';
// Thumbnail
const SYSTEM_THUMBNAIL_DIR_SECURE_REL_TO_APP = 'thumbnailDirSecure';
const SYSTEM_THUMBNAIL_DIR_PUBLIC_REL_TO_APP = 'thumbnailDirPublic';
......
......@@ -74,6 +74,16 @@ class Download {
*/
private $gs = '';
/**
* @var string Name of command
*/
private $pdf2ps = '';
/**
* @var string Name of command
*/
private $ps2pdf = '';
/**
* @var string Name of command
*/
......@@ -101,6 +111,8 @@ class Download {
$this->qpdf = $this->store->getVar(SYSTEM_CMD_QPDF, STORE_SYSTEM);
$this->gs = $this->store->getVar(SYSTEM_CMD_GS, STORE_SYSTEM);
$this->pdf2ps = $this->store->getVar(SYSTEM_CMD_PDF2PS, STORE_SYSTEM);
$this->ps2pdf = $this->store->getVar(SYSTEM_CMD_PS2PDF, STORE_SYSTEM);
$this->pdfunite = $this->store->getVar(SYSTEM_CMD_PDFUNITE, STORE_SYSTEM);
$this->img2Pdf = $this->store->getVar(SYSTEM_CMD_IMG2PDF, STORE_SYSTEM);
......@@ -218,11 +230,17 @@ class Download {
}
/**
* Fires the merge command.
* If for any reason the command fails: check if the reason is 'unencrypted files'.
* If 'yes': try to decrypt them with qpdf.
* After one decrypt, try merge again.
* Try to merge and decrypt as long as there are encrypted files.
* Do the merge command.
* Fixing part is highly customized to pdfunite: parsing of reported problematic files.
* If for any reason the command fails: check if the reason
* a) is 'encrypted files'. If 'yes': try to decrypt them with qpdf.
* Try1: qpdf --decrypt '$backup' '$file'
* Try2: gs -sDEVICE=pdfwrite -dNOPAUSE -sOutputFile="$file" -- "$backup"
* b) is 'Syntax Error: Gen inside xref table too large (bigger than INT_MAX)': try convert PDF2PS and PS2PDF
* Try1: pdf2ps $file $file.ps; ps2pdf $file.ps $file
*
* After one attempt to fix, try to merge again.
* Try to merge, decrypt and fix as long as there are encrypted files.
*
* @param $cmd
* @param $rcOutput
......@@ -236,7 +254,6 @@ class Download {
// Try to merge the PDFs as long as a problematic PDF has been repaired. Check this by comparing the last and the current output.
while ($last != $rcOutput) {
$last = $rcOutput; // Remember last
// Merge:
......@@ -246,50 +263,137 @@ class Download {
break; // skip rest if everything is fine
}
// Possible output: "Unimplemented Feature: Could not merge encrypted files ('ct.18.06.092-097.pdf')"
$line = implode(',', $rcOutput);
if (false !== ($line = strstr($line, "Unimplemented Feature: Could not merge encrypted files ("))) {
// Possible output on error:
// "Unimplemented Feature: Could not merge encrypted files ('ct.18.06.092-097.pdf')"
// "Could not merge damaged documents ('offerFinal_Light_Cycler_Offerte_2.pdf')"
$lineAll = implode(',', $rcOutput);
$cmdFailed = "CMD Merge: " . $cmd . "<br>RC: $rc<br>Output: " . implode("<br>", $rcOutput) . '<br>';
$fix = false;
// Searching for line "Unimplemented Feature: Could not merge encrypted files ('file.pdf')"
// Skip error message until problematic file is named.
$line = strstr($lineAll, "Unimplemented Feature: Could not merge encrypted files (");
if (false !== $line) {
$arr = explode("'", $line, 3);
if (!empty($arr[1]) && file_exists($arr[1])) {
$file = $arr[1]; // problematic file
// Create a backup file: only one per day!
$backup = $file . date('.Y-m-d');
if (!file_exists($backup)) {
HelperFile::copy($file, $backup);
}
// Create a backup file if none already exist
$backup = $this->doBackupFilePerDay($file);
// Try 1: via 'qpdf --decrypt'
$cmdQpdf = $this->qpdf . " --decrypt '$backup' '$file' 2>&1"; // Try to decrypt file
exec($cmdQpdf, $outputQpdf, $rcQpdf);
if ($rcQpdf != 0) {
$fix = $rcQpdf == 0;
if (!$fix) {
// Try 2: via 'gs -sDEVICE=pdfwrite'
$cmdGs = $this->gs . " -sDEVICE=pdfwrite -dNOPAUSE -sOutputFile=\"$file\" -- \"$backup\" 2>&1";
exec($cmdGs, $outputGs, $rcGs);
if ($rcGs != 0) {
// qpdf failed: restore origfile in case the $file has been destroyed.
HelperFile::copy($backup, $file);
throw new \DownloadException (json_encode([ERROR_MESSAGE_TO_USER => "Failed to decrypt PDF",
ERROR_MESSAGE_TO_DEVELOPER => "CMD1: " . $cmdQpdf . "<br>RC: $rcQpdf<br>Output: " . implode("<br>", $outputQpdf) . '<br>' .
"CMD2: " . $cmdGs . "<br>RC: $rcGs<br>Output: " . implode("<br>", $outputGs)])
, ERROR_DOWNLOAD_MERGE_FAILED);
$fix = $rcGs == 0;
if (!$fix) {
$line = implode(',', $rcOutputGs);
$msgUser = "Failed to decrypt PDF";
$msgDeveloper = $cmdFailed . "CMD Fix: " . $cmdGs . "<br>RC: $rcGs<br>Output: " . implode("<br>", $rcOutputGs);
$this->revertFixAndThrowException($msgUser, $msgDeveloper, $file, $backup);
}
}
}
} else {
// Check for 'Gen inside xref table too large' (Ticket 13722, 13731: pdf2ps >> ps2pdf)
}
$line = strstr($lineAll, "Syntax Error: Gen inside xref table too large (bigger than INT_MAX)");
if (!$fix && false !== $line) {
// Skip error message until problematic file is named.
$line = strstr($line, "Could not merge damaged documents (");
$arr = explode("'", $line, 3);
if (!empty($arr[1]) && file_exists($arr[1])) {
$file = $arr[1]; // problematic file
// Create a backup file if none already exist
$backup = $this->doBackupFilePerDay($file);
// PDF2PS
$cmdFix = $this->pdf2ps . " $file $file.ps";
exec($cmdFix, $rcOutputFix, $rcFix);
$fix = $rcFix == 0;
if (!$fix) {
$line = implode(',', $rcOutputFix);
$msgUser = "Failed to merge PDF and also fix try failed on converting source PDF to PS.";
$msgDeveloper = $cmdFailed . "CMD Fix: " . $cmdFix . "<br>RC: $rcFix<br>Output: " . implode("<br>", $rcOutputFix);
$this->revertFixAndThrowException($msgUser, $msgDeveloper, $file, $backup);
}
// PS2PDF
$cmdFix = $this->ps2pdf . " $file.ps $file";
exec($cmdFix, $rcOutputFix, $rcFix);
$fix = $rcFix == 0;
if (!$fix) {
$line = implode(',', $rcOutputFix);
$msgUser = "Failed to merge PDF and also fix try failed on converting temporary PS to PDF.";
$msgDeveloper = $cmdFailed . "CMD Fix: " . $cmdFix . "<br>RC: $rcFix<br>Output: " . implode("<br>", $rcOutputFix);
$this->revertFixAndThrowException($msgUser, $msgDeveloper, $file, $backup);
}
HelperFile::unlink($file . '.ps');
}
}
if (!$fix) {
throw new \DownloadException (json_encode([ERROR_MESSAGE_TO_USER => "Merge PDF file failed.",
ERROR_MESSAGE_TO_DEVELOPER => "CMD: " . $cmd . "<br>RC: $rc<br>Output: " . implode("<br>", $rcOutput)])
ERROR_MESSAGE_TO_DEVELOPER => $cmdFailed])
, ERROR_DOWNLOAD_MERGE_FAILED);
}
}
return $rc;
}
/**
 * Undo a failed repair attempt and abort with a DownloadException.
 *
 * When both $file and $backup are non-empty, the backup is copied back over $file
 * first, because the failed fix command may have left $file damaged.
 * Afterwards a DownloadException (code ERROR_DOWNLOAD_MERGE_FAILED) is thrown
 * unconditionally - this method never returns to the caller.
 *
 * @param string $msgUser      Message shown to the user.
 * @param string $msgDeveloper Detailed message for the developer (failed commands, return codes, output).
 * @param string $file         Path of the file that should have been fixed. Empty: nothing is restored.
 * @param string $backup       Path of the backup of $file. Empty: nothing is restored.
 * @throws \DownloadException  Always.
 * @throws \UserFormException  Presumably raised by HelperFile::copy() if restoring fails - verify against HelperFile.
 */
private function revertFixAndThrowException($msgUser, $msgDeveloper, $file, $backup) {

    // Restore only if both paths are known.
    $restorePossible = !($backup == '' || $file == '');
    if ($restorePossible) {
        // Fix command failed: bring back the original in case $file has been destroyed.
        HelperFile::copy($backup, $file);
    }

    $payload = [
        ERROR_MESSAGE_TO_USER => $msgUser,
        ERROR_MESSAGE_TO_DEVELOPER => $msgDeveloper,
    ];

    throw new \DownloadException(json_encode($payload), ERROR_DOWNLOAD_MERGE_FAILED);
}
/**
 * Make sure a backup of $file exists for the current day and return its name.
 *
 * The backup name is $file with the current date appended as '.YYYY-MM-DD'.
 * If a file with that name already exists, nothing is copied - so at most one backup
 * is created per file and day, and a later (already modified) $file does not
 * overwrite the backup taken earlier on the same day.
 *
 * @param string $file Path of the file to back up.
 * @return string Path of the backup file.
 * @throws \UserFormException Presumably raised by HelperFile::copy() on failure - verify against HelperFile.
 */
private function doBackupFilePerDay($file) {

    $todaysBackup = $file . date('.Y-m-d');

    // A backup from today is already there: keep it untouched.
    if (file_exists($todaysBackup)) {
        return $todaysBackup;
    }

    HelperFile::copy($file, $todaysBackup);

    return $todaysBackup;
}
/**
* Get the mimetype of $filename and store them in $rcMimetype.
*
......@@ -299,7 +403,8 @@ class Download {
*
* @return string possible updated $outputFilename, according the mimetype.
*/
private function targetFilenameExtension($pathFileName, $outputFilename, &$rcMimetype) {
private
function targetFilenameExtension($pathFileName, $outputFilename, &$rcMimetype) {
if ($pathFileName != '' && file_exists($pathFileName)) {
......@@ -315,7 +420,8 @@ class Download {
* @param $outputFilename
* @throws \DownloadException
*/
private function outputFile($file, $outputFilename) {
private
function outputFile($file, $outputFilename) {
$json = '';
$flagJson = ($this->getOutputFormat() === DOWNLOAD_OUTPUT_FORMAT_JSON);
......@@ -369,7 +475,8 @@ class Download {
* @throws \UserFormException
* @throws \UserReportException
*/
private function getEvaluatedBodytext($uid, array $urlParam) {
private
function getEvaluatedBodytext($uid, array $urlParam) {
$bodyTextArr = $this->db->getBodytext($uid);
// Copy $urlParam to STORE_SIP
......@@ -402,7 +509,8 @@ class Download {
* @throws \UserFormException
* @throws \UserReportException
*/
private function getElement($element, $downloadMode, &$rcData) {
private
function getElement($element, $downloadMode, &$rcData) {
$filename = '';
$rcArgs = array();
......@@ -482,7 +590,8 @@ class Download {
* @return string ZIP filename - has to be deleted later.
* @throws \DownloadException
*/
private function zipFiles(array $files) {
private
function zipFiles(array $files) {
$zipFile = HelperFile::tempnam();
if (false === $zipFile) {
......@@ -538,7 +647,8 @@ class Download {
* @throws \UserFormException
* @throws \UserReportException
*/
private function checkAndExpandSource($param) {
private
function checkAndExpandSource($param) {
if ($param == '') {
return '';
......@@ -599,7 +709,8 @@ class Download {
* @throws \UserFormException
* @throws \UserReportException
*/
private function doElements(array $vars, $outputMode) {
private
function doElements(array $vars, $outputMode) {
$srcFiles = array();
$filesCleanLater = array();
......@@ -733,7 +844,8 @@ class Download {
* @throws \UserFormException
* @throws \UserReportException
*/
private function doThumbnail($urlParam) {
private
function doThumbnail($urlParam) {
$thumbnail = new Thumbnail();
$pathFilenameThumbnail = $thumbnail->process($urlParam, THUMBNAIL_VIA_DOWNLOAD);
......@@ -751,7 +863,8 @@ class Download {
* @throws \UserFormException
* @throws \UserReportException
*/
private function getDirectDownloadSql() {
private
function getDirectDownloadSql() {
$scriptName = str_replace('.', '', $this->store->getVar('SCRIPT_NAME', STORE_CLIENT . STORE_EMPTY));
// Example: /var/www/html/qfq/dl.php >> dl.php
......@@ -776,7 +889,8 @@ class Download {
* @throws \UserFormException
* @throws \UserReportException
*/
private function getDirectDownloadModeDetails() {
private
function getDirectDownloadModeDetails() {
$arr = $this->getDirectDownloadSql();
......@@ -831,7 +945,8 @@ class Download {
* @throws \UserFormException
* @throws \UserReportException
*/
public function process($vars, $outputMode = OUTPUT_MODE_DIRECT) {
public
function process($vars, $outputMode = OUTPUT_MODE_DIRECT) {
if (!is_array($vars)) {
......@@ -851,14 +966,16 @@ class Download {
/**
* @param $outputFormat
*/
private function setOutputFormat($outputFormat) {
private
function setOutputFormat($outputFormat) {
$this->outputFormat = $outputFormat;
}
/**
* @return string - DOWNLOAD_OUTPUT_FORMAT_RAW | DOWNLOAD_OUTPUT_FORMAT_JSON
*/
public function getOutputFormat() {
public
function getOutputFormat() {
return $this->outputFormat;
}
}
......
......@@ -484,6 +484,8 @@ class Config {
SYSTEM_CMD_PDFUNITE => 'pdfunite',
SYSTEM_CMD_IMG2PDF => 'img2pdf',
SYSTEM_CMD_HEIF_CONVERT => 'heif-convert',
SYSTEM_CMD_PDF2PS => 'pdf2ps',
SYSTEM_CMD_PS2PDF => 'ps2pdf',
SYSTEM_THUMBNAIL_DIR_SECURE_REL_TO_APP => Path::APP_TO_SYSTEM_THUMBNAIL_DIR_SECURE_DEFAULT,
SYSTEM_THUMBNAIL_DIR_PUBLIC_REL_TO_APP => Path::APP_TO_SYSTEM_THUMBNAIL_DIR_PUBLIC_DEFAULT,
......
......@@ -420,6 +420,8 @@ class StoreTest extends TestCase {
SYSTEM_CMD_IMG2PDF => 'img2pdf',
SYSTEM_CMD_HEIF_CONVERT => 'heif-convert',
SYSTEM_CMD_QFQPDF => '/opt/qfqpdf/qfqpdf',
SYSTEM_CMD_PDF2PS => 'pdf2ps',
SYSTEM_CMD_PS2PDF => 'ps2pdf',
];
$body = json_encode([
......
......@@ -52,6 +52,12 @@ cmdImg2pdf = img2pdf
# cat=config/config; type=string; label=Command 'heif-convert':Default is 'heif-convert'. Will be used to convert images from HEIC/HEIF to PNG.
cmdHeifConvert = heif-convert
# cat=config/config; type=string; label=Command 'pdf2ps':Default is 'pdf2ps'. Will be used to convert PDF files to PS.
cmdPdf2ps = pdf2ps
# cat=config/config; type=string; label=Command 'ps2pdf':Default is 'ps2pdf'. Will be used to convert PS files to PDF.
cmdPs2pdf = ps2pdf
# cat=config/email; type=string; label=Options for SendEMail:Default is empty. General options. Check: http://caspian.dotconf.net/menu/Software/SendEmail. E.g.: 'sendEMail=-o tls=yes'
sendEMailOptions =
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment