From 42d7a87578e23be199b16b60a223c261df2f1668 Mon Sep 17 00:00:00 2001
From: enured <enis.nuredini@uzh.ch>
Date: Mon, 31 Oct 2022 16:21:36 +0100
Subject: [PATCH] F14320: Implemented use for library html purifier and
 finished implementation for new feature htmlAllow. Added documentation for
 htmlAllow.

---
 Documentation/Form.rst                       | 17 ++++
 extension/Classes/Core/AbstractBuildForm.php |  8 +-
 extension/Classes/Core/Save.php              | 99 ++++++++++----------
 3 files changed, 72 insertions(+), 52 deletions(-)

diff --git a/Documentation/Form.rst b/Documentation/Form.rst
index 0f6609708..a9a331fb2 100644
--- a/Documentation/Form.rst
+++ b/Documentation/Form.rst
@@ -1378,6 +1378,12 @@ General input for any text.
   * *step* = Step size of the up/down buttons which increase/decrease the number of in the input field. Optional.
     Default 1. Only useful with `inputType=number` (defined explicit via `inputType` or detected automatically).
   * *textareaResize* = 0|1 (optional). Be default = 1 (=on). A textarea element is resizable by the user.
+  * *htmlAllow* = p,br,img,table,u,ol,b,h2,h3,h5,sup (optional). Allows only the listed HTML tags. By default, every HTML tag is allowed. If any of the following tags is listed, the associated tags are added automatically:
+
+    * table: td, tr, th, tbody, thead
+    * ol,ul: li
+    * b: strong
+    * u,ins,del,s: span
 
 .. _`input-typeahead`:
 
@@ -1615,6 +1621,17 @@ Type: editor
 
 * *FormElement.size* = <min_height>,<max_height>: in pixels, including top and bottom bars. E.g.: 300,600
 
+  *htmlAllow*: Defines the allowed HTML tags. The TinyMCE settings are overwritten if this parameter is set.
+* TinyMCE does not use the following tags: u,del,ins,s. Use textDecoration instead to get comparable functionality and a correct configuration. Example: ::
+
+    htmlAllow = p,br,h1,h3,table,b,textDecoration,ul,img
+
+* By default, every HTML tag is allowed. If any of the following tags is listed, the associated tags are added automatically:
+
+    * table: td, tr, th, tbody, thead
+    * ol,ul: li
+    * b: strong
+    * textDecoration: span
 
 Type: annotate
 ^^^^^^^^^^^^^^
diff --git a/extension/Classes/Core/AbstractBuildForm.php b/extension/Classes/Core/AbstractBuildForm.php
index da767926e..d5afe0755 100644
--- a/extension/Classes/Core/AbstractBuildForm.php
+++ b/extension/Classes/Core/AbstractBuildForm.php
@@ -3393,8 +3393,8 @@ abstract class AbstractBuildForm {
         // Set defaults for tinyMce
         $imgToken = 'img[longdesc|usemap|src|border|alt=|title|hspace|vspace|width|height|align]';
         $textDecoration = 'span[style]';
-        $table = 'table[style|align],td[style],th[style],tr[style],tbody[style],thead[style]';
-        $url = 'a[href|target]';
+        $table = 'table[style|align|border],td[style],th[style],tr[style],tbody[style],thead[style]';
+        $url = 'a[href|target|title]';
         $paragraphToken = 'p[align]';
         $strong = 'strong';
 
@@ -3437,6 +3437,8 @@ abstract class AbstractBuildForm {
                     if(!$listFlag) {
                         $htmlAllowArray[$htmlToken] = 'ul,li';
                         $listFlag = true;
+                    } else {
+                        $htmlAllowArray[$htmlToken] = 'ul';
                     }
                     self::setTinymceEditorToolbarAttributes($customEditorToolbarFlags, $customEditorToolbar, 'bullist');
                 break;
@@ -3444,6 +3446,8 @@ abstract class AbstractBuildForm {
                     if(!$listFlag) {
                         $htmlAllowArray[$htmlToken] = 'ol,li';
                         $listFlag = true;
+                    } else {
+                        $htmlAllowArray[$htmlToken] = 'ol';
                     }
                     self::setTinymceEditorToolbarAttributes($customEditorToolbarFlags, $customEditorToolbar, 'numlist');
                 break;
diff --git a/extension/Classes/Core/Save.php b/extension/Classes/Core/Save.php
index 2430f35f5..688946540 100644
--- a/extension/Classes/Core/Save.php
+++ b/extension/Classes/Core/Save.php
@@ -9,6 +9,7 @@
 namespace IMATHUZH\Qfq\Core;
 
 use DOMDocument;
+use HTMLPurifier;
 use IMATHUZH\Qfq\Core\Database\Database;
 use IMATHUZH\Qfq\Core\Form\FormAction;
 use IMATHUZH\Qfq\Core\Helper\EncryptDecrypt;
@@ -410,10 +411,10 @@ class Save {
         foreach($this->feSpecNative as $fe) {
             $feColumnTypes[$fe['name']] = $fe['type'];
         }
+
         // Get htmlAllow parameters of all formValues and store in $feSpecsTags
         $feSpecsTags = $this->getHtmlAllowTags($this->feSpecNative, $formValues);
-
-        // For htmlAllow: Define with flags if '<li>' or '<span>' is needed for tinyMce and add them
+        // For htmlAllow: add the tags tinyMce implicitly needs: '<li>' for lists, '<span>' for underline and other text decoration, '<strong>' for b, and the table sub-tags
         $feSpecsTags = $this->setTinyMceSpecificTags($feSpecsTags);
 
 
@@ -470,10 +471,10 @@ class Save {
                 Support::setIfNotSet($formValues, $column);
             }
 
-            // Check for existing htmlAllow and strip tags
+            // Check for existing htmlAllow and strip tags, purify html result to prevent XSS
             if(isset($feSpecsTags[$column]) && $feSpecsTags[$column] !== '') {
                 $formValues[$column] = $this->custom_strip_tags($formValues[$column], $feSpecsTags[$column]);
-                $formValues[$column] = $this->strip_attributes($formValues[$column], 'style,name,align');
+                $formValues[$column] = $this->purifierHtml($formValues[$column]);
             }
 
             $newValues[$column] = $formValues[$column];
@@ -535,17 +536,32 @@ class Save {
     private function setTinyMceSpecificTags($feSpecsTags): array {
         $listFlag = false;
         $decorationFlag = false;
+        $tableFlag = false;
+        $strongFlag = false;
         foreach ($feSpecsTags as $key => $value) {
             $feSpecsTagArray[$key] = explode(',',$value);
             foreach ($feSpecsTagArray[$key] as $key2 => $tag) {
-                if(($tag === 'ul' || $tag === 'ol')) {
-                    $listFlag = true;
-                } elseif ($tag === 'textDecoration' || $tag === 'u' || $tag === 'ins' || $tag === 'del' || $tag === 's') {
-                    $decorationFlag = true;
-                }
-
-                if($tag !== 'textDecoration') {
-                    $feSpecsTagArray[$key][$key2] = $tag;
+                switch ($tag) {
+                    case 'ul':
+                    case 'ol':
+                        $listFlag = true;
+                    break;
+                    case 'textDecoration':
+                    case 'u':
+                    case 'ins':
+                    case 'del':
+                    case 's':
+                        $decorationFlag = true;
+                    break;
+                    case 'table':
+                        $tableFlag = true;
+                    break;
+                    case 'b':
+                        $strongFlag = true;
+                    break;
+                    default:
+                        $feSpecsTagArray[$key][$key2] = $tag;
+                    break;
                 }
             }
 
@@ -559,6 +575,18 @@ class Save {
                 $feSpecsTagArray[$key][] = "span";
                 $decorationFlag = false;
             }
+
+
+            if($strongFlag) {
+                $feSpecsTagArray[$key][] = "strong";
+                $strongFlag = false;
+            }
+
+            if($tableFlag) {
+                array_push($feSpecsTagArray[$key],"th","td","tr","tbody","thead");
+                $tableFlag = false;
+            }
+
             $feSpecsTags[$key] = implode(',', $feSpecsTagArray[$key]);
         }
 
@@ -1750,48 +1778,19 @@ class Save {
     }
 
     /**
-     * Remove not allowed attributes
+     * Removes disallowed attributes and any content that is not on the whitelist.
+     * Used in combination with htmlAllow.
+     * Library: HTML Purifier, author Edward Z. Yang
+     * Website: http://htmlpurifier.org/
      *
      * @param $html
-     * @param string $allowedAttributes
      * @return array|string|string[]|null
      */
-    function strip_attributes($html, string $allowedAttributes) {
-        $regex_attributes = '/(<\s*\w+\s+)(\w+)\s*=\s*".*"\s*\/*(>)/U';
-        $regex_attributes2 = '/(<\s*\w+\s+)(\w+)\s*\W*(>)/U';
-        $regex_attributes3 = '/(<\s*\w+\s+)(\w+)\s*=\s*.+\W*(>)/U';
-        $allowed_attributes = explode(',',$allowedAttributes);
-        $allowed_attributes = array_map(strtolower,$allowed_attributes);
-
-        //
-
-        $regexHtmlBlock = '/<\s*\w+\s*\w*=*.*>.*<\/.*>/U';
-        $matchAllBlocks = array();
-        preg_match_all($regexHtmlBlock, $html, $matchAllBlocks);
-
-        foreach ($matchAllBlocks[0] as &$block) {
-            $matchesType1 = array();
-            preg_match_all($regex_attributes, $block, $matchesType1);
-            $matchesType2 = array();
-            preg_match_all($regex_attributes2, $block, $matchesType2);
-            $matchesType3 = array();
-            preg_match_all($regex_attributes3, $block, $matchesType3);
-
-            if(empty($matchesType1[0])) {
-                if(empty($matchesType2[0])) {
-                    $regex_attributes = $regex_attributes3;
-                } else {
-                    $regex_attributes = $regex_attributes2;
-                }
-            }
-            $allMatches = array_replace_recursive($matchesType1, $matchesType2, $matchesType3);
-            $block = preg_replace_callback($regex_attributes, function ($allMatches) use (&$allowed_attributes) {
-                return in_array(strtolower($allMatches[2]),$allowed_attributes)?$allMatches[0]:$allMatches[1].$allMatches[3];
-            },$block);
-        }
-
-
-        $rhtml = implode('',$matchAllBlocks[0]);
+    function purifierHtml($html) {
+        $libraryPath = Path::absoluteExt('vendor/htmlpurifier-4.15.0-lite/library/HTMLPurifier.auto.php');
+        require_once $libraryPath;
+        $purifier = new HTMLPurifier();
+        $rhtml = $purifier->purify($html);
         return $rhtml;
     }
 
-- 
GitLab