From 1f97025fa34ecbf1d64642588321032eb0f349ec Mon Sep 17 00:00:00 2001
From: Daniel Seifert
Date: Thu, 17 Apr 2025 09:25:17 +0200
Subject: [PATCH] complete sanitizing filenames from umlauts and accents

---
 .../AbstractClasses/pdfdocumentsGeneric.php   |  17 ++-
 .../pdfDocumentsGenericTest.php               | 132 ++++++++++++++++++
 composer.json                                 |   3 +-
 3 files changed, 145 insertions(+), 7 deletions(-)
 create mode 100644 Tests/Unit/Application/Model/AbstractClasses/pdfDocumentsGenericTest.php

diff --git a/Application/Model/AbstractClasses/pdfdocumentsGeneric.php b/Application/Model/AbstractClasses/pdfdocumentsGeneric.php
index 44f54f2..79ca0d5 100644
--- a/Application/Model/AbstractClasses/pdfdocumentsGeneric.php
+++ b/Application/Model/AbstractClasses/pdfdocumentsGeneric.php
@@ -26,6 +26,7 @@ use Psr\Container\ContainerExceptionInterface;
 use Psr\Container\NotFoundExceptionInterface;
 use Spipu\Html2Pdf\Exception\Html2PdfException;
 use Spipu\Html2Pdf\Html2Pdf;
+use Symfony\Component\String\UnicodeString;
 use Twig\Error\Error;
 
 abstract class pdfdocumentsGeneric extends Base implements genericInterface
@@ -285,6 +286,10 @@ abstract class pdfdocumentsGeneric extends Base implements genericInterface
      */
     public function makeValidFileName($filename)
     {
+        // transliterate umlauts/accents; convert to UTF-8 only if the name is not valid UTF-8 yet (no double encoding)
+        $utf8Name = mb_check_encoding($filename, 'UTF-8') ? $filename : mb_convert_encoding($filename, 'UTF-8', 'ISO-8859-1');
+        $filename = (string) (new UnicodeString($utf8Name))->ascii();
+
         // sanitize filename
         $filename = preg_replace(
             '~
@@ -306,11 +311,11 @@ abstract class pdfdocumentsGeneric extends Base implements genericInterface
         // maximize filename length to 255 bytes
         $ext = pathinfo($filename, PATHINFO_EXTENSION);
         return mb_strcut(
-                pathinfo($filename, PATHINFO_FILENAME),
-                0,
-                255 - ($ext ? strlen($ext) + 1 : 0),
-                mb_detect_encoding($filename)
-            ) . ($ext ? '.' . $ext : '');
+            pathinfo($filename, PATHINFO_FILENAME),
+            0,
+            255 - ($ext ? strlen($ext) + 1 : 0),
+            mb_detect_encoding($filename) ?: null
+        ) . ($ext ? '.' . $ext : '');
     }
 
     public function beautifyFilename($filename)
@@ -318,7 +323,7 @@ abstract class pdfdocumentsGeneric extends Base implements genericInterface
         // reduce consecutive characters
         $filename = preg_replace([
             // "file name.zip" becomes "file-name.zip"
-            '/ +/',
+            '/\s+/',
             // "file___name.zip" becomes "file-name.zip"
             '/_{2,}/',
             // "file---name.zip" becomes "file-name.zip"
diff --git a/Tests/Unit/Application/Model/AbstractClasses/pdfDocumentsGenericTest.php b/Tests/Unit/Application/Model/AbstractClasses/pdfDocumentsGenericTest.php
new file mode 100644
index 0000000..7cc90c1
--- /dev/null
+++ b/Tests/Unit/Application/Model/AbstractClasses/pdfDocumentsGenericTest.php
@@ -0,0 +1,132 @@
+ * @link http://www.oxidmodule.com
+ */
+
+namespace D3\PdfDocuments\Tests\Unit\Application\Model\AbstractClasses;
+
+use Assert\InvalidArgumentException;
+use D3\PdfDocuments\Application\Model\AbstractClasses\pdfdocumentsGeneric;
+use D3\PdfDocuments\Application\Model\Constants;
+use D3\PdfDocuments\Application\Model\Interfaces\pdfdocumentsGenericInterface as genericInterface;
+use Generator;
+use OxidEsales\Eshop\Core\Base;
+use OxidEsales\Eshop\Core\Exception\StandardException;
+use OxidEsales\Eshop\Core\Registry;
+use OxidEsales\Eshop\Core\UtilsView;
+use OxidEsales\EshopCommunity\Internal\Container\ContainerFactory;
+use OxidEsales\EshopCommunity\Internal\Framework\Module\Facade\ModuleSettingServiceInterface;
+use OxidEsales\EshopCommunity\Internal\Framework\Templating\TemplateRenderer;
+use OxidEsales\EshopCommunity\Internal\Framework\Templating\TemplateRendererBridgeInterface;
+use OxidEsales\Twig\Resolver\TemplateChain\TemplateNotInChainException;
+use PHPUnit\Framework\MockObject\MockObject;
+use PHPUnit\Framework\TestCase;
+use Psr\Container\ContainerExceptionInterface;
+use Psr\Container\NotFoundExceptionInterface;
+use ReflectionException;
+use Spipu\Html2Pdf\Exception\Html2PdfException;
+use Spipu\Html2Pdf\Html2Pdf;
+use Symfony\Component\String\UnicodeString;
+use Twig\Error\Error;
+
+abstract class pdfDocumentsGenericTest extends TestCase
+{
+    /**
+     * setup basic requirements
+     * @throws ReflectionException
+     */
+    public function setUp(): void
+    {
+        parent::setUp();
+
+        $this->_oModel = oxNew(pdfdocumentsGeneric::class);
+    }
+
+    /**
+     * @covers \D3\ModCfg\Application\Model\d3filesystem::filterFilename
+     * @test
+     * @param $filename
+     * @param $expected
+     * @param $beautify
+     * @throws ReflectionException
+     * @dataProvider filterFilenameTestDataProvider
+     */
+    public function filterFilenameTest($filename, $expected, $beautify)
+    {
+        /** @var pdfdocumentsGeneric|MockObject $modelMock */
+        $modelMock = $this->getMockBuilder(pdfdocumentsGeneric::class)
+            ->onlyMethods(['beautifyFilename'])
+            ->getMock();
+        $modelMock->expects($this->exactly((int) $beautify))->method('beautifyFilename')->willReturnArgument(0);
+
+        $this->assertSame(
+            $expected,
+            $this->callMethod(
+                $modelMock,
+                'filterFilename',
+                [$filename, $beautify]
+            )
+        );
+    }
+
+    /**
+     * @return Generator
+     */
+    public function filterFilenameTestDataProvider(): Generator
+    {
+        yield 'file system reserved' => ["ab<>cd\\ef*ghi.ext", 'ab--cd-ef-ghi.ext', false];
+        yield 'control characters' => [".abc\x00def\x01ghi.ext", 'abc-def-ghi.ext', true];
+        yield 'non-printing characters' => ["..abc\x7Fdef\xA0ghi.ext", 'abc-def ghi.ext', false];
+        yield 'URI reserved' => ['abc#def@&ghi.ext', 'abc-def--ghi.ext', true];
+        yield 'URL unsafe characters' => ["abc{def~ghi.ext", 'abc-def-ghi.ext', false];
+        yield 'umlauts' => ["abücdßefÄgh.ext", 'abucdssefAgh.ext', false];
+        yield 'accents' => ["abçcdïeféghùijôkl.ext", 'abccdiefeghuijokl.ext', false];
+        yield 'currency signs' => ['ab€cd£ef$gh?ij¢kl¥m.ext', 'ab-cdGBPef-gh-ijcklJPYm.ext', false];
+        // yield 'cyrillic' => ['????????', 'foo', false];
+        // yield 'arabic' => ['???????? ???????', 'foo', false];
+        yield 'long string' => [str_repeat("a", 300).".ext", str_repeat('a', 251).'.ext', true];
+    }
+
+    /**
+     * @covers \D3\ModCfg\Application\Model\d3filesystem::beautifyFilename
+     * @test
+     * @param $fileName
+     * @param $expected
+     * @throws ReflectionException
+     * @dataProvider beautifyFilenameTestDataProvider
+     */
+    public function beautifyFilenameTest($fileName, $expected)
+    {
+        $this->assertSame(
+            $expected,
+            $this->callMethod(
+                $this->_oModel,
+                'beautifyFilename',
+                [$fileName]
+            )
+        );
+    }
+
+    /**
+     * @return Generator
+     */
+    public function beautifyFilenameTestDataProvider(): Generator
+    {
+        yield 'spaces' => ['file name.zip', 'file-name.zip'];
+        yield 'underscores' => ['file___name.zip', 'file-name.zip'];
+        yield 'dashes' => ['file---name.zip', 'file-name.zip'];
+        yield 'dot separated dashes' => ['file--.--.-.--name.zip', 'file.name.zip'];
+        yield 'dotted' => [' file...name..zip ', 'file.name.zip'];
+        yield 'mixed cases' => ['fIleNaMe..zIp', 'filename.zip'];
+        yield 'trimmed' => ['.file-name.-', 'file-name'];
+        yield 'single underscore' => ['file_name', 'file_name'];
+        yield 'empty' => ['', ''];
+        yield 'null' => [null, ''];
+        yield 'false' => [false, ''];
+    }
+}
\ No newline at end of file
diff --git a/composer.json b/composer.json
index eb28114..922f67a 100644
--- a/composer.json
+++ b/composer.json
@@ -30,7 +30,8 @@
         "php": "^8.0",
         "oxid-esales/oxideshop-ce": "7.0 - 7.2",
         "spipu/html2pdf": "~5.2.8",
-        "beberlei/assert": "^3.3.2"
+        "beberlei/assert": "^3.3.2",
+        "symfony/string": "^6"
     },
     "autoload": {
         "psr-4": {