diff --git a/libs/Format/HTML/Generator.php b/libs/Format/HTML/Generator.php
index 960af80..ef7f73e 100755
--- a/libs/Format/HTML/Generator.php
+++ b/libs/Format/HTML/Generator.php
@@ -102,6 +102,10 @@ class Generator implements \Todaymade\Daux\Format\Base\Generator, LiveGenerator
$destination . DIRECTORY_SEPARATOR . 'tipuesearch' . DIRECTORY_SEPARATOR . 'tipuesearch_content.json',
json_encode(['pages' => $this->indexed_pages])
);
+
+ if (json_last_error()) {
+ echo "Could not write search index: \n" . json_last_error_msg() . "\n";
+ }
}
}
@@ -115,7 +119,7 @@ class Generator implements \Todaymade\Daux\Format\Base\Generator, LiveGenerator
* @param string $text
* @return string
*/
- private function strip_html_tags($text)
+ private function sanitize($text)
{
$text = preg_replace(
[
@@ -146,7 +150,15 @@ class Generator implements \Todaymade\Daux\Format\Base\Generator, LiveGenerator
$text
);
- return trim(preg_replace('/\s+/', ' ', strip_tags($text)));
+ $text = trim(preg_replace('/\s+/', ' ', strip_tags($text)));
+
+ // Text containing invalid UTF-8 byte sequences makes json_encode() fail.
+ // Passing it through iconv with //IGNORE discards the offending bytes.
+ if (function_exists("iconv")) {
+ $text = iconv('UTF-8', 'UTF-8//IGNORE', $text);
+ }
+
+ return $text;
}
/**
@@ -196,7 +208,7 @@ class Generator implements \Todaymade\Daux\Format\Base\Generator, LiveGenerator
if ($index_pages) {
$this->indexed_pages[] = [
'title' => $node->getTitle(),
- 'text' => utf8_encode($this->strip_html_tags($generated->getPureContent())),
+ 'text' => $this->sanitize($generated->getPureContent()),
'tags' => '',
'url' => $node->getUrl(),
];