Fix json output for search index, fixes #21

This commit is contained in:
Stéphane Goetz 2017-11-08 00:01:08 +01:00
parent 837fca6886
commit d898eb6edf

View File

@@ -102,6 +102,10 @@ class Generator implements \Todaymade\Daux\Format\Base\Generator, LiveGenerator
$destination . DIRECTORY_SEPARATOR . 'tipuesearch' . DIRECTORY_SEPARATOR . 'tipuesearch_content.json', $destination . DIRECTORY_SEPARATOR . 'tipuesearch' . DIRECTORY_SEPARATOR . 'tipuesearch_content.json',
json_encode(['pages' => $this->indexed_pages]) json_encode(['pages' => $this->indexed_pages])
); );
if (json_last_error()) {
echo "Could not write search index: \n" . json_last_error_msg() . "\n";
}
} }
} }
@@ -115,7 +119,7 @@ class Generator implements \Todaymade\Daux\Format\Base\Generator, LiveGenerator
* @param string $text * @param string $text
* @return string * @return string
*/ */
private function strip_html_tags($text) private function sanitize($text)
{ {
$text = preg_replace( $text = preg_replace(
[ [
@@ -146,7 +150,15 @@ class Generator implements \Todaymade\Daux\Format\Base\Generator, LiveGenerator
$text $text
); );
return trim(preg_replace('/\s+/', ' ', strip_tags($text))); $text = trim(preg_replace('/\s+/', ' ', strip_tags($text)));
// Sometimes strings are detected as invalid UTF-8 and json_encode can't treat them
// iconv can fix those strings
if (function_exists("iconv")) {
$text = iconv('UTF-8', 'UTF-8//IGNORE', $text);
}
return $text;
} }
/** /**
@@ -196,7 +208,7 @@ class Generator implements \Todaymade\Daux\Format\Base\Generator, LiveGenerator
if ($index_pages) { if ($index_pages) {
$this->indexed_pages[] = [ $this->indexed_pages[] = [
'title' => $node->getTitle(), 'title' => $node->getTitle(),
'text' => utf8_encode($this->strip_html_tags($generated->getPureContent())), 'text' => $this->sanitize($generated->getPureContent()),
'tags' => '', 'tags' => '',
'url' => $node->getUrl(), 'url' => $node->getUrl(),
]; ];