daux.io/libs/Format/HTML/Generator.php

240 lines
8.1 KiB
PHP
Raw Normal View History

<?php namespace Todaymade\Daux\Format\HTML;
use Symfony\Component\Console\Input\InputInterface;
2015-07-14 22:06:01 +02:00
use Symfony\Component\Console\Output\OutputInterface;
use Todaymade\Daux\Config;
2015-07-23 17:44:24 +02:00
use Todaymade\Daux\Console\RunAction;
use Todaymade\Daux\Daux;
use Todaymade\Daux\DauxHelper;
use Todaymade\Daux\Format\Base\LiveGenerator;
2016-07-29 23:46:57 +02:00
use Todaymade\Daux\Format\HTML\ContentTypes\Markdown\ContentType;
2015-07-23 17:44:24 +02:00
use Todaymade\Daux\GeneratorHelper;
use Todaymade\Daux\Tree\ComputedRaw;
use Todaymade\Daux\Tree\Directory;
use Todaymade\Daux\Tree\Entry;
use Todaymade\Daux\Tree\Raw;
class Generator implements \Todaymade\Daux\Format\Base\Generator, LiveGenerator
{
use RunAction;

/** @var Daux Application instance handed to the constructor */
protected $daux;

/**
 * @var array[] Search index records collected while pages are generated;
 *              each record holds the keys 'title', 'text', 'tags' and 'url'
 */
protected $indexed_pages = [];
/**
 * Keep a handle on the Daux instance this generator works for.
 *
 * @param Daux $daux
 */
public function __construct(Daux $daux)
{
    $this->daux = $daux;
}
/**
 * List the content types provided by this generator, keyed by name.
 *
 * @return array a single 'markdown' entry holding a ContentType built from the current parameters
 */
public function getContentTypes()
{
    $markdown = new ContentType($this->daux->getParams());

    return [
        'markdown' => $markdown,
    ];
}
/**
 * Prepare the output directory.
 *
 * An existing directory is handed to GeneratorHelper::rmdir(); a missing one is created.
 *
 * @param string $destination
 */
protected function ensureEmptyDestination($destination)
{
    if (is_dir($destination)) {
        GeneratorHelper::rmdir($destination);

        return;
    }

    // Recursive, so a destination below a not-yet-existing parent directory works too.
    mkdir($destination, 0777, true);
}
/**
 * Create the "themes" directory inside $destination and fill it from
 * $local_base through GeneratorHelper::copyRecursive().
 *
 * @param string $destination
 * @param string $local_base
 */
protected function copyThemes($destination, $local_base)
{
    $themes_dir = $destination . DIRECTORY_SEPARATOR . 'themes';

    mkdir($themes_dir);
    GeneratorHelper::copyRecursive($local_base, $themes_dir);
}
/**
 * Generate the complete static site.
 *
 * Copies the themes into a prepared destination directory, generates the whole
 * tree and, when search is enabled, copies the tipuesearch directory and writes
 * the search index (tipuesearch_content.json) into it.
 *
 * @param InputInterface $input read for the 'destination' and 'search' options
 * @param OutputInterface $output
 * @param int $width passed on to runAction()
 */
public function generateAll(InputInterface $input, OutputInterface $output, $width)
{
    $destination = $input->getOption('destination');
    $params = $this->daux->getParams();

    if (is_null($destination)) {
        $destination = $this->daux->local_base . DIRECTORY_SEPARATOR . 'static';
    }

    $this->runAction(
        'Copying Static assets ...',
        $output,
        $width,
        function () use ($destination, $params) {
            $this->ensureEmptyDestination($destination);
            $this->copyThemes($destination, $params->getThemesPath());
        }
    );

    $output->writeln('Generating ...');

    // The command line option is only consulted when the configuration has not enabled search itself.
    if (!array_key_exists('search', $params['html']) || !$params['html']['search']) {
        $params['html']['search'] = $input->getOption('search');
    }

    $this->generateRecursive($this->daux->tree, $destination, $params, $output, $width, $params['html']['search']);

    if (!$params['html']['search']) {
        return;
    }

    $search_dir = $destination . DIRECTORY_SEPARATOR . 'tipuesearch';

    GeneratorHelper::copyRecursive(
        $this->daux->local_base . DIRECTORY_SEPARATOR . 'tipuesearch' . DIRECTORY_SEPARATOR,
        $search_dir
    );

    // Encode before touching the disk: json_encode() returns false on failure and
    // writing that value would silently leave an empty index file behind.
    $index = json_encode(['pages' => $this->indexed_pages]);
    if ($index === false) {
        $output->writeln('Could not write search index: ' . json_last_error_msg());

        return;
    }

    file_put_contents($search_dir . DIRECTORY_SEPARATOR . 'tipuesearch_content.json', $index);
}
/**
 * Reduce rendered HTML to plain text for the search index.
 *
 * Invisible content (head, style, script, embedded objects, ...) is dropped, a
 * line break is inserted in front of block-level tags so that words are not
 * joined once the tags are gone, the remaining tags are stripped, and whitespace
 * is collapsed to single spaces and trimmed.
 * Modified from: http://nadeausoftware.com/articles/2007/09/php_tip_how_strip_html_tags_web_page
 *
 * @param string $text
 * @return string
 */
private function sanitize($text)
{
    $text = (string) $text;

    // All patterns below carry the /u modifier, and PCRE rejects a subject that is
    // not valid UTF-8 as a whole (preg_replace() returns null). Drop the offending
    // bytes first so one bad sequence does not wipe the text of the entire page.
    if (preg_match('//u', $text) !== 1) {
        $scrubbed = iconv('UTF-8', 'UTF-8//IGNORE', $text);
        $text = $scrubbed === false ? '' : $scrubbed;
    }

    $patterns = [
        // Remove invisible content
        '@<head[^>]*?>.*?</head>@siu',
        '@<style[^>]*?>.*?</style>@siu',
        '@<script[^>]*?.*?</script>@siu',
        '@<object[^>]*?.*?</object>@siu',
        '@<embed[^>]*?.*?</embed>@siu',
        '@<applet[^>]*?.*?</applet>@siu',
        '@<noframes[^>]*?.*?</noframes>@siu',
        '@<noscript[^>]*?.*?</noscript>@siu',
        '@<noembed[^>]*?.*?</noembed>@siu',
        // Add line breaks before and after blocks
        '@</?((address)|(blockquote)|(center)|(del))@iu',
        '@</?((div)|(h[1-9])|(ins)|(isindex)|(p)|(pre))@iu',
        '@</?((dir)|(dl)|(dt)|(dd)|(li)|(menu)|(ol)|(ul))@iu',
        '@</?((table)|(th)|(td)|(caption))@iu',
        '@</?((form)|(button)|(fieldset)|(legend)|(input))@iu',
        '@</?((label)|(select)|(optgroup)|(option)|(textarea))@iu',
        '@</?((frameset)|(frame)|(iframe))@iu',
    ];

    // One replacement per pattern: a space for the nine "invisible content"
    // patterns, a line break in front of the match for the seven block patterns.
    $replacements = [
        ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
        "\n\$0", "\n\$0", "\n\$0", "\n\$0", "\n\$0", "\n\$0", "\n\$0",
    ];

    $stripped = preg_replace($patterns, $replacements, $text);
    if ($stripped === null) {
        // PCRE gave up (for example on a backtrack limit); strip_tags() below
        // still removes the markup, so keep the text rather than losing it.
        $stripped = $text;
    }

    $plain = strip_tags($stripped);

    // Without /u, \s is matched byte-wise and, depending on the PCRE character
    // tables in use, can hit single bytes of a multi-byte UTF-8 character.
    $collapsed = preg_replace('/\s+/u', ' ', $plain);
    if ($collapsed === null) {
        $collapsed = $plain;
    }

    return trim($collapsed);
}
/**
 * Recursively generate the documentation
 *
 * Sub-directories are created below $output_dir and processed first-come in
 * entry order; every other entry is written to a file named after its key.
 *
 * @param Directory $tree directory whose entries are generated
 * @param string $output_dir directory the entries of $tree are written to
 * @param \Todaymade\Daux\Config $params shared configuration, rebased in place for each level
 * @param OutputInterface $output
 * @param int $width
 * @param bool $index_pages whether generated pages are also recorded in $this->indexed_pages
 * @param string $base_url relative prefix, extended by '../' for every nesting level
 * @throws \Exception
 */
private function generateRecursive(Directory $tree, $output_dir, $params, $output, $width, $index_pages, $base_url = '')
{
    DauxHelper::rebaseConfiguration($params, $base_url);

    if ($base_url !== '' && empty($params['entry_page'])) {
        $params['entry_page'] = $tree->getFirstPage();
    }

    foreach ($tree->getEntries() as $key => $node) {
        if ($node instanceof Directory) {
            $new_output_dir = $output_dir . DIRECTORY_SEPARATOR . $key;
            mkdir($new_output_dir);
            $this->generateRecursive($node, $new_output_dir, $params, $output, $width, $index_pages, '../' . $base_url);

            // Rebase configuration again as $params is a shared object
            DauxHelper::rebaseConfiguration($params, $base_url);

            continue;
        }

        $this->runAction(
            '- ' . $node->getUrl(),
            $output,
            $width,
            function () use ($node, $output_dir, $key, $params, $index_pages) {
                $target = $output_dir . DIRECTORY_SEPARATOR . $key;

                // Raw entries are copied as they are; they are never indexed.
                if ($node instanceof Raw) {
                    copy($node->getPath(), $target);

                    return;
                }

                $this->daux->tree->setActiveNode($node);

                $generated = $this->generateOne($node, $params);
                file_put_contents($target, $generated->getContent());

                if ($index_pages) {
                    $this->indexed_pages[] = [
                        'title' => $node->getTitle(),
                        'text' => $this->sanitize($generated->getPureContent()),
                        'tags' => '',
                        'url' => $node->getUrl(),
                    ];
                }
            }
        );
    }
}
/**
 * Build the page object for a single entry.
 *
 * Raw and ComputedRaw entries get their dedicated page classes; any other entry
 * becomes a ContentPage using the content type resolved for it.
 *
 * @param Entry $node
 * @param Config $params receives the node's URL under the 'request' key for content pages
 * @return \Todaymade\Daux\Format\Base\Page
 */
public function generateOne(Entry $node, Config $params)
{
    if ($node instanceof Raw) {
        return new RawPage($node->getPath());
    }

    if ($node instanceof ComputedRaw) {
        return new ComputedRawPage($node);
    }

    $params['request'] = $node->getUrl();

    $handler = $this->daux->getContentTypeHandler();

    return ContentPage::fromFile($node, $params, $handler->getType($node));
}
}