*/
class P4Cms_Filter_HtmlToTextTest extends TestCase
{
    /**
     * Run every fixture through P4Cms_Filter_HtmlToText and verify the output.
     *
     * For each case the filter options 'keepLinks' and 'keepEntities' are
     * (re)set - defaulting to false when the case does not specify them -
     * and the filtered 'html' must be identical (assertSame) to 'text'.
     */
    public function testFilter()
    {
        // options a case may override; anything not listed in a case is false.
        $optionDefaults = array(
            'keepLinks'    => false,
            'keepEntities' => false,
        );

        $filter = new P4Cms_Filter_HtmlToText();

        foreach ($this->getConversionCases() as $case) {
            // array union: keys present in the case win, missing ones
            // are filled in from the defaults.
            $case = $case + $optionDefaults;

            $filter->setOptions(
                array(
                    'keepLinks'    => $case['keepLinks'],
                    'keepEntities' => $case['keepEntities'],
                )
            );

            // normalize CRLF to LF in the expected text (presumably so that
            // multi-line fixtures still match when this file is checked out
            // with Windows line-endings - confirm); null is left untouched.
            $expected = $case['text'];
            if (is_string($expected)) {
                $expected = str_replace("\r\n", "\n", $expected);
            }

            $actual = $filter->filter($case['html']);

            $this->assertSame(
                $expected,
                $actual,
                $case['label'] . ': Expected text'
            );
        }
    }

    /**
     * Build the table of conversion fixtures used by testFilter().
     *
     * Each entry carries:
     *  - 'label'        description, prefixed with the source line (__LINE__)
     *                   so a failure can be traced to its fixture
     *  - 'html'         input handed to the filter
     *  - 'text'         exact output expected from the filter
     *  - 'keepLinks'    (optional) value for the filter option of that name
     *  - 'keepEntities' (optional) value for the filter option of that name
     *
     * NOTE(review): several 'html' values are textually identical even though
     * their labels describe different markup - verify the fixtures are intact.
     *
     * Multi-line literals intentionally continue at column zero; indenting
     * them would add whitespace to the fixture values.
     *
     * @return  array   list of fixture arrays as described above.
     */
    private function getConversionCases()
    {
        return array(
            array(
                'label' => __LINE__ . ' - null html',
                'html'  => null,
                'text'  => null,
            ),
            array(
                'label' => __LINE__ . ' - no html',
                'html'  => '',
                'text'  => null,
            ),
            array(
                'label' => __LINE__ . ' - just text',
                'html'  => 'the quick brown fox jumped over the lazy dog.',
                'text'  => 'the quick brown fox jumped over the lazy dog.',
            ),
            array(
                'label' => __LINE__ . ' - simple HTML',
                'html'  => 'This is
a test¡
',
                'text'  => "This is\na test¡",
            ),
            array(
                'label' => __LINE__ . ' - simple HTML, space after keyword',
                'html'  => 'This is
a test¡
',
                'text'  => "This is\na test¡",
            ),
            array(
                'label' => __LINE__ . ' - simple HTML, space before and after keyword',
                'html'  => 'This is< br />a test¡
',
                'text'  => "This is\na test¡",
            ),
            array(
                'label' => __LINE__ . ' - simple HTML, space before and after keyword plus attribute',
                'html'  => 'This is< br class="foo" />a test¡
',
                'text'  => "This is\na test¡",
            ),
            array(
                'label' => __LINE__ . ' - simple HTML, two line-breaks',
                'html'  => 'This is
a test¡
',
                'text'  => "This is\n\na test¡",
            ),
            array(
                'label' => __LINE__ . ' - simple HTML, three line-breaks to stay at two',
                'html'  => 'This is
a test¡
',
                'text'  => "This is\n\na test¡",
            ),
            array(
                'label'        => __LINE__ . ' - simple HTML, keep entities',
                'html'         => 'This is
a test¡
',
                'text'         => <<<'EOT'
This is
a test¡
EOT
                ,
                'keepEntities' => true,
            ),
            array(
                'label' => __LINE__ . ' - simple HTML, with headings',
                'html'  => 'Yabba dabbaMy heading1
the body',
                'text'  => "Yabba dabba\n\n\nMY HEADING1:\n\nthe body",
            ),
            array(
                'label' => __LINE__ . ' - simple HTML, with multiple headings',
                'html'  => 'Yabba dabbaMy heading1
the bodyAnother heading
more body',
                'text'  => "Yabba dabba\n\n\nMY HEADING1:\n\nthe body\n\n\nANOTHER HEADING:\n\nmore body",
            ),
            array(
                'label' => __LINE__ . ' - simple HTML, with headings, space before',
                'html'  => 'Yabba dabba< h4>My heading2the body',
                'text'  => "Yabba dabba\n\n\nMY HEADING2:\n\nthe body",
            ),
            array(
                'label' => __LINE__ . ' - simple HTML, with headings, space before and after',
                'html'  => 'Yabba dabba< h4 >My heading3the body',
                'text'  => "Yabba dabba\n\n\nMY HEADING3:\n\nthe body",
            ),
            array(
                'label' => __LINE__ . ' - simple HTML, with multiple script blocks',
                'html'  => 'This test should not fail',
                'text'  => 'This test should not fail',
            ),
            array(
                'label' => __LINE__ . ' - a link',
                'html'  => 'Please click here.',
                'text'  => 'Please click here [http://perforce.com/].',
            ),
            array(
                'label'     => __LINE__ . ' - a link, keep links',
                'html'      => 'Please click here.',
                'text'      => 'Please click here.',
                'keepLinks' => true,
            ),
            array(
                'label' => __LINE__ . ' - HTML with pre',
                'html'  => <<<'EOH'
Here is some sample code:
$count = 0;
foreach ($list as $item) {
$item->number($count++);
}
EOH
                ,
                'text'  => <<<'EOT'
Here is some sample code:
$count = 0;
foreach ($list as $item) {
$item->number($count++);
}
EOT
                ,
            ),
        );
    }
}