Updated code. Here is the complete code. It seems to work.
/**
 * Parse the HTML code and convert it into a flat list of actions.
 *
 * The HTML is first split by _searchCode() into "code" parts (candidate tags)
 * and "txt" parts. Each candidate tag is analysed by _analiseCode(); a part
 * that is not a real HTML tag is handled as plain text. While walking the
 * parts, the open/close nesting of the tags is validated. Finally the text
 * actions that touch a block-level tag are trimmed, the empty ones are
 * dropped, and the re-indexed list is stored in $this->code.
 *
 * @access public
 * @return void
 * @throws HTML2PDF_exception code 3 when a closing tag is found while no tag is open,
 *                            code 4 when a closing tag does not match the last opened tag,
 *                            code 5 when some tags are still open at the end of the HTML
 */
public function parse()
{
    // stack of the names of the currently opened tags (used for HTML validation)
    $parents = array();

    // flag : are we in a <textarea> tag ?
    $tagTextAreaIn = false;

    // flag : are we in a <pre> (or <code>) tag ?
    // NOTE(review): this is a plain boolean, so closing a <code> nested in a <pre>
    // also ends the "pre" mode for the rest of the <pre> - confirm whether intended
    $tagPreIn = false;

    // action inserted between two lines of the content of a <pre> tag
    $tagPreBr = array(
        'name'  => 'br',
        'close' => false,
        'param' => array(
            'style' => array(),
            'num'   => 0
        )
    );

    // tags that do not need a closing tag : they are not pushed on the validation stack
    $tagsNotClosed = array(
        'br', 'hr', 'img', 'col',
        'input', 'link', 'option',
        'circle', 'ellipse', 'path', 'rect', 'line', 'polygon', 'polyline'
    );

    // search the HTML tags : $tmp is filled by _searchCode()
    $tmp = array();
    $this->_searchCode($tmp);

    // all the actions to do
    $actions = array();

    // foreach part of the HTML code
    foreach ($tmp as $part) {
        // if it is a tag code
        if ($part[0]=='code') {
            // analyse the HTML code
            $res = $this->_analiseCode($part[1]);

            if ($res) {
                // it is a real HTML tag : save its position in the HTML code (used in error reports)
                $res['html_pos'] = $part[2];

                // if the tag must be closed
                if (!in_array($res['name'], $tagsNotClosed)) {
                    if ($res['close']) {
                        // closing tag => HTML validation against the stack of opened tags
                        $depth = count($parents);
                        if ($depth<1) {
                            throw new HTML2PDF_exception(3, $res['name'], $this->getHtmlErrorCode($res['html_pos']));
                        }
                        if ($parents[$depth-1]!=$res['name']) {
                            throw new HTML2PDF_exception(4, $parents, $this->getHtmlErrorCode($res['html_pos']));
                        }
                        array_pop($parents);
                    } elseif ($res['autoclose']) {
                        // autoclosed tag : save the opened tag now...
                        $actions[] = $res;

                        // ...and prepare the matching closed tag, which is saved below
                        // NOTE(review): every other action uses the key 'param'; writing to
                        // 'params' leaves the 'param' entry of the closing action untouched.
                        // Kept as is to preserve the behavior - confirm whether it is a typo.
                        $res['params'] = array();
                        $res['close'] = true;
                    } else {
                        // normal opening tag : push it on the validation stack
                        $parents[] = $res['name'];
                    }

                    // a not autoclosed <textarea> tag switches the textarea flag,
                    // a not autoclosed <pre> (or <code>) tag switches the pre flag
                    if ($res['name']=='textarea' && !$res['autoclose']) {
                        $tagTextAreaIn = !$res['close'];
                    } elseif (($res['name']=='pre' || $res['name']=='code') && !$res['autoclose']) {
                        $tagPreIn = !$res['close'];
                    }
                }

                // save the action to convert
                $actions[] = $res;
            } else {
                // it is not a real HTML tag => we handle it as text
                $part[0] = 'txt';
            }
        }

        // if it is text (or a fake tag downgraded to text just above)
        if ($part[0]=='txt') {
            if ($tagTextAreaIn) {
                // in a <textarea> tag : drop the carriage returns and keep the spaces as they are
                $actions[] = array(
                    'name'  => 'write',
                    'close' => false,
                    'param' => array('txt' => $this->_prepareTxt(str_replace("\r", '', $part[1]), false)),
                );
            } elseif (!$tagPreIn) {
                // normal text
                $actions[] = array(
                    'name'  => 'write',
                    'close' => false,
                    'param' => array('txt' => $this->_prepareTxt($part[1])),
                );
            } else {
                // in a <pre> tag : one text action per line, separated by a line break
                $lines = explode("\n", str_replace("\r", '', $part[1]));
                foreach ($lines as $k => $txt) {
                    // keep the tabulations and the spaces of the line.
                    // The previous code replaced a space by a space, which did nothing.
                    // Assumes _prepareTxt() decodes the HTML entities, as the HTML_TAB
                    // replacement appears to rely on - TODO confirm.
                    $txt = str_replace("\t", self::HTML_TAB, $txt);
                    $txt = str_replace(' ', '&nbsp;', $txt);

                    // add a line break before each line, except the first one
                    if ($k>0) {
                        $actions[] = $tagPreBr;
                    }

                    // save the action
                    $actions[] = array(
                        'name'  => 'write',
                        'close' => false,
                        'param' => array('txt' => $this->_prepareTxt($txt, false)),
                    );
                }
            }
        }
    }

    // for each identified text action, we have to clean up the beginning and the end
    // of the text, based on the tags that surround it

    // list of the tags to clean
    $tagsToClean = array(
        'page', 'page_header', 'page_footer', 'form',
        'table', 'thead', 'tfoot', 'tr', 'td', 'th', 'br',
        'div', 'hr', 'p', 'ul', 'ol', 'li',
        'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
        'bookmark', 'fieldset', 'legend',
        'draw', 'circle', 'ellipse', 'path', 'rect', 'line', 'g', 'polygon', 'polyline',
        'option'
    );

    // foreach action
    $nb = count($actions);
    for ($k=0; $k<$nb; $k++) {
        // only the text actions are cleaned
        if ($actions[$k]['name']!='write') {
            continue;
        }

        // if the action before the text is a tag to clean => ltrim on the text.
        // isset() is required : the previous action may have been removed by this very
        // loop (empty text), and reading it would raise an "undefined key" warning.
        if (isset($actions[$k-1]) && in_array($actions[$k-1]['name'], $tagsToClean)) {
            $actions[$k]['param']['txt'] = ltrim($actions[$k]['param']['txt']);
        }

        // if the action after the text is a tag to clean => rtrim on the text
        if (isset($actions[$k+1]) && in_array($actions[$k+1]['name'], $tagsToClean)) {
            $actions[$k]['param']['txt'] = rtrim($actions[$k]['param']['txt']);
        }

        // if the text is empty => remove the action
        if ((string) $actions[$k]['param']['txt']==='') {
            unset($actions[$k]);
        }
    }

    // if some tags are still opened => HTML validator ERROR
    if (count($parents)) {
        throw new HTML2PDF_exception(5, $parents);
    }

    // save the actions to do, re-indexed because some of them may have been removed
    $this->code = array_values($actions);
}