Fix various HTML parse errors

This commit is contained in:
Friendika 2011-10-20 16:48:07 -07:00
parent bef7a5d66b
commit 01164c8c2f
6 changed files with 42 additions and 15 deletions

View file

@ -30,8 +30,11 @@ function scrape_dfrn($url) {
} }
} }
try {
$dom = HTML5_Parser::parse($s); $dom = HTML5_Parser::parse($s);
} catch (DOMException $e) {
logger('scrape_dfrn: parse error: ' . $e);
}
if(! $dom) if(! $dom)
return $ret; return $ret;
@ -132,9 +135,11 @@ function scrape_meta($url) {
} }
} }
try {
$dom = HTML5_Parser::parse($s); $dom = HTML5_Parser::parse($s);
} catch (DOMException $e) {
logger('scrape_meta: parse error: ' . $e);
}
if(! $dom) if(! $dom)
return $ret; return $ret;
@ -177,7 +182,11 @@ function scrape_vcard($url) {
} }
} }
try {
$dom = HTML5_Parser::parse($s); $dom = HTML5_Parser::parse($s);
} catch (DOMException $e) {
logger('scrape_vcard: parse error: ' . $e);
}
if(! $dom) if(! $dom)
return $ret; return $ret;
@ -243,7 +252,11 @@ function scrape_feed($url) {
} }
} }
try {
$dom = HTML5_Parser::parse($s); $dom = HTML5_Parser::parse($s);
} catch (DOMException $e) {
logger('scrape_feed: parse error: ' . $e);
}
if(! $dom) if(! $dom)
return $ret; return $ret;

View file

@ -53,7 +53,12 @@ function parse_event($h) {
$ret = array(); $ret = array();
try {
$dom = HTML5_Parser::parse($h); $dom = HTML5_Parser::parse($h);
} catch (DOMException $e) {
logger('parse_event: parse error: ' . $e);
}
if(! $dom) if(! $dom)
return $ret; return $ret;

View file

@ -429,7 +429,12 @@ function lrdd($uri) {
// don't try and parse raw xml as html // don't try and parse raw xml as html
if(! strstr($html,'<?xml')) { if(! strstr($html,'<?xml')) {
require_once('library/HTML5/Parser.php'); require_once('library/HTML5/Parser.php');
$dom = @HTML5_Parser::parse($html);
try {
$dom = HTML5_Parser::parse($html);
} catch (DOMException $e) {
logger('lrdd: parse error: ' . $e);
}
if($dom) { if($dom) {
$items = $dom->getElementsByTagName('link'); $items = $dom->getElementsByTagName('link');

View file

@ -3041,6 +3041,8 @@ class HTML5_TreeBuilder {
if (!empty($token['attr'])) { if (!empty($token['attr'])) {
foreach($token['attr'] as $attr) { foreach($token['attr'] as $attr) {
// mike@macgirvin.com 2011-10-21, stray double quotes in attribute names cause everything to abort, so strip them
$attr['name'] = str_replace('"','',$attr['name']);
if(!$el->hasAttribute($attr['name'])) { if(!$el->hasAttribute($attr['name'])) {
$el->setAttribute($attr['name'], $attr['value']); $el->setAttribute($attr['name'], $attr['value']);
} }

View file

@ -88,9 +88,11 @@ function parse_url_content(&$a) {
$purifier = new HTMLPurifier($config); $purifier = new HTMLPurifier($config);
$s = $purifier->purify($s); $s = $purifier->purify($s);
// logger('parse_url: purified: ' . $s, LOGGER_DATA); try {
$dom = HTML5_Parser::parse($s);
$dom = @HTML5_Parser::parse($s); } catch (DOMException $e) {
logger('scrape_dfrn: parse error: ' . $e);
}
if(! $dom) { if(! $dom) {
echo sprintf($template,$url,$url,'') . $str_tags; echo sprintf($template,$url,$url,'') . $str_tags;

View file

@ -33,19 +33,19 @@
<dl class="entity_photo"> <dl class="entity_photo">
<dt>Photo</dt> <dt>Photo</dt>
<dd> <dd>
<img class="photo avatar" height="300px" width="300px" src="$diaspora.photo300"> <img class="photo avatar" height="300" width="300" src="$diaspora.photo300">
</dd> </dd>
</dl> </dl>
<dl class="entity_photo_medium"> <dl class="entity_photo_medium">
<dt>Photo</dt> <dt>Photo</dt>
<dd> <dd>
<img class="photo avatar" height="100px" width="100px" src="$diaspora.photo100"> <img class="photo avatar" height="100" width="100" src="$diaspora.photo100">
</dd> </dd>
</dl> </dl>
<dl class="entity_photo_small"> <dl class="entity_photo_small">
<dt>Photo</dt> <dt>Photo</dt>
<dd> <dd>
<img class="photo avatar" height="50px" width="50px" src="$diaspora.photo50"> <img class="photo avatar" height="50" width="50" src="$diaspora.photo50">
</dd> </dd>
</dl> </dl>
<dl class="entity_searchable"> <dl class="entity_searchable">