Fix various HTML parse errors
This commit is contained in:
parent
bef7a5d66b
commit
01164c8c2f
6 changed files with 42 additions and 15 deletions
|
@ -30,8 +30,11 @@ function scrape_dfrn($url) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
$dom = HTML5_Parser::parse($s);
|
$dom = HTML5_Parser::parse($s);
|
||||||
|
} catch (DOMException $e) {
|
||||||
|
logger('scrape_dfrn: parse error: ' . $e);
|
||||||
|
}
|
||||||
|
|
||||||
if(! $dom)
|
if(! $dom)
|
||||||
return $ret;
|
return $ret;
|
||||||
|
@ -132,9 +135,11 @@ function scrape_meta($url) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
$dom = HTML5_Parser::parse($s);
|
||||||
$dom = HTML5_Parser::parse($s);
|
} catch (DOMException $e) {
|
||||||
|
logger('scrape_meta: parse error: ' . $e);
|
||||||
|
}
|
||||||
|
|
||||||
if(! $dom)
|
if(! $dom)
|
||||||
return $ret;
|
return $ret;
|
||||||
|
@ -177,7 +182,11 @@ function scrape_vcard($url) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
$dom = HTML5_Parser::parse($s);
|
try {
|
||||||
|
$dom = HTML5_Parser::parse($s);
|
||||||
|
} catch (DOMException $e) {
|
||||||
|
logger('scrape_vcard: parse error: ' . $e);
|
||||||
|
}
|
||||||
|
|
||||||
if(! $dom)
|
if(! $dom)
|
||||||
return $ret;
|
return $ret;
|
||||||
|
@ -243,7 +252,11 @@ function scrape_feed($url) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
$dom = HTML5_Parser::parse($s);
|
try {
|
||||||
|
$dom = HTML5_Parser::parse($s);
|
||||||
|
} catch (DOMException $e) {
|
||||||
|
logger('scrape_feed: parse error: ' . $e);
|
||||||
|
}
|
||||||
|
|
||||||
if(! $dom)
|
if(! $dom)
|
||||||
return $ret;
|
return $ret;
|
||||||
|
|
|
@ -53,7 +53,12 @@ function parse_event($h) {
|
||||||
|
|
||||||
$ret = array();
|
$ret = array();
|
||||||
|
|
||||||
$dom = HTML5_Parser::parse($h);
|
|
||||||
|
try {
|
||||||
|
$dom = HTML5_Parser::parse($h);
|
||||||
|
} catch (DOMException $e) {
|
||||||
|
logger('parse_event: parse error: ' . $e);
|
||||||
|
}
|
||||||
|
|
||||||
if(! $dom)
|
if(! $dom)
|
||||||
return $ret;
|
return $ret;
|
||||||
|
|
|
@ -429,7 +429,12 @@ function lrdd($uri) {
|
||||||
// don't try and parse raw xml as html
|
// don't try and parse raw xml as html
|
||||||
if(! strstr($html,'<?xml')) {
|
if(! strstr($html,'<?xml')) {
|
||||||
require_once('library/HTML5/Parser.php');
|
require_once('library/HTML5/Parser.php');
|
||||||
$dom = @HTML5_Parser::parse($html);
|
|
||||||
|
try {
|
||||||
|
$dom = HTML5_Parser::parse($html);
|
||||||
|
} catch (DOMException $e) {
|
||||||
|
logger('lrdd: parse error: ' . $e);
|
||||||
|
}
|
||||||
|
|
||||||
if($dom) {
|
if($dom) {
|
||||||
$items = $dom->getElementsByTagName('link');
|
$items = $dom->getElementsByTagName('link');
|
||||||
|
|
|
@ -3041,6 +3041,8 @@ class HTML5_TreeBuilder {
|
||||||
|
|
||||||
if (!empty($token['attr'])) {
|
if (!empty($token['attr'])) {
|
||||||
foreach($token['attr'] as $attr) {
|
foreach($token['attr'] as $attr) {
|
||||||
|
// mike@macgirvin.com 2011-10-21, stray double quotes cause everything to abort
|
||||||
|
$attr['name'] = str_replace('"','',$attr['name']);
|
||||||
if(!$el->hasAttribute($attr['name'])) {
|
if(!$el->hasAttribute($attr['name'])) {
|
||||||
$el->setAttribute($attr['name'], $attr['value']);
|
$el->setAttribute($attr['name'], $attr['value']);
|
||||||
}
|
}
|
||||||
|
|
|
@ -88,9 +88,11 @@ function parse_url_content(&$a) {
|
||||||
$purifier = new HTMLPurifier($config);
|
$purifier = new HTMLPurifier($config);
|
||||||
$s = $purifier->purify($s);
|
$s = $purifier->purify($s);
|
||||||
|
|
||||||
// logger('parse_url: purified: ' . $s, LOGGER_DATA);
|
try {
|
||||||
|
$dom = HTML5_Parser::parse($s);
|
||||||
$dom = @HTML5_Parser::parse($s);
|
} catch (DOMException $e) {
|
||||||
|
logger('scrape_dfrn: parse error: ' . $e);
|
||||||
|
}
|
||||||
|
|
||||||
if(! $dom) {
|
if(! $dom) {
|
||||||
echo sprintf($template,$url,$url,'') . $str_tags;
|
echo sprintf($template,$url,$url,'') . $str_tags;
|
||||||
|
|
|
@ -33,19 +33,19 @@
|
||||||
<dl class="entity_photo">
|
<dl class="entity_photo">
|
||||||
<dt>Photo</dt>
|
<dt>Photo</dt>
|
||||||
<dd>
|
<dd>
|
||||||
<img class="photo avatar" height="300px" width="300px" src="$diaspora.photo300">
|
<img class="photo avatar" height="300" width="300" src="$diaspora.photo300">
|
||||||
</dd>
|
</dd>
|
||||||
</dl>
|
</dl>
|
||||||
<dl class="entity_photo_medium">
|
<dl class="entity_photo_medium">
|
||||||
<dt>Photo</dt>
|
<dt>Photo</dt>
|
||||||
<dd>
|
<dd>
|
||||||
<img class="photo avatar" height="100px" width="100px" src="$diaspora.photo100">
|
<img class="photo avatar" height="100" width="100" src="$diaspora.photo100">
|
||||||
</dd>
|
</dd>
|
||||||
</dl>
|
</dl>
|
||||||
<dl class="entity_photo_small">
|
<dl class="entity_photo_small">
|
||||||
<dt>Photo</dt>
|
<dt>Photo</dt>
|
||||||
<dd>
|
<dd>
|
||||||
<img class="photo avatar" height="50px" width="50px" src="$diaspora.photo50">
|
<img class="photo avatar" height="50" width="50" src="$diaspora.photo50">
|
||||||
</dd>
|
</dd>
|
||||||
</dl>
|
</dl>
|
||||||
<dl class="entity_searchable">
|
<dl class="entity_searchable">
|
||||||
|
|
Loading…
Reference in a new issue