useragent = $this->name . '/' . $this->version . ' (Feed Parser; ' . $this->url . '; Allow like Gecko) Build/' . $this->build;
$this->linkback = '' . $this->name . ' ' . $this->version . '';
}
/****************************************************
CONFIGURE OPTIONS
Set various options (feed URL, XML dump, caching, etc.)
****************************************************/
// Feed URL
/**
 * Set the address that init() will load.
 *
 * The supplied value is handed to $this->fix_protocol() (with 1 as the
 * second argument) and whatever comes back is stored in $this->rss_url.
 * NOTE(review): fix_protocol() is defined outside this view; presumably it
 * normalises the URL scheme -- confirm before relying on that.
 *
 * @param string $url Feed (or web page) address.
 * @return bool Always true.
 */
function feed_url($url) {
    $this->rss_url = $this->fix_protocol($url, 1);

    return true;
}
// XML Dump
/**
 * Turn the XML dump option on or off.
 *
 * The flag is stored in $this->xml_dump. While it is truthy, init() does
 * not read a previously cached copy of the feed.
 *
 * @param mixed $enable Any value; it is interpreted by PHP's boolean rules.
 * @return bool Always true.
 */
function enable_xmldump($enable) {
    // Normalise to a real boolean so the stored flag has a predictable type,
    // in line with how replace_headers() stores its own flag.
    $this->xml_dump = (bool) $enable;

    return true;
}
// Caching
/**
 * Turn feed caching on or off.
 *
 * The flag is stored in $this->caching. init() only reads from, and writes
 * to, the cache files when this flag is truthy (and it resets the flag to
 * false itself when a cache file turns out to be unusable).
 *
 * @param mixed $enable Any value; it is interpreted by PHP's boolean rules.
 * @return bool Always true.
 */
function enable_caching($enable) {
    // Normalise to a real boolean so the stored flag has a predictable type,
    // in line with how replace_headers() stores its own flag.
    $this->caching = (bool) $enable;

    return true;
}
// Cache Timeout
/**
 * Set the cache timeout.
 *
 * The value is converted to an integer and kept in $this->max_minutes;
 * init() passes it, together with the cache location, to clear_cache().
 * NOTE(review): presumably this is the maximum age of a cache file in
 * minutes -- clear_cache() is not visible here, confirm there.
 *
 * @param mixed $minutes Timeout value; converted with PHP's integer rules.
 * @return bool Always true.
 */
function cache_max_minutes($minutes) {
    $timeout = intval($minutes);
    $this->max_minutes = $timeout;

    return true;
}
// Cache Location
/**
 * Set the directory used for cache files.
 *
 * The value is converted to a string and kept in $this->cache_location.
 * init() builds each cache file name as
 * <cache_location>/<urlencoded feed URL>.spc, so no trailing slash is
 * needed on the value given here.
 *
 * @param mixed $location Cache directory path; converted to a string.
 * @return bool Always true.
 */
function cache_location($location) {
    $directory = strval($location);
    $this->cache_location = $directory;

    return true;
}
// Replace H1, H2, and H3 tags with the less important H4 tags.
/**
 * Choose whether init() should demote H1, H2 and H3 tags in the feed
 * content to H4.
 *
 * The flag is stored as a strict boolean in $this->replace_headers and is
 * checked by init() after the feed text has been cleaned up.
 *
 * @param mixed $enable Any value; it is interpreted by PHP's boolean rules.
 * @return bool Always true.
 */
function replace_headers($enable) {
    // Collapse any truthy/falsy input to a strict true/false.
    $this->replace_headers = $enable ? true : false;

    return true;
}
/****************************************************
MAIN SIMPLEPIE FUNCTION
Rewrites the feed so that it actually resembles XML, processes the XML,
and builds an array from the feed.
****************************************************/
function init() {
// If this is a .Mac Photocast, change it to the real URL.
if (stristr($this->rss_url, 'http://photocast.mac.com')) {
$this->rss_url = preg_replace('%http://photocast.mac.com%i', 'http://web.mac.com', $this->rss_url);
}
// Return the User-Agent string to the website's logs.
ini_set('user_agent', $this->useragent);
// Clear all outdated cache from the server's cache folder
$this->clear_cache($this->cache_location, $this->max_minutes);
if ($this->rss_url) {
// Read the XML file for processing.
$cache_filename = $this->cache_location . '/' . urlencode($this->rss_url) . '.spc';
if ($this->caching && !$this->xml_dump && substr($this->rss_url, 0, 7) == 'http://' && file_exists($cache_filename)) {
if ($fp = fopen($cache_filename, 'r')) {
$data = '';
while (!feof($fp)) {
$data .= fread($fp, 2048);
}
fclose($fp);
$mp_rss = unserialize($data);
if (empty($mp_rss)) {
$this->caching = false;
return $this->init();
} elseif (isset($mp_rss['feed_url'])) {
$this->rss_url = $mp_rss['feed_url'];
return $this->init();
} else {
$this->data = $mp_rss;
return true;
}
} else {
$this->caching = false;
return $this->init();
}
} else {
// Get the file
$mp_rss = $this->get_file($this->rss_url);
// Check if file is a feed or a webpage
// If it's a webpage, auto-discover the feed and re-pass it to init()
$discovery = $this->rss_locator($mp_rss, $this->rss_url);
if ($discovery) {
if ($discovery == 'nofeed') {
return false;
} else {
$this->rss_url = $discovery;
if ($this->caching && substr($this->rss_url, 0, 7) == 'http://') {
if ($this->is_writeable_createable($cache_filename)) {
$fp = fopen($cache_filename, 'w');
fwrite($fp, serialize(array('feed_url' => $discovery)));
fclose($fp);
} else trigger_error("$cache_filename is not writeable", E_USER_WARNING);
}
return $this->init();
}
}
// Trim out any whitespace at the beginning or the end of the file
$mp_rss = trim($mp_rss);
// Get encoding
// Support everything from http://www.php.net/manual/en/ref.mbstring.php#mbstring.supported-encodings
$use_mbstring = false;
if (preg_match('/encoding=["](.*)["]/Ui', $mp_rss, $encoding)) {
switch (strtolower($encoding[1])) {
// UCS-4
case 'ucs-4':
case 'ucs4':
case 'utf-32':
case 'utf32':
$encoding = 'UCS-4';
$use_mbstring = true;
break;
// UCS-4BE
case 'ucs-4be':
case 'ucs4be':
case 'utf-32be':
case 'utf32be':
$encoding = 'UCS-4BE';
$use_mbstring = true;
break;
// UCS-4LE
case 'ucs-4le':
case 'ucs4le':
case 'utf-32le':
case 'utf32le':
$encoding = 'UCS-4LE';
$use_mbstring = true;
break;
// UCS-2
case 'ucs-2':
case 'ucs2':
case 'utf-16':
case 'utf16':
$encoding = 'UCS-2';
$use_mbstring = true;
break;
// UCS-2BE
case 'ucs-2be':
case 'ucs2be':
case 'utf-16be':
case 'utf16be':
$encoding = 'UCS-2BE';
$use_mbstring = true;
break;
// UCS-2LE
case 'ucs-2le':
case 'ucs2le':
case 'utf-16le':
case 'utf16le':
$encoding = 'UCS-2LE';
$use_mbstring = true;
break;
// UCS-32
case 'ucs-32':
case 'ucs32':
$encoding = 'UCS-32';
$use_mbstring = true;
break;
// UCS-32BE
case 'ucs-32be':
case 'ucs32be':
$encoding = 'UCS-32BE';
$use_mbstring = true;
break;
// UCS-32LE
case 'ucs-32le':
case 'ucs32le':
$encoding = 'UCS-32LE';
$use_mbstring = true;
break;
// UCS-16
case 'ucs-16':
case 'ucs16':
$encoding = 'UCS-16';
$use_mbstring = true;
break;
// UCS-16BE
case 'ucs-16be':
case 'ucs16be':
$encoding = 'UCS-16BE';
$use_mbstring = true;
break;
// UCS-16LE
case 'ucs-16le':
case 'ucs16le':
$encoding = 'UCS-16LE';
$use_mbstring = true;
break;
// UTF-7
case 'utf-7':
case 'utf7':
$encoding = 'UTF-7';
$use_mbstring = true;
break;
// UTF7-IMAP
case 'utf-7-imap':
case 'utf7-imap':
case 'utf7imap':
$encoding = 'UTF7-IMAP';
$use_mbstring = true;
break;
// ASCII
case 'us-ascii':
case 'ascii':
$encoding = 'US-ASCII';
$use_mbstring = true;
break;
// EUC-JP
case 'euc-jp':
case 'eucjp':
$encoding = 'EUC-JP';
$use_mbstring = true;
break;
// EUCJP-win
case 'euc-jp-win':
case 'eucjp-win':
case 'eucjpwin':
$encoding = 'EUCJP-win';
$use_mbstring = true;
break;
// Shift_JIS
case 'shift_jis':
case 'sjis':
case '932':
$encoding = 'Shift_JIS';
$use_mbstring = true;
break;
// SJIS-win
case 'sjis-win':
case 'sjiswin':
case 'shift_jis-win':
$encoding = 'SJIS-win';
$use_mbstring = true;
break;
// ISO-2022-JP
case 'iso-2022-jp':
case 'iso2022-jp':
case 'iso2022jp':
$encoding = 'ISO-2022-JP';
$use_mbstring = true;
break;
// JIS
case 'jis':
$encoding = 'JIS';
$use_mbstring = true;
break;
// ISO-8859-1
case 'iso-8859-1':
case 'iso8859-1':
$encoding = 'ISO-8859-1';
$use_mbstring = true;
break;
// ISO-8859-2
case 'iso-8859-2':
case 'iso8859-2':
$encoding = 'ISO-8859-2';
$use_mbstring = true;
break;
// ISO-8859-3
case 'iso-8859-3':
case 'iso8859-3':
$encoding = 'ISO-8859-3';
$use_mbstring = true;
break;
// ISO-8859-4
case 'iso-8859-4':
case 'iso8859-4':
$encoding = 'ISO-8859-4';
$use_mbstring = true;
break;
// ISO-8859-5
case 'iso-8859-5':
case 'iso8859-5':
$encoding = 'ISO-8859-5';
$use_mbstring = true;
break;
// ISO-8859-6
case 'iso-8859-6':
case 'iso8859-6':
$encoding = 'ISO-8859-6';
$use_mbstring = true;
break;
// ISO-8859-7
case 'iso-8859-7':
case 'iso8859-7':
$encoding = 'ISO-8859-7';
$use_mbstring = true;
break;
// ISO-8859-8
case 'iso-8859-8':
case 'iso8859-8':
$encoding = 'ISO-8859-8';
$use_mbstring = true;
break;
// ISO-8859-9
case 'iso-8859-9':
case 'iso8859-9':
$encoding = 'ISO-8859-9';
$use_mbstring = true;
break;
// ISO-8859-10
case 'iso-8859-10':
case 'iso8859-10':
$encoding = 'ISO-8859-10';
$use_mbstring = true;
break;
// mbstring functions don't appear to support 11 & 12
// ISO-8859-13
case 'iso-8859-13':
case 'iso8859-13':
$encoding = 'ISO-8859-13';
$use_mbstring = true;
break;
// ISO-8859-14
case 'iso-8859-14':
case 'iso8859-14':
$encoding = 'ISO-8859-14';
$use_mbstring = true;
break;
// ISO-8859-15
case 'iso-8859-15':
case 'iso8859-15':
$encoding = 'ISO-8859-15';
$use_mbstring = true;
break;
// byte2be
case 'byte2be':
$encoding = 'byte2be';
$use_mbstring = true;
break;
// byte2le
case 'byte2le':
$encoding = 'byte2le';
$use_mbstring = true;
break;
// byte4be
case 'byte4be':
$encoding = 'byte4be';
$use_mbstring = true;
break;
// byte4le
case 'byte4le':
$encoding = 'byte4le';
$use_mbstring = true;
break;
// BASE64
case 'base64':
case 'base-64':
$encoding = 'BASE64';
$use_mbstring = true;
break;
// HTML-ENTITIES
case 'html-entities':
case 'htmlentities':
$encoding = 'HTML-ENTITIES';
$use_mbstring = true;
break;
// 7bit
case '7bit':
case '7-bit':
$encoding = '7bit';
$use_mbstring = true;
break;
// 8bit
case '8bit':
case '8-bit':
$encoding = '8bit';
$use_mbstring = true;
break;
// EUC-CN
case 'euc-cn':
case 'euccn':
$encoding = 'EUC-CN';
$use_mbstring = true;
break;
// EUC-TW
case 'euc-tw':
case 'euctw':
$encoding = 'EUC-TW';
$use_mbstring = true;
break;
// EUC-KR
case 'euc-kr':
case 'euckr':
$encoding = 'EUC-KR';
$use_mbstring = true;
break;
// Traditional Chinese, mainly used in Taiwan
case 'big5':
case '950':
$encoding = 'BIG5';
$use_mbstring = true;
break;
// Simplified Chinese, national standard character set
case 'gb2312':
case '936':
$encoding = 'GB2312';
$use_mbstring = true;
break;
// Big5 with Hong Kong extensions, Traditional Chinese
case 'big5-hkscs':
$encoding = 'BIG5-HKSCS';
$use_mbstring = true;
break;
// Windows-specific Cyrillic
case 'cp1251':
case 'windows-1251':
case 'win-1251':
case '1251':
$encoding = 'Windows-1251';
$use_mbstring = true;
break;
// Windows-specific Western Europe
case 'cp1252':
case 'windows-1252':
case '1252':
$encoding = 'Windows-1252';
$use_mbstring = true;
break;
// Russian
case 'koi8-r':
case 'koi8-ru':
case 'koi8r':
$encoding = 'KOI8-R';
$use_mbstring = true;
break;
// HZ
case 'hz':
$encoding = 'HZ';
$use_mbstring = true;
break;
// ISO-2022-KR
case 'iso-2022-kr':
case 'iso2022-kr':
case 'iso2022kr':
$encoding = 'ISO-2022-KR';
$use_mbstring = true;
break;
// DOS-specific Cyrillic
case 'cp866':
case 'ibm866':
case '866':
$encoding = 'cp866';
$use_mbstring = true;
break;
// Simplified Chinese (code page 936 / GBK)
case 'cp936':
case 'ibm936':
case '936':
$encoding = 'cp936';
$use_mbstring = true;
break;
// DOS-specific Cyrillic
case 'cp959':
case 'ibm959':
case '959':
$encoding = 'cp959';
$use_mbstring = true;
break;
// Korean (code page 949 / Unified Hangul Code)
case 'cp949':
case 'ibm949':
case '949':
case 'uhc':
$encoding = 'cp949';
$use_mbstring = true;
break;
// Default to UTF-8
default:
$encoding = 'UTF-8';
break;
}
} else {
$encoding = 'UTF-8';
}
$this->encoding = $encoding;
// If function is available, convert characters to UTF-8, and overwrite $this->encoding
if (function_exists('mb_convert_encoding') && ($use_mbstring)) {
$mp_rss = mb_convert_encoding($mp_rss, 'UTF-8', $encoding);
$this->encoding = 'UTF-8';
}
// Encode entities within CDATA
$mp_rss = preg_replace_callback('//Uis', array(&$this, 'cdata_encode'), $mp_rss);
// Strip out all CDATA tags
$mp_rss = str_replace('', '', $mp_rss);
// Replace any other brackets with their entities
$mp_rss = str_replace('[', '[', $mp_rss); // [ character -- problems with CDATA
$mp_rss = str_replace(']', ']', $mp_rss); // ] character -- problems with CDATA
// Fix tags inside code and pre tags.
$mp_rss = preg_replace_callback('/(.*)<\/code>/Uis', array(&$this, 'code_encode'), $mp_rss);
$mp_rss = preg_replace_callback('/
(.*)<\/pre>/Uis', array(&$this, 'code_encode'), $mp_rss);
// Create an array of all of the elements that SimplePie supports the parsing of.
$sp_elements = array(
// These elements are supported by SimplePie (alphabetical)
'category',
'content',
'copyright',
'dc:creator',
'dc:date',
'dc:description',
'dc:language',
'dc:subject',
'description',
'guid',
'id',
'height',
'issued',
'language',
'logo',
'name',
'pubDate',
'published',
'subtitle',
'summary',
'tagline',
'title',
'url',
'width',
// These elements are not currently supported by SimplePie
// We'll just CDATA them to be safe.
'comments',
'dc:contributor',
'dc:coverage',
'dc:format',
'dc:identifier',
'dc:publisher',
'dc:relation',
'dc:rights',
'dc:source',
'dc:title',
'dc:type',
'docs',
'generator',
'icon',
'itunes:author',
'itunes:duration',
'itunes:email',
'itunes:explicit',
'itunes:keywords',
'itunes:name',
'itunes:subtitle',
'itunes:summary',
'lastBuildDate',
'managingEditor',
'media:credit',
'media:text',
'rating',
'rights',
'sy:updatePeriod',
'sy:updateFrequency',
'sy:updateBase',
'ttl',
'updated',
'webMaster'
);
// Store the number of elements in the above array.
// Helps execution time in JavaScript, why not PHP?
$sp_elements_size = sizeof($sp_elements);
$mp_rss = str_replace('content:encoded', 'content', $mp_rss);
$mp_rss = preg_replace("%%i", '', $mp_rss); // Strip out the DOCTYPE since we don't use it anyways.
for ($i=0; $i < $sp_elements_size; $i++) {
$full = $sp_elements[$i];
$short = substr($full, 0, -1);
$mp_rss = preg_replace('%<' . $short . "[^>/]+((\"[^\"]*\")|(\'[^\']*\')|([^>/]*))((\s*)?|([^\s]))/>%i", '<' . $full . '>' . $full . '>', $mp_rss);
$mp_rss = preg_replace('%<' . $full . '(.|\s)*?>%i', '<' . $full . '\\0%i', ']]>' . $full . '>', $mp_rss);
}
// Separate rules for some tags.
if (preg_match('/
$mp_rss = preg_replace("%/]+((\"[^\"]*\")|(\'[^\']*\')|([^>/]*))((\s*)|([^\s]))/>%i", '', $mp_rss);
$mp_rss = preg_replace('%%i', '%i', ']]>',$mp_rss);
//
$mp_rss = preg_replace('%%i', '%i', ']]>', $mp_rss);
}
// Strip out HTML tags that might cause various security problems.
// Based on recommendations by Mark Pilgrim at:
// http://diveintomark.org/archives/2003/06/12/how_to_consume_rss_safely
$tags_to_strip = array(
'html',
'body',
'script',
'noscript',
'embed',
'object',
'frameset',
'frame',
'iframe',
'meta',
'style',
'param',
'doctype',
'form',
'input',
'blink',
'marquee',
'font'
);
foreach ($tags_to_strip as $tag) {
$mp_rss = preg_replace('/<\/?' . $tag . '(.|\s)*?>/i', '', $mp_rss);
}
// Strip out HTML attributes that might cause various security problems.
// Based on recommendations by Mark Pilgrim at:
// http://diveintomark.org/archives/2003/06/12/how_to_consume_rss_safely
$stripAttrib = '\' (style|id|class)="(.*?)"\'i';
$mp_rss = preg_replace($stripAttrib, '', $mp_rss);
// Swap out problematic characters.
$mp_rss = str_replace('', '', $mp_rss); // UTF-8 BOM
$mp_rss = preg_replace("/�|�|–|—/", '--', $mp_rss); // em/en dash
$mp_rss = preg_replace("/�|�|’|‘/", "'", $mp_rss); // single-quotes
$mp_rss = preg_replace("/�|�|“|”/", '"', $mp_rss); // double-quotes
$mp_rss = preg_replace("/�/", '', $mp_rss); // bad character
// Swap out funky characters with their named entities.
// Code is from Feedsplitter at chxo.com
$mp_rss = preg_replace(array('/\&([a-z\d\#]+)\;/i',
'/\&/',
'/\#\|\|([a-z\d\#]+)\|\|\#/i',
'/(\=\"\-\/\%\?\!\'\(\)\[\\{\}\ \#\+\,\@_])/e'
),
array('#||\\1||#',
'&',
'&\\1;',
"''.ord('\\1').';'"
),
$mp_rss
);
// Get rid of invalid UTF-8 characters
// Code is from chregu at blog.bitflux.ch
if (function_exists('iconv'))
$mp_rss = iconv('UTF-8', 'UTF-8//IGNORE', $mp_rss);
if ($this->replace_headers) {
// Replace H1, H2, and H3 tags with the less important H4 tags.
// This is because on a site, the more important headers might make sense,
// but it most likely doesn't fit in the context of RSS-in-a-webpage.
$mp_rss = preg_replace('//i', '