| 1 | <?php |
|---|
| 2 | |
|---|
| 3 | // |
|---|
| 4 | // Open Web Analytics - An Open Source Web Analytics Framework |
|---|
| 5 | // |
|---|
| 6 | // Copyright 2006 Peter Adams. All rights reserved. |
|---|
| 7 | // |
|---|
| 8 | // Licensed under GPL v2.0 http://www.gnu.org/copyleft/gpl.html |
|---|
| 9 | // |
|---|
| 10 | // Unless required by applicable law or agreed to in writing, software |
|---|
| 11 | // distributed under the License is distributed on an "AS IS" BASIS, |
|---|
| 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|---|
| 13 | // See the License for the specific language governing permissions and |
|---|
| 14 | // limitations under the License. |
|---|
| 15 | // |
|---|
| 16 | // $Id$ |
|---|
| 17 | // |
|---|
| 18 | |
|---|
| 19 | require_once(OWA_BASE_DIR.'/owa_env.php'); |
|---|
| 20 | require_once(OWA_INCLUDE_DIR.'/lastRSS.php'); |
|---|
| 21 | require_once(OWA_BASE_DIR.'/owa_httpRequest.php'); |
|---|
| 22 | |
|---|
| 23 | /** |
|---|
| 24 | * Grabs the OWA News Feed from the OWA Blog. |
|---|
| 25 | * |
|---|
| 26 | * @author Peter Adams <peter@openwebanalytics.com> |
|---|
| 27 | * @copyright Copyright © 2006 Peter Adams <peter@openwebanalytics.com> |
|---|
| 28 | * @license http://www.gnu.org/copyleft/gpl.html GPL v2.0 |
|---|
| 29 | * @category owa |
|---|
| 30 | * @package owa |
|---|
| 31 | * @version $Revision$ |
|---|
| 32 | * @since owa 1.0.0 |
|---|
| 33 | */ |
|---|
| 34 | |
|---|
class owa_news extends lastRSS {

	/**
	 * Configuration
	 *
	 * @var array
	 */
	var $config;

	/**
	 * Error handler
	 *
	 * @var object
	 */
	var $e;

	/**
	 * HTTP client (owa_http instance) that Parse() uses to download the feed.
	 *
	 * @var object
	 */
	var $crawler;

	/**
	 * Constructor.
	 *
	 * Loads the 'base' configuration and the error handler from owa_coreAPI,
	 * creates the HTTP client with a 10 second read timeout and sets the
	 * feed options (no cache dir, "F j, Y" dates, CDATA mode 'content',
	 * at most 3 items).
	 *
	 * Declared before the legacy owa_news() method on purpose: some PHP 5
	 * versions raise a strict-standards notice when __construct() follows
	 * an old-style constructor.
	 */
	function __construct() {

		$c = &owa_coreAPI::configSingleton();
		$this->config = &$c->fetch('base');
		$this->e = &owa_coreAPI::errorSingleton();
		$this->crawler = new owa_http;
		$this->crawler->read_timeout = 10;
		$this->cache_dir = '';
		$this->date_format = "F j, Y";
		$this->CDATA = 'content';
		$this->items_limit = 3;
	}

	/**
	 * Legacy (PHP 4 style) constructor.
	 *
	 * PHP 8 no longer treats a method named after the class as a constructor,
	 * so the real initialisation lives in __construct(). This wrapper is kept
	 * so that code calling owa_news() explicitly keeps working.
	 */
	function owa_news() {

		$this->__construct();
	}

	/**
	 * Converts a feed date string to $this->date_format.
	 *
	 * The value is returned unchanged when no date format is configured or
	 * when the string cannot be parsed. strtotime() signals failure with
	 * false (PHP >= 5.1) or -1 (older versions), so both are checked.
	 *
	 * @param string $value raw date string taken from the feed
	 * @return string formatted date, or $value as given if it is not parsable
	 */
	function _formatFeedDate($value) {

		if ($this->date_format == '') {
			return $value;
		}

		$timestamp = strtotime($value);

		if ($timestamp === false || $timestamp === -1) {
			return $value;
		}

		return date($this->date_format, $timestamp);
	}

	/**
	 * Redefined Parse function that downloads the feed through the
	 * owa_http crawler instead of fopen, then extracts channel, textinput,
	 * image and item data with regular expressions.
	 *
	 * The tag lists (channeltags, textinputtags, imagetags, itemtags) and the
	 * helpers my_preg_match() / unhtmlentities() are not defined in this
	 * class; presumably they are inherited from lastRSS.
	 *
	 * @param string $rss_url URL of the RSS feed to fetch
	 * @return array|false parsed feed ('encoding', channel tags,
	 *         'textinput_*', 'image_*', 'items', 'items_count'), or false
	 *         when the crawler returned no content
	 */
	function Parse ($rss_url) {

		// Download the RSS document
		$this->crawler->fetch($rss_url);
		$rss_content = $this->crawler->results;

		if (empty($rss_content)) {
			// Nothing was fetched: log it and report failure
			$this->e->notice('no rss content found at: '.$rss_url);
			return false;
		}

		$result = array();

		// Parse document encoding
		$result['encoding'] = $this->my_preg_match("'encoding=[\'\"](.*?)[\'\"]'si", $rss_content);

		// rsscp is used in my_preg_match(): take the document codepage if
		// one is specified, otherwise fall back to the default codepage
		if ($result['encoding'] != '') {
			$this->rsscp = $result['encoding'];
		} else {
			$this->rsscp = $this->default_cp;
		}

		// Parse CHANNEL info (only when a <channel> element was found)
		if (preg_match("'<channel.*?>(.*?)</channel>'si", $rss_content, $out_channel)) {
			foreach ($this->channeltags as $channeltag) {
				$temp = $this->my_preg_match("'<$channeltag.*?>(.*?)</$channeltag>'si", $out_channel[1]);
				if ($temp != '') {
					$result[$channeltag] = $temp; // Set only if not empty
				}
			}
		}

		// Convert lastBuildDate only when the feed supplied one
		if (isset($result['lastBuildDate'])) {
			$result['lastBuildDate'] = $this->_formatFeedDate($result['lastBuildDate']);
		}

		// Parse TEXTINPUT info.
		// The pattern matches <textinput> with or without attributes but
		// skips the self-closing form <textinput /> (not an opening tag).
		preg_match("'<textinput(|[^>]*[^/])>(.*?)</textinput>'si", $rss_content, $out_textinfo);
		if (isset($out_textinfo[2])) {
			foreach ($this->textinputtags as $textinputtag) {
				$temp = $this->my_preg_match("'<$textinputtag.*?>(.*?)</$textinputtag>'si", $out_textinfo[2]);
				if ($temp != '') {
					$result['textinput_'.$textinputtag] = $temp; // Set only if not empty
				}
			}
		}

		// Parse IMAGE info
		preg_match("'<image.*?>(.*?)</image>'si", $rss_content, $out_imageinfo);
		if (isset($out_imageinfo[1])) {
			foreach ($this->imagetags as $imagetag) {
				$temp = $this->my_preg_match("'<$imagetag.*?>(.*?)</$imagetag>'si", $out_imageinfo[1]);
				if ($temp != '') {
					$result['image_'.$imagetag] = $temp; // Set only if not empty
				}
			}
		}

		// Parse ITEMS
		preg_match_all("'<item(| .*?)>(.*?)</item>'si", $rss_content, $items);
		$i = 0;
		$result['items'] = array(); // create array even if there are no items

		foreach ($items[2] as $rss_item) {

			// An items_limit of 0 means "no limit"; otherwise stop once reached
			if ($this->items_limit != 0 && $i >= $this->items_limit) {
				break;
			}

			foreach ($this->itemtags as $itemtag) {
				$temp = $this->my_preg_match("'<$itemtag.*?>(.*?)</$itemtag>'si", $rss_item);
				if ($temp != '') {
					$result['items'][$i][$itemtag] = $temp; // Set only if not empty
				}
			}

			// Strip HTML tags and entities from DESCRIPTION and TITLE
			if ($this->stripHTML) {
				foreach (array('description', 'title') as $field) {
					if (!empty($result['items'][$i][$field])) {
						$result['items'][$i][$field] = strip_tags($this->unhtmlentities(strip_tags($result['items'][$i][$field])));
					}
				}
			}

			// Convert pubDate only when the item supplied one
			if (isset($result['items'][$i]['pubDate'])) {
				$result['items'][$i]['pubDate'] = $this->_formatFeedDate($result['items'][$i]['pubDate']);
			}

			// Item counter
			$i++;
		}

		$result['items_count'] = $i;
		return $result;
	}

}
|---|
| 161 | |
|---|
| 162 | ?> |
|---|