Thứ Bảy, 12 tháng 11, 2011

PHP: RSS Parser, Reader

<?php
/**
 * Fetch an RSS feed and render it as an HTML fragment.
 *
 * The feed is parsed with regular expressions (no XML parser): the first
 * <title> in the document becomes the heading, and every <item> contributes
 * its title, link, publication date and description.
 *
 * Item descriptions are passed through strip_html_tags() (defined outside
 * this function) and unhtmlentities(), images are replaced by a placeholder
 * and every remaining link gets rel="nofollow".
 *
 * NOTE(review): the feed is untrusted input and its titles/descriptions are
 * emitted as HTML. Only use this with feeds you trust, or sanitise the
 * returned markup before sending it to a browser.
 *
 * @param string $url Location of the feed; anything file_get_contents() can open.
 *
 * @return string HTML wrapped in <div class="rssfeeds">. When the feed cannot
 *                be read the wrapper contains only an empty heading.
 */
function RSSreader($url)
{
    // original author: ?
    // modifications and code rewrite: Femi Hasani [www.vision.to]
    // added remove images and add "nofollow" attribute to content links
    // added CSS classes
    // added unhtmlentities

    $rssstring = file_get_contents(trim((string) $url));
    if ($rssstring === false) {
        // Unreadable feed: fall through with no data instead of running the
        // regexes against `false`.
        $rssstring = '';
    }

    // Text of the first <$tag>...</$tag> element inside $xml with CDATA
    // markers removed, or '' when the element is absent.
    $firstTag = function ($tag, $xml) {
        if (!preg_match('#<' . $tag . '>(.*?)</' . $tag . '>#s', $xml, $m)) {
            return '';
        }
        return str_replace(array('<![CDATA[', ']]>'), '', $m[1]);
    };

    // Adds rel="nofollow" to one <a ...> tag unless its rel already says so.
    $addNofollow = function ($m) {
        $tag = $m[0];
        $hasNofollow = '#\srel\s*=\s*(?:"[^"]*nofollow[^"]*"|\'[^\']*nofollow[^\']*\'|[^\s>"\']*nofollow)#i';
        if (preg_match($hasNofollow, $tag)) {
            return $tag;
        }
        // Drop any other rel attribute; the match stops at the attribute's own
        // closing quote so neighbouring attributes (href, title, ...) survive.
        $tag = preg_replace('#\s+rel\s*=\s*(?:"[^"]*"|\'[^\']*\'|[^\s>]+)#i', '', $tag);
        return preg_replace('#^<a\b#i', '<a rel="nofollow"', $tag, 1);
    };

    $html = "<div class=\"rssfeeds\" >\n";
    $html .= "<h2>" . $firstTag('title', $rssstring) . "</h2>";

    preg_match_all("#<item>(.*?)</item>#s", $rssstring, $items);

    foreach ($items[1] as $item) {
        $title = $firstTag('title', $item);
        if ($title === '') {
            // Items without a title are not rendered.
            continue;
        }

        $link = trim($firstTag('link', $item));
        // Escape bare ampersands only: links in XML are normally already
        // written with &amp; and must not become &amp;amp;.
        $feed = preg_replace('/&(?!(?:[a-z][a-z0-9]*|#[0-9]+|#x[0-9a-f]+);)/i', '&amp;', $link);
        $feed = str_replace('"', '&quot;', $feed);

        // An absent or unparseable pubDate is shown as empty text rather than
        // as the Unix epoch.
        $timestamp = strtotime($firstTag('pubDate', $item));
        $datum = ($timestamp === false) ? '' : date('Y-m-d H:i', $timestamp);

        $html .= "<div class=\"rssHeader\"><p><a rel=\"nofollow\" href=\"" . $feed
            . "\" target=\"_blank\">" . $title . "</a><br />" . $datum . "</p></div>\n";

        $desc = strip_html_tags($firstTag('description', $item), false);
        $description = unhtmlentities($desc);

        if (strlen($description) > 5) {
            $content = str_replace("&", "&amp;", $description);
            $content = preg_replace("/<img([^>]*)>/im", "[ IMAGE ]<!-- image removed -->", $content);
            // `<a\b` restricts the match to anchors (not <abbr>, <address>, ...).
            $content = preg_replace_callback('#<a\b[^>]*>#i', $addNofollow, $content);

            $html .= "<div class=\"rssContent\">" . $content . "</div>\n";
        }
    }

    $html .= "</div>";
    return $html;
}

// Fallback for PHP versions before 4.3.0, which have no built-in html_entity_decode():
/**
 * Decode HTML entities in a string.
 *
 * Numeric entities are decoded first (hexadecimal, then decimal), and named
 * entities afterwards, so a numeric entity that yields "&" can complete a
 * named entity which is then decoded as well. All decoded characters are
 * produced as UTF-8. Numeric entities that html_entity_decode() does not
 * accept are left unchanged.
 *
 * @param string $string Text that may contain HTML entities.
 *
 * @return string The text with entities replaced by their characters.
 */
function unhtmlentities($string)
{
    // The /e modifier this function used to rely on no longer exists in
    // PHP 7+; a callback performs the same per-match conversion.
    $decodeNumeric = function ($m) {
        return html_entity_decode($m[0], ENT_QUOTES, 'UTF-8');
    };

    // replace numeric entities
    $string = preg_replace_callback('~&#x[0-9a-f]+;~i', $decodeNumeric, (string) $string);
    $string = preg_replace_callback('~&#[0-9]+;~', $decodeNumeric, $string);

    // replace literal entities
    $trans_tbl = get_html_translation_table(HTML_ENTITIES, ENT_QUOTES, 'UTF-8');
    $trans_tbl = array_flip($trans_tbl);
    return strtr($string, $trans_tbl);
}
?>

Không có nhận xét nào:

Đăng nhận xét