PHP优酷土豆酷6采集入库函数(获取视频缩略图,视频swf地址,视频标题)

<?php
/**
 * Scrape video metadata (thumbnail, SWF player URL, title, video id) from a
 * Youku, Tudou or Ku6 video page.
 *
 * by hkshadow
 * QQ 2765237
 * date: 2011-06-25 AM 02:32
 * edit: 2011-06-25 PM 17:38
 *
 * The page at $link is downloaded with file_get_contents() and the fields are
 * extracted with regular expressions. The patterns are tied to each site's
 * HTML layout at the time of writing; when a site changes its markup the
 * corresponding pattern has to be adjusted.
 *
 * Side effect: a Content-Type header matching the site's charset is sent if
 * headers have not been sent yet (kept from the original, where it is noted
 * as a display aid for testing that may be removed).
 *
 * @param string $link URL (or any path file_get_contents() can read) of the video page.
 * @param string $host Site selector: 'youku.com', 'ku6.com' or 'tudou.com'.
 *                     Any other value yields an empty array.
 *
 * @return array Associative array holding whichever of the keys 'flashvar',
 *               'imageurl', 'vidurl' and 'title' could be extracted. Keys
 *               whose value was not found are omitted.
 */
function CaptureVideo($link, $host) {
    $return = array ();

    if ('youku.com' == $host) {
        // Youku pages are UTF-8 (per the original author's note).
        if (! headers_sent ()) {
            header ( "Content-Type:text/html; charset=utf-8" );
        }

        // The video id sits between "id_" and either "=" or ".html" in the link.
        if (preg_match ( '/id_(\w+)(?:=|\.html)/', $link, $matches ) && ! empty ( $matches [1] )) {
            $return ['flashvar'] = $matches [1];
        }

        $text = file_get_contents ( $link );
        if (false === $text) {
            // Download failed: only fields derivable from $link can be filled in.
            $text = '';
        }

        preg_match ( "/<title>(.*?) - (.*)<\/title>/", $text, $title );

        // The thumbnail URL is embedded in the "download" list item; only look
        // for it when that list item is actually present.
        if (preg_match ( '/<li class="download"(.*)<\/li>/', $text, $download )
            && preg_match ( '/http:\/\/g(.*)\.ykimg\.com\/(.*)\|"\>/', $download [1], $imageurl )
            && ! empty ( $imageurl [1] )) {
            $return ['imageurl'] = "http://g" . $imageurl [1] . ".ykimg.com/" . $imageurl [2];
        }

        // Player URL: everything in the embed src up to (not including) "/v.swf".
        preg_match ( '/embed src=\"(.*)\/v\.swf/', $text, $vidurls );
        if (! empty ( $vidurls [1] )) {
            $return ['vidurl'] = $vidurls [1];
        }

        if (! empty ( $title )) {
            $return ['title'] = $title [1];
        }
    } elseif ('ku6.com' == $host) {
        // Ku6 pages are GBK (per the original author's note).
        if (! headers_sent ()) {
            header ( "Content-Type:text/html; charset=gbk" );
        }

        $text = file_get_contents ( $link );
        if (false === $text) {
            $text = '';
        }

        // For ordinary links the id is the file name of the link itself; for
        // "index_*.html" links it has to be read from the player URL in the page.
        $isIndexLink = (bool) preg_match ( "/\/index_([\w\-]+)\.html/", $link );
        if (! $isIndexLink
            && preg_match ( "/\/([\w\-]+)\.html/", $link, $matches )
            && ! empty ( $matches [1] )) {
            $return ['flashvar'] = $matches [1];
        } elseif (preg_match ( "/refer\/(.*)\/v\.swf/", $text, $videourl )) {
            $return ['flashvar'] = $videourl [1];
        }

        // Player URL: the whole match, including the trailing "/v.swf".
        preg_match ( '/http\:(.*)\/v\.swf/', $text, $vidurls );
        if (! empty ( $vidurls [0] )) {
            $return ['vidurl'] = $vidurls [0];
        }

        preg_match ( "/\"title\" content=\"(.*)\"\/>/", $text, $title );

        if (preg_match ( '/<span class="s_pic">(.*)<\/span>/', $text, $imageurl )
            && ! empty ( $imageurl [1] )) {
            $return ['imageurl'] = $imageurl [1];
        }

        if (! empty ( $title [1] )) {
            $return ['title'] = $title [1];
        }
    } elseif ('tudou.com' == $host) {
        // Tudou pages are GBK (per the original author's note).
        if (! headers_sent ()) {
            header ( "Content-Type:text/html; charset=gbk" );
        }

        $tudou = file_get_contents ( $link );
        if (false === $tudou) {
            $tudou = '';
        }

        // NOTE(review): the id is taken from the first "view/<id>/" occurrence
        // in the page body, not from $link - confirm this is intended.
        if (preg_match ( "/view\/([\w\-]+)\//", $tudou, $matches ) && ! empty ( $matches [1] )) {
            $return ['flashvar'] = $matches [1];
        }

        preg_match ( "/<title>(.*?)_(.*)<\/title>/", $tudou, $title );
        preg_match ( "/pic:\"(.*)\"/", $tudou, $imageurl );
        preg_match ( "/,lid = (.*)/", $tudou, $vls );
        preg_match ( '/,lid_code = lcode = (.*)/', $tudou, $tx );

        // Both captures run to the end of the line, so strip surrounding
        // whitespace (e.g. a trailing "\r" on CRLF pages) as well as the
        // single quotes around the lcode value before building the URL.
        $lid = isset ( $vls [1] ) ? trim ( $vls [1] ) : '';
        $lcode = isset ( $tx [1] ) ? trim ( str_replace ( "'", "", $tx [1] ) ) : '';
        if (! empty ( $lcode ) && ! empty ( $lid )) {
            $return ['vidurl'] = "http://www.tudou.com/l/" . $lcode . "/&iid=" . $lid . "/v.swf";
        }

        if (! empty ( $imageurl [1] )) {
            $return ['imageurl'] = $imageurl [1];
        }
        if (! empty ( $title )) {
            $return ['title'] = $title [1];
        }
    }

    return $return;
}

// Demo

// Usage example.
// Only Tudou, Youku and Ku6 are supported so far.
// These sites change their HTML structure from time to time; if extraction
// stops working, adjust the corresponding regular expressions.
// by hkshadow 2011-06-25
$host = "youku.com";
$link = 'http://v.youku.com/v_show/id_XMjcxNjU0NjMy.html';
$text = CaptureVideo($link, $host);
print_r($text);
?>

Leave a Reply

(will not be published)