PHP优酷土豆酷6采集入库函数(获取视频缩略图,视频swf地址,视频标题)

<?php
/**
 * 采集入库函数
 * 优酷,土豆,酷6 采集 (自动获取视频缩略图,视频swf地址,视频标题)
 * by hkshadow
 * QQ 2765237
 * date: 2011-06-25 AM 02:32
 * edit: 2011-06-25 PM 17:38
 */

/**
 * Scrapes embed metadata for a Youku, Ku6 or Tudou video page.
 *
 * The page at $link is read with file_get_contents() and inspected with
 * regular expressions, so results depend on the HTML layout the site serves
 * at the time of the call. If a site changes its markup, the matching
 * pattern in the corresponding branch has to be updated.
 *
 * @param string $link URL (or any path readable by file_get_contents) of the video page.
 * @param string $host One of 'youku.com', 'ku6.com' or 'tudou.com'.
 *
 * @return array Contains only the keys that could be extracted:
 *               'flashvar' (video id), 'imageurl' (thumbnail), 'vidurl'
 *               (player URL; for Youku this is the part before "/v.swf")
 *               and 'title'. Empty for an unsupported host. If the page
 *               cannot be read, only values derived from $link are returned.
 */
function CaptureVideo($link, $host) {
	$return = array ();

	if ('youku.com' === $host) {
		// Youku pages are UTF-8. The header is only for displaying test output
		// correctly; skip it once output has started to avoid a warning.
		if (! headers_sent ()) {
			header ( "Content-Type:text/html; charset=utf-8" );
		}

		// The id is the run of word characters after "id_". No suffix is
		// required, so ids ending in h/t/m/l are no longer truncated.
		if (preg_match ( '/id_(\w+)/', $link, $matches )) {
			$return ['flashvar'] = $matches [1];
		}

		$text = file_get_contents ( $link );
		if (false === $text) {
			return $return;
		}

		// The thumbnail URL is embedded in the "download" list item.
		if (preg_match ( '/<li class="download"(.*)<\/li>/', $text, $download )
			&& preg_match ( '/http:\/\/g(.*)\.ykimg\.com\/(.*)\|">/', $download [1], $imageurl )) {
			$return ['imageurl'] = "http://g" . $imageurl [1] . ".ykimg.com/" . $imageurl [2];
		}

		// Player URL up to (not including) "/v.swf"; never crosses the closing quote.
		if (preg_match ( '/embed src="([^"]+)\/v\.swf/', $text, $vidurls )) {
			$return ['vidurl'] = $vidurls [1];
		}

		if (preg_match ( '/<title>(.*?) - (.*)<\/title>/', $text, $title )) {
			$return ['title'] = $title [1];
		}
	} elseif ('ku6.com' === $host) {
		// Ku6 pages are GBK. Header is for test display only (see above).
		if (! headers_sent ()) {
			header ( "Content-Type:text/html; charset=gbk" );
		}

		$text = file_get_contents ( $link );
		$pageLoaded = (false !== $text);

		// Regular pages carry the id in the file name; "index_*.html" pages
		// do not, so the id is taken from the player URL inside the page.
		$isIndexLink = (1 === preg_match ( '/\/index_([\w\-]+)\.html/', $link ));
		if (! $isIndexLink && preg_match ( '/\/([\w\-]+)\.html/', $link, $matches )) {
			$return ['flashvar'] = $matches [1];
		} elseif ($pageLoaded && preg_match ( '/refer\/([^"\'\s<>]+?)\/v\.swf/', $text, $videourl )) {
			$return ['flashvar'] = $videourl [1];
		}

		if (! $pageLoaded) {
			return $return;
		}

		// Full player URL; the character class keeps the match inside one URL.
		if (preg_match ( '/http:[^"\'\s<>]*\/v\.swf/', $text, $vidurls )) {
			$return ['vidurl'] = $vidurls [0];
		}

		// Lazy match so the capture ends at the first closing </span>.
		if (preg_match ( '/<span class="s_pic">(.*?)<\/span>/', $text, $imageurl ) && '' !== $imageurl [1]) {
			$return ['imageurl'] = $imageurl [1];
		}

		// Lazy match so the capture ends at the first closing quote of the meta tag.
		if (preg_match ( '/"title" content="(.*?)"\/>/', $text, $title ) && '' !== $title [1]) {
			$return ['title'] = $title [1];
		}
	} elseif ('tudou.com' === $host) {
		// Tudou pages are GBK. Header is for test display only (see above).
		if (! headers_sent ()) {
			header ( "Content-Type:text/html; charset=gbk" );
		}

		$tudou = file_get_contents ( $link );
		if (false === $tudou) {
			return $return;
		}

		if (preg_match ( '/view\/([\w\-]+)\//', $tudou, $matches )) {
			$return ['flashvar'] = $matches [1];
		}

		// "lid" and "lcode" sit on their own lines in the page script; trim
		// the captures so a trailing "\r" or spaces do not end up in the URL.
		$lid = '';
		if (preg_match ( '/,lid = (.*)/', $tudou, $vls )) {
			$lid = trim ( $vls [1] );
		}
		$lcode = '';
		if (preg_match ( '/,lid_code = lcode = (.*)/', $tudou, $tx )) {
			$lcode = trim ( str_replace ( "'", "", $tx [1] ) );
		}
		if ('' !== $lcode && '' !== $lid) {
			$return ['vidurl'] = "http://www.tudou.com/l/" . $lcode . "/&iid=" . $lid . "/v.swf";
		}

		// Stop at the first closing quote so following fields are not captured.
		if (preg_match ( '/pic:"([^"]*)"/', $tudou, $imageurl ) && '' !== $imageurl [1]) {
			$return ['imageurl'] = $imageurl [1];
		}

		if (preg_match ( '/<title>(.*?)_(.*)<\/title>/', $tudou, $title )) {
			$return ['title'] = $title [1];
		}
	}

	return $return;
}

// Demo
//
// Usage example.
// Only Tudou, Youku and Ku6 are supported for now.
// These sites change their HTML structure from time to time; if extraction
// stops working, update the corresponding regular expression.
// by hkshadow 2011-06-25
$link = 'http://v.youku.com/v_show/id_XMjcxNjU0NjMy.html';
$host = "youku.com";
$text = CaptureVideo ( $link, $host );
print_r ( $text );
?>

Leave a Reply

(will not be published)