<?php
/**
 * Video capture function (for importing into a local store).
 * Youku, Tudou and Ku6 capture: automatically extracts the video thumbnail,
 * the video SWF address and the video title from a video page.
 *
 * by hkshadow
 * QQ 2765237
 * date: 2011-06-25 AM 02:32
 * edit: 2011-06-25 PM 17:38
 */

/**
 * Scrape a video page and return whatever metadata could be extracted.
 *
 * The page is fetched with file_get_contents() and matched against
 * site-specific regular expressions. Any field whose pattern does not match
 * is simply left out of the result, so callers should test keys with
 * isset()/empty() before using them.
 *
 * NOTE(review): $link is handed to file_get_contents() as-is, so it can name
 * any stream or local path PHP can open; validate it first if it comes from
 * untrusted input.
 *
 * @param string $link Address of the video page.
 * @param string $host Site selector: 'youku.com', 'ku6.com' or 'tudou.com'.
 *
 * @return array Zero or more of the keys 'flashvar' (video id), 'imageurl'
 *               (thumbnail), 'vidurl' (player address) and 'title'. Empty
 *               array for an unrecognised $host.
 */
function CaptureVideo($link, $host)
{
    // switch compares loosely, exactly like the original `==` chain.
    switch ($host) {
        case 'youku.com':
            return captureVideoParseYouku($link);
        case 'ku6.com':
            return captureVideoParseKu6($link);
        case 'tudou.com':
            return captureVideoParseTudou($link);
        default:
            return array();
    }
}

/**
 * Send the page charset header, but only while headers can still be sent.
 *
 * Per the original author's note the header exists only so test output
 * displays correctly; guarding it avoids a "headers already sent" warning
 * when the function is called after output has started.
 *
 * @param string $charset Charset name placed in the Content-Type header.
 *
 * @return void
 */
function captureVideoSendCharset($charset)
{
    if (!headers_sent()) {
        header("Content-Type:text/html; charset=" . $charset);
    }
}

/**
 * Fetch the page body, mapping a failed fetch to an empty string.
 *
 * With an empty body every pattern simply fails to match, so the caller
 * still returns the fields it could derive from the link itself.
 *
 * @param string $link Address to read.
 *
 * @return string Page content, or '' when it could not be read.
 */
function captureVideoFetchPage($link)
{
    $html = file_get_contents($link);

    return false === $html ? '' : $html;
}

/**
 * Extract metadata from a Youku video page.
 *
 * @param string $link Address of the Youku video page.
 *
 * @return array See CaptureVideo().
 */
function captureVideoParseYouku($link)
{
    $result = array();
    captureVideoSendCharset('utf-8'); // author's note: Youku pages are UTF-8

    // The id sits between "id_" and either "=" or ".html". A real alternation
    // is required here: the former character class [=|.html] let the engine
    // backtrack into the id and return it truncated.
    if (preg_match('/id_(\w+)(?:=|\.html)/', $link, $id) && !empty($id[1])) {
        $result['flashvar'] = $id[1];
    }

    $html = captureVideoFetchPage($link);

    // The thumbnail address is embedded in the "download" list item; only
    // look for it when that item is actually present.
    if (preg_match('/<li class="download"(.*)<\/li>/', $html, $download)
        && preg_match('/http:\/\/g(.*)\.ykimg\.com\/(.*)\|"\>/', $download[1], $image)
        && !empty($image[1])
    ) {
        $result['imageurl'] = "http://g" . $image[1] . ".ykimg.com/" . $image[2];
    }

    // Stored value is the embed address up to, but not including, "/v.swf".
    if (preg_match('/embed src="(.*)\/v\.swf/', $html, $player) && !empty($player[1])) {
        $result['vidurl'] = $player[1];
    }

    // Title is the part of <title> before the first " - " separator.
    if (preg_match('/<title>(.*?) - (.*)<\/title>/', $html, $title)) {
        $result['title'] = $title[1];
    }

    return $result;
}

/**
 * Extract metadata from a Ku6 video page.
 *
 * @param string $link Address of the Ku6 video page.
 *
 * @return array See CaptureVideo().
 */
function captureVideoParseKu6($link)
{
    $result = array();
    captureVideoSendCharset('gbk'); // author's note: Ku6 pages are GBK

    $html = captureVideoFetchPage($link);

    // For ordinary pages the id is the file name in the link. For
    // "index_*.html" pages (or when the link yields no id) fall back to the
    // "refer/<id>/v.swf" address inside the page, if there is one.
    $isIndexPage = (1 === preg_match('/\/index_([\w\-]+)\.html/', $link));
    if (!$isIndexPage
        && preg_match('/\/([\w\-]+)\.html/', $link, $id)
        && !empty($id[1])
    ) {
        $result['flashvar'] = $id[1];
    } elseif (preg_match('/refer\/(.*)\/v\.swf/', $html, $refer)) {
        $result['flashvar'] = $refer[1];
    }

    // Here the whole match, including the trailing "/v.swf", is stored.
    if (preg_match('/http\:(.*)\/v\.swf/', $html, $player) && !empty($player[0])) {
        $result['vidurl'] = $player[0];
    }

    // Stores the inner content of the "s_pic" span as found in the page.
    if (preg_match('/<span class="s_pic">(.*)<\/span>/', $html, $image) && !empty($image[1])) {
        $result['imageurl'] = $image[1];
    }

    if (preg_match('/"title" content="(.*)"\/>/', $html, $title) && !empty($title[1])) {
        $result['title'] = $title[1];
    }

    return $result;
}

/**
 * Extract metadata from a Tudou video page.
 *
 * @param string $link Address of the Tudou video page.
 *
 * @return array See CaptureVideo().
 */
function captureVideoParseTudou($link)
{
    $result = array();
    captureVideoSendCharset('gbk'); // author's note: Tudou pages are GBK

    $html = captureVideoFetchPage($link);

    // Unlike the other sites, the id is searched for in the page content
    // ("view/<id>/"), not in the link.
    if (preg_match('/view\/([\w\-]+)\//', $html, $id) && !empty($id[1])) {
        $result['flashvar'] = $id[1];
    }

    // The player address is assembled from two script variables found in the
    // page: "lid" and "lid_code" (the latter with its single quotes removed).
    if (preg_match('/,lid = (.*)/', $html, $lid)
        && preg_match('/,lid_code = lcode = (.*)/', $html, $lcode)
    ) {
        $code = str_replace("'", "", $lcode[1]);
        if (!empty($code) && !empty($lid[1])) {
            $result['vidurl'] = "http://www.tudou.com/l/" . $code . "/&iid=" . $lid[1] . "/v.swf";
        }
    }

    if (preg_match('/pic:"(.*)"/', $html, $image) && !empty($image[1])) {
        $result['imageurl'] = $image[1];
    }

    // Title is the part of <title> before the first "_" separator.
    if (preg_match('/<title>(.*?)_(.*)<\/title>/', $html, $title)) {
        $result['title'] = $title[1];
    }

    return $result;
}
// Demo
// Usage example.
// Only Tudou, Youku and Ku6 are supported for now.
// These sites change their HTML structure from time to time; if extraction
// stops working, adjust the corresponding regular expressions.
// by hkshadow 2011-06-25
$host = "youku.com";
$link = 'http://v.youku.com/v_show/id_XMjcxNjU0NjMy.html';

// Fetch the page and dump whatever fields could be extracted.
$captured = CaptureVideo($link, $host);
print_r($captured);
?>