使用you-get下载优酷播单视频

in 其他 | 0 comment | 阅读量: 1,205

you-get 是一个用 Python 编写的命令行下载工具,但目前不支持优酷播单(播放列表)的下载,于是自己写了一个小脚本来批量下载播单中的视频。

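依赖库安装

脚本依赖 requests、beautifulsoup4、html5lib 和 you-get,可以用 pip 一次安装: `pip install requests beautifulsoup4 html5lib you-get`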

代码如下(下载任何播单只需要更换`playlist_id`)
import requests as rq
import re
import json
from bs4 import BeautifulSoup
from you_get import common as you_get

# Parameters
playlist_id = 49399706  # id of the playlist page; it can be read from the playlist URL
page = 1  # number of the first result page to request; no need to change


# Browser-like HTTP headers for the playlist requests.
headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.78 Safari/537.36',
    # Fixed: the value previously started with "ext/html", which is not a valid MIME type.
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    'Referer': 'http://v.youku.com/v_show/id_XMjY4NDI3NjE2OA==.html?spm=a2h1n.8251843.playList.5~5~A&f=49399706&o=0'
}

urls = set()  # video page URLs collected from the playlist
failed_urls = set()  # URLs whose download raised an error


# Matches anchors whose href points at a Youku video page (dots escaped so
# they only match a literal ".").
video_href_pattern = re.compile(r"v\.youku\.com")

# Walk the playlist one page at a time, adding every video URL to ``urls``,
# until the API returns an empty HTML fragment.
while True:
    print("===== downloading the {} page =====".format(page))
    response = rq.get(
        "http://list.youku.com/albumlist/items?id={}&page={}&size=20&ascending=1&callback=tuijsonp5".format(playlist_id, page),
        headers=headers,  # the headers dict was previously built but never sent
        timeout=30,  # do not hang forever on a stalled connection
    )
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    response.raise_for_status()

    # The endpoint answers with JSONP ("callback({...})"); keep only the
    # JSON payload between the outermost parentheses.
    jsonp_match = re.search(r"\((.*)\)", response.text)
    if jsonp_match is None:
        raise ValueError(
            "unexpected response for page {}: no JSONP payload found".format(page)
        )

    video_data = json.loads(jsonp_match.group(1))['html']
    if not video_data:
        print("===== all the pages have scraped! =====")
        break

    soup = BeautifulSoup(video_data, 'html5lib')

    for tag in soup.find_all(href=video_href_pattern):
        href = tag.get('href')
        # Links are prefixed with "http:" as before (this suits
        # protocol-relative "//host/path" hrefs); a link that already
        # carries a scheme is kept untouched so it is not corrupted.
        if href.startswith(("http://", "https://")):
            video_url = href
        else:
            video_url = "http:" + href
        urls.add(video_url)

    page += 1

print("thr num of all the video is :{}".format(len(urls)))
# Download every collected video. A failing video is recorded in
# ``failed_urls`` and the loop moves on to the next one.
for each in urls:
    try:
        you_get.any_download(each, output_dir="../../Videos/tensorflow/", merge=True)
    # NOTE(review): you-get appears to report some fatal errors by calling
    # sys.exit(), which raises SystemExit and is not an Exception subclass;
    # catching it keeps one bad video from aborting the whole batch.
    # Confirm against the installed you-get version. KeyboardInterrupt is
    # deliberately not caught, so Ctrl-C still stops the script.
    except (Exception, SystemExit) as e:
        print(e)
        failed_urls.add(each)
        print("download {} failed".format(each))

# Report the URLs that could not be downloaded so they can be retried.
for each in failed_urls:
    print(each)
Responses