雨课堂自动播放视频爬虫

时间:2021-08-19
本文章向大家介绍雨课堂自动播放视频爬虫,主要包括雨课堂自动播放视频爬虫使用实例、应用技巧、基本知识点总结和需要注意事项,具有一定的参考价值,需要的朋友可以参考一下。
import time
import json
import os
from functools import partial

from selenium import webdriver
from concurrent.futures import ThreadPoolExecutor, as_completed


def get_cookies():
    """Log in interactively and save the session cookies to ``cookies.txt``.

    Opens a Firefox window on the Yuketang web page, waits 15 seconds so the
    user can scan the login code, then writes the browser's cookies as a JSON
    list to ``cookies.txt`` in the current working directory.
    """
    browser = webdriver.Firefox()
    try:
        browser.get('https://www.yuketang.cn/web')
        time.sleep(15)  # time window for the user to scan the login code
        cookies = browser.get_cookies()  # list of cookie dicts
    finally:
        # This window is only needed for the login step; always close it so
        # no stray browser process is left behind, even on failure.
        browser.quit()

    # Written as utf8 to match the encoding used when the file is read back.
    with open('cookies.txt', 'w', encoding='utf8') as f:
        json.dump(cookies, f)
    print('cookies保存成功!')


def login(driver, url, load_wait=10, play_wait=500):
    """Open a video page, click its play button, and return the page title.

    Args:
        driver: Selenium WebDriver used to open the page. It is always quit
            before this function returns or raises, so it cannot be reused.
        url: Address of the video page to open.
        load_wait: Seconds to wait after navigation before looking for the
            play button.
        play_wait: Seconds to keep the page open after clicking play.

    Returns:
        The page title, read after ``play_wait`` seconds have elapsed.
    """
    try:
        driver.get(url)
        time.sleep(load_wait)
        # ``find_element_by_xpath`` was removed in Selenium 4.3; the generic
        # ``find_element`` with the "xpath" strategy works on Selenium 3 and 4.
        play_button = driver.find_element(
            'xpath',
            '/html/body/div[4]/div[2]/div/div[2]/div/div/div/section[2]/div/div/div[2]/div/xt-wrap/xt-controls/xt-inner/xt-playbutton')
        play_button.click()
        time.sleep(play_wait)
        title = driver.title
        print(title)
        return title
    finally:
        # Close the browser even when the page fails to load or the play
        # button cannot be found, so failed runs do not leak browsers.
        driver.quit()


def browser_initial():
    """Start Firefox and restore a saved Yuketang session from cookies.

    Reads the cookie list from ``./cookies.txt``, opens
    ``https://www.yuketang.cn/web``, adds every saved cookie to the browser
    and refreshes the page.

    Returns:
        The Firefox WebDriver instance, with the cookies applied.

    Raises:
        OSError: If ``./cookies.txt`` cannot be opened.
        ValueError: If the file does not contain valid JSON.
        Exception: Any error raised while driving the browser is re-raised
            after the browser has been closed.
    """
    # Load the cookies first so a missing or corrupt file fails before a
    # browser process has been started.
    with open('./cookies.txt', 'r', encoding='utf8') as f:
        saved_cookies = json.loads(f.read())

    options = webdriver.FirefoxOptions()
    # options.add_argument("--headless")  # run Firefox without a window
    # options.add_argument("--disable-gpu")
    browser = webdriver.Firefox(options=options)
    try:
        # The page is opened before the cookies are added.
        browser.get(
            'https://www.yuketang.cn/web')

        for cookie in saved_cookies:
            # Only name and value come from the saved cookie; every other
            # field is a fixed value.
            # NOTE(review): 'sessionid', 'expires', 'HostOnly' and 'Secure'
            # do not look like standard WebDriver cookie fields — confirm
            # whether they are needed.
            cookie_dict = {
                'domain': 'www.yuketang.cn',
                'name': cookie.get('name'),
                'value': cookie.get('value'),
                'sessionid': cookie.get('value'),
                "expires": '',
                'path': '/',
                'httpOnly': False,
                'HostOnly': False,
                'Secure': False,
                'sameSite': 'None'
            }
            browser.add_cookie(cookie_dict)
        browser.refresh()  # reload so the added cookies take effect
    except Exception:
        # Do not leave a half-initialised browser running on failure.
        browser.quit()
        raise
    return browser


if __name__ == '__main__':
    # First run: no saved cookies yet, so log in interactively and store them.
    if not os.path.exists('cookies.txt'):
        get_cookies()

    start = 12449817  # first video id
    end = 12449820  # stop id; range() below does not include it
    urls = [
        f'https://www.yuketang.cn/v2/web/xcloud/video-student/7698212/{index}'
        for index in range(start, end)
    ]
    print(urls)

    def play(url):
        """Play one video URL in its own logged-in browser; return its title."""
        # login() quits the driver it is given, and a single WebDriver must
        # not be driven from several threads at once, so every URL gets a
        # fresh browser instead of sharing one. (The previous
        # ``partial(login, browser=browser)`` also failed with a TypeError
        # because login() has no ``browser`` parameter.)
        return login(browser_initial(), url)

    # Play the videos concurrently, printing each title in URL order.
    with ThreadPoolExecutor() as pool:
        for result in pool.map(play, urls):
            print(result)

原文地址:https://www.cnblogs.com/suchcools/p/15161677.html