• 273阅读
  • 1回复

[移动开发] 关于腾讯视频的解析方式


会员


发帖
8
楼主  发表于:2018/7/12 20:32

http://imgcache.qq.com/tencentvideo_v1/tvp/js/tvp.player_v2_zepto.js 算法大部分在这里

普通流视频(完整视频)

http://vv.video.qq.com/geturl?vid=v00149uf4ir&otype=json

高清视频(分段视频)

1080P-fhd,超清-shd,高清-hd,标清-sd
http://vv.video.qq.com/getinfo?vids=v00149uf4ir&otype=json&charge=0&defaultfmt=shd

其他可用解析
vv.video.qq.com/getinfo.*
tt.video.qq.com/getinfo.*
ice.video.qq.com/getinfo.*
tjsa.video.qq.com/getinfo.*
a10.video.qq.com/getinfo.*
xyy.video.qq.com/getinfo.*
vcp.video.qq.com/getinfo.*
vsh.video.qq.com/getinfo.*
vbj.video.qq.com/getinfo.*
bobo.video.qq.com/getinfo.*
flvs.video.qq.com/getinfo.*
rcgi.video.qq.com/report.*

                    
会员


发帖
楼主   发表于: 2018年7月13日 20:25

getinfo参数

# Query parameters for the getinfo request.
params = {
    'charge': 0,
    'vid': vid,  # video id, obtained from the page URL or HTML
    'defaultfmt': 'auto',
    'otype': 'json',
    # Replace this guid if requests stop working; capture live traffic to
    # find a current value.
    'guid': '8fffd19befa1413953bb108f58e49b3b',
    'platform': plt,
    'defnpayver': 1,
    'appVer': '3.0.83',
    'sdtfrom': std,
    'host': 'v.qq.com',
    'ehost': 'https%3A%2F%2Fv.qq.com%2Fx%2Fcover%2Fnuijxf6k13t6z9b%2Fl0023olk3g4.html',
    'defn': 'mp4',
    'fhdswitch': 0,
    'show1080p': 1,
    'isHLS': 0,
    'newplatform': 'v1010',
    'defsrc': 1,
    '_0': 'undefined',
    '_1': 'undefined',
    '_2': 'undefined',
    '_': int(round(time.time() * 1000)),  # current time in milliseconds
    'callback': jsonpCallback,  # prefix wrapped around the returned JSON
}
r = requests.get('https://h5vv.video.qq.com/getinfo', params=params).content
    r = requests.get('https://h5vv.video.qq.com/getinfo', params=params).content


上面的参数基本固定,最好不要遗漏,vid 需要自己获取。请求这个接口是为了给后面获取 vkey 做准备。
从上面 getinfo 接口返回的 json 中获得:

视频url前缀
# Take the 'url' of the first 'ui' entry under the first 'vi' entry's 'ul'.
url_prefix = data['vl']['vi'][0]['ul']['ui'][0]['url']
MP4文件名字,q0200qbrzbk.mp4这种全集的,q0200qbrzbk.p201.1.mp4这种分段的
# Base file name, read from the 'lnk' field of the first 'vi' entry.
fn_pre = data['vl']['vi'][0]['lnk']
# Full file name: the base name with the .mp4 extension appended.
filename = fn_pre + '.mp4'
接着请求 getkey 接口
参数:

# Query parameters for the getkey request.
params = {
    'charge': 0,
    'vid': vid,  # video id
    'format': 2,
    'otype': 'json',
    'guid': '8fffd19befa1413953bb108f58e49b3b',
    'platform': 10901,
    'defnpayver': 0,
    'appVer': '3.0.83',
    'vt': 0,
    'sdtfrom': 'v1010',
    # Timestamp; important: without it the download speed drops to about 20k.
    '_rnd': rmt['t'],
    # Rate-limit token; important: without it the download speed drops to
    # about 20k.
    '_qv_rmt': rmt['u1'],
    '_qv_rmt2': rmt['u2'],  # same as _qv_rmt
    'ui_host': 2,
    'filename': filename,
    'callback': jsonpCallback,
    # 13-digit (millisecond) timestamp; the author observed stuttering
    # without it.
    '_': int(round(time.time() * 1000)),
}
r = requests.get('https://h5vv.video.qq.com/getkey', params=params).content

核心来了:限速问题折腾了一天,最后是从各处抓取数据、拿到 js 里的算法后整合而成。qvrmt 这两个由算法生成的参数腾讯推出不久,所以觉得这种爬取视频的方法短时间内不会失效。
ok,从以下算法弄出三个参数扔到上面的接口上去请求。

qvrmt 的生成方法:rmt = getQv(plt, vid, std, str(1)),前三个参数分别为 platform、vid、sdtfrom,第四个参数 sts 这里传 str(1)。

# coding: utf-8
import time
import hashlib

# Secret string that getQv mixes into the MD5 input and also passes to
# tempcalc as the XOR key.
Seed = "#$#@#*ad"
# Encoding alphabet indexed by urlenc: 64 symbol characters followed by "="
# at index 64, which urlenc emits as padding.
urlStr = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="


def hexToString(h):
    """Decode a hexadecimal string into a string of characters.

    Each pair of hex digits becomes one character via ``chr``. An optional
    leading ``'0x'`` prefix is skipped. If the number of digits is odd, the
    final lone digit is decoded on its own.

    Args:
        h: Hexadecimal text, for example the result of ``hexdigest()``.

    Returns:
        The decoded string, or ``''`` when there are no digits to decode.

    Raises:
        ValueError: If a chunk is not valid hexadecimal.
    """
    start = 2 if h[0:2] == '0x' else 0
    # range() yields nothing for empty input, so "" and a bare "0x" decode
    # to "" instead of failing on int('', 16).
    return ''.join(chr(int(h[i:i + 2], 16)) for i in range(start, len(h), 2))

def getQv(plt, vid, std, sts, ts=None):
    """Build the signed token set for a video request.

    An MD5 digest is computed over ``str(plt) + vid + ts + Seed + sts + std``,
    decoded to raw characters, XOR-ed with a key, and encoded with ``urlenc``.
    The encoded string is then split into its even- and odd-position
    characters.

    Args:
        plt: Platform identifier; converted with ``str`` before hashing.
        vid: Video id string.
        std: ``sdtfrom`` string.
        sts: String whose full value is hashed and whose first character is
            embedded in the encoded output.
        ts: Timestamp to sign with. Defaults to the current Unix time in
            seconds, taken when the function is called.

    Returns:
        A dict with keys ``'p'`` (the input parameters), ``'u'`` and ``'c'``
        (encoded strings keyed with ``Seed`` and ``'86FG@hdf'``), ``'u1'`` /
        ``'u2'`` (even / odd characters of ``'u'``) and ``'t'`` (the
        timestamp as a string).
    """
    # Resolve the timestamp per call. A default of int(time.time()) in the
    # signature is evaluated only once, when the function is defined, so
    # every later call would reuse that stale value.
    if ts is None:
        ts = int(time.time())
    ts = str(ts)
    p = {
        "plt": plt,
        "vid": vid,
        "std": std,
        "sts": sts,
        "ts": ts,
    }
    md = hashlib.md5()
    md.update((str(plt) + vid + ts + Seed + sts + std).encode('utf-8'))
    result = hexToString(md.hexdigest())
    u = urlenc(tempcalc(result, Seed), sts[0], ts)
    c = urlenc(tempcalc(result, '86FG@hdf'), sts[0], ts)
    return {
        'p': p,
        'u': u,
        'c': c,
        'u1': U1(u, 0),
        'u2': U1(u, 1),
        't': ts,
    }

def urlenc(input, sts, ts):
    """Encode ``input`` with the ``urlStr`` alphabet, embedding a marker.

    The input is consumed three characters at a time and each group is
    emitted as four characters of ``urlStr``, Base64-style, with ``urlStr[64]``
    used as padding for a short final group. Immediately before the fifth
    group is emitted, the text ``'A' + sts + ts`` is inserted into the output.

    Args:
        input: String whose character codes are encoded.
        sts: String inserted after the ``'A'`` marker.
        ts: Timestamp string inserted after ``sts``.

    Returns:
        The encoded string; ``''`` for empty input.
    """
    pieces = []
    # Start from zero so that an input shorter than three characters is
    # padded with zero bits instead of raising TypeError.
    second = third = 0
    length = len(input)
    for start in range(0, length, 3):
        first = ord(input[start])
        has_second = start + 1 < length
        has_third = start + 2 < length
        # NOTE(review): when the final group is short, `second` / `third`
        # keep the values from the previous group rather than being reset to
        # zero, so the last data character can differ from standard Base64.
        # This is kept as-is because the output the remote service expects
        # cannot be confirmed from this file.
        if has_second:
            second = ord(input[start + 1])
        if has_third:
            third = ord(input[start + 2])

        # The fifth group starts at offset 12; the marker goes right before it.
        if start == 12:
            pieces.append('A' + sts + ts)

        enc1 = first >> 2
        enc2 = ((first & 3) << 4) | (second >> 4)
        enc3 = ((second & 15) << 2) | (third >> 6)
        enc4 = third & 63
        if not has_second:
            enc3 = enc4 = 64
        elif not has_third:
            enc4 = 64

        pieces.append(urlStr[enc1] + urlStr[enc2] + urlStr[enc3] + urlStr[enc4])
    return ''.join(pieces)

def tempcalc(a,b):
    """XOR every character of ``a`` with a repeating four-character key.

    The key is the first four characters of ``b``; the character at position
    ``i`` of ``a`` is combined with ``b[i % 4]``.

    Returns:
        A string of the XOR-ed characters, the same length as ``a``.
    """
    mixed = []
    for pos, ch in enumerate(a):
        key_ch = b[pos % 4]
        mixed.append(chr(ord(ch) ^ ord(key_ch)))
    return ''.join(mixed)

def U1(a,b):
    """Return every second character of ``a``, starting at offset ``b``.

    Args:
        a: Source string.
        b: Non-negative start offset (0 selects the even positions, 1 the
            odd positions).

    Returns:
        The selected characters joined into a string; ``''`` when ``b`` is
        at or beyond the end of ``a`` (including empty ``a``).
    """
    # A stride slice replaces the hand-built index list and tolerates
    # empty or too-short input instead of raising IndexError.
    return a[b::2]

快速回复