网易云音乐评论爬取

确定信息来源

先确认网页中任意一首歌曲的评论数据来自哪里:是页面 HTML 本身,还是异步请求返回的数据包。

通过浏览器开发者工具的网络(Network)面板,可以确定评论数据来自网络请求返回的数据包,而不是页面 HTML 本身。

image-20220319104619073

尝试进行抓取

代码

# -*- coding: utf-8 -*-
# @Time : 2022/3/19 10:34
# @Author : Baxkiller
# @File : netease_music163_comments.py
# @Software : PyCharm

import requests
from Crypto.Cipher import AES
import base64
import json


def padding(strs):
    """Pad *strs* so that its UTF-8 encoding fills whole 16-byte blocks.

    The padding is PKCS#7-style: ``n`` copies of ``chr(n)`` are appended,
    where ``n`` (1..16) is the number of bytes missing to reach the next
    multiple of 16. Input that is already block-aligned gets a full extra
    block of 16 padding characters.

    Args:
        strs: Text to pad.

    Returns:
        The padded text (still a ``str``).
    """
    # Measure the encoded length, not the character count: the caller
    # encrypts the UTF-8 bytes, and a non-ASCII character occupies more than
    # one byte. Each padding character (code point 1..16) encodes to exactly
    # one byte, so appending pad_len characters adds pad_len bytes.
    pad_len = 16 - (len(strs.encode("utf-8")) % 16)
    return strs + chr(pad_len) * pad_len


# Form payload for the comment request. The __main__ block serializes it with
# json.dumps and encrypts the result via get_params, so key order is kept
# exactly as listed. Every value is sent as a string.
data = dict(
    # rid and threadId carry the same identifier
    # (presumably the comment thread of one song -- TODO confirm).
    rid="R_SO_4_1306923998",
    threadId="R_SO_4_1306923998",
    pageNo="1",
    pageSize="20",
    cursor="-1",
    offset="0",
    orderType="1",
)

# Long hex string that is not referenced anywhere in the visible script.
# Presumably the RSA modulus the site's JavaScript uses to derive encSecKey --
# TODO confirm.
f = '00e0b509f6259df8642dbc35662901477df22677ec152b5ff68ace615bb7b725152b3ab17a876aea8a5aa76d2e417629ec4ee341f56135fccf695280104e0312ecbda92557c93870114af6c9d05c4f7f0c3685b7a46bee255932575cce10b424d813cfe4875d3e82047b97ddef52741d546b8e289dc6935b3ece0462db0a22b8e7'
# 16-character key used by get_params for the first AES pass.
g = "0CoJUm6Qyw8W8jud"
# Not referenced anywhere in the visible script.
# NOTE(review): if this is meant to be a hex RSA public exponent, the usual
# value is '010001' (65537) -- verify before relying on it.
e = '01001'
# 16-character key used by get_params for the second AES pass.
# Presumably the fixed "random" key that the hard-coded value returned by
# get_encSecKey was generated from -- TODO confirm.
i = "GAmFwie2XTGvncAy"


def get_encSecKey():
    """Return the hard-coded ``encSecKey`` form value.

    Nothing is computed here: the same hex string is returned on every call.
    """
    enc_sec_key = "23ffcf188f9a56c6917dc47e8ff9c07f1837723ed57ed45d639e182c387c57293ccd37c3e0e20603a7818456b3e4730be3a429187129fedf600851d0b7d74db24916a2f47693a3a6cb95f502d2b4d6dcb1db78fa59f4fc0bfaf8cf5d999b4eaf9ab0a0506876449a8bff2d6530ace6c00bcf8ea80d3db7bd9b7cdee640c6ca02"
    return enc_sec_key


def encWords(strs, key):
    """Encrypt *strs* with AES in CBC mode and return the result as Base64 text.

    The plaintext is extended by ``padding`` first and UTF-8 encoded
    afterwards; *key* is UTF-8 encoded as well. The IV is the fixed ASCII
    byte string ``0102030405060708``.

    Args:
        strs: Text to encrypt.
        key: AES key as text.

    Returns:
        The Base64-encoded ciphertext as a ``str``.
    """
    # Pad first, then encode -- same order as the request data is prepared.
    plaintext = padding(strs).encode("utf-8")
    key_bytes = key.encode("utf-8")
    cipher = AES.new(key=key_bytes, iv=b"0102030405060708", mode=AES.MODE_CBC)
    ciphertext = cipher.encrypt(plaintext)
    return base64.b64encode(ciphertext).decode("utf-8")


def get_params(strs):
    """Build the ``params`` form value by encrypting *strs* twice.

    The first pass encrypts with the module-level key ``g``; its Base64
    output is then encrypted again with the module-level key ``i``.

    Args:
        strs: Text to encrypt (the __main__ block passes the JSON payload).

    Returns:
        The Base64 text produced by the second encryption pass.
    """
    first_pass = encWords(strs, g)
    return encWords(first_pass, i)

if __name__ == "__main__":
    # Endpoint the encrypted comment request is posted to.
    url = "https://music.163.com/weapi/comment/resource/comments/get?csrf_token="
    # Form fields: the doubly encrypted JSON payload plus the hard-coded key.
    datas = {
        "params": get_params(json.dumps(data)),
        "encSecKey": get_encSecKey(),
    }
    # requests applies no timeout by default; without one the script can hang
    # indefinitely if the server never answers.
    resp = requests.post(url=url, data=datas, timeout=10)
    # Decode the response body as JSON and print it.
    print(resp.json())

成功结果

image-20220319173436731