今天和大家分享的内容是…………………………………
如何用VBA代码,爬取指定QQ帐号在空间里的说说数据?
坦白的说,这事二步就可以解决了。
第一步,登录QQ或者TIM软件。
第二步,也就是最重要的一步,关注微信公众号:VBA编程学习与实践。后台回复关键词:说说
皮一下……开森……
获取Excel模版后,打开,点击按钮,运行宏……
一个重要的说明:
代码使用了IE浏览器获取QQ空间的Cookie,并计算关键参数g_tk的值,因此需要先将IE浏览器设置为默认浏览器,否则运行代码会出现错误的提示信息。
……
……
嗯,是时候回顾下过往的青春了。搞个小图表,可视化一下过去的时光里哪个月发的说说条数最多?哪个词出现的频率最高?再扩展下代码,看看哪个魂淡最爱到你的空间里扯淡?是基情四射还是情愫暗涌……
……
往期内容推荐:
需要批量下载网上的图片,Excel行不行?当然行!
为了让您买到最廉价的图书,我们用VBA干了这件事……
……
打个响指,好吧,本期代码如下所示。
Sub WebCrawlerQzone()
Dim strURL As String
Dim strCookie As String
Dim strText As String
Dim strGTK As String
Dim strKey As String
Dim strUserName As String
Dim strMsg As String
Dim intPageNum As Long
Dim lngCreateTime As Long
Dim k As Long
Dim i As Long
Dim blnClick As Boolean
Dim objIE As Object
Dim objWINHTTP As Object
Dim objDIC As Object
Dim objDOM As Object
Dim objTagA As Object
Dim objList As Object
Dim objWindow As Object
Dim vntTime As Variant
Dim vntQQNum As Variant
Set objDIC = CreateObject("scripting.dictionary")
Set objIE = CreateObject("InternetExplorer.Application")
Set objWINHTTP = CreateObject("WinHttp.WinHttpRequest.5.1")
Set objDOM = CreateObject("htmlfile")
Set objWindow = objDOM.parentWindow
strURL = "https://xui.ptlogin2.qq.com/cgi-bin/xlogin?"
strURL = strURL & "proxy_url=https%3A//qzs.qq.com/"
strURL = strURL & "qzone/v6/portal/proxy.html"
strURL = strURL & "&appid=549000912"
strURL = strURL & "&s_url=https%3A%2F%2Fqzs.qzone.qq.com" _
& "%2Fqzone%2Fv5%2Floginsucc.html%3Fpara%3Dizone"
With objIE
.navigate strURL
.Visible = False
vntTime = Timer
Do While Timer < vntTime + 4
Loop
Do Until .readyState = 4
DoEvents
Loop
For Each objTagA In .document.getElementsByTagName("a")
If objTagA.TabIndex = 2 Then
strUserName = objTagA.innerText
objTagA.Click
blnClick = True
Exit For
End If
Next
If Not blnClick Then
MsgBox strUserName & "您的QQ软件未登录或QQ空间未开通。"
Exit Sub
End If
vntTime = Timer
Do While Timer < vntTime + 4
Loop
strCookie = .document.cookie
.Quit
End With
strKey = Split(Split(strCookie, "p_skey=")(1), ";")(0)
strGTK = strGetGTK(strKey)
vntQQNum = [b1].Value
strURL = "https://user.qzone.qq.com/"
strURL = strURL & "proxy/domain/taotao.qq.com/"
strURL = strURL & "cgi-bin/emotion_cgi_msglist_v6?"
strURL = strURL & "num=20"
strURL = strURL & "&callback=_preloadCallback"
strURL = strURL & "&format=jsonp"
strURL = strURL & "&uin=" & vntQQNum
strURL = strURL & "&g_tk=" & strGTK
ActiveSheet.UsedRange.Offset(2).ClearContents
k = 3
On Error Resume Next
Application.ScreenUpdating = False
Do While 1 = 1
intPageNum = intPageNum + 20
With objWINHTTP
.Open "GET", strURL & "&pos=" & intPageNum - 20, False
.setRequestHeader "Cookie", strCookie
.send
strText = .responseText
End With
strText = Split(strText, "_preloadCallback(")(1)
strText = Left(strText, InStrRev(strText, ")") - 1)
objDOM.write "var data=" & strText & ""
For i = 0 To objWindow.eval("data.msglist.length") - 1
k = k + 1
Set objList = objWindow.eval("data.msglist[" & i & "]")
lngCreateTime = CallByName(objList, "created_time", VbGet)
If Not objDIC.exists(lngCreateTime) Then
objDIC(lngCreateTime) = ""
Else
Exit Do
End If
Cells(k, 1) = CallByName(objList, "createTime", VbGet)
Cells(k, 2) = CallByName(objList, "content", VbGet)
Cells(k, 3) = CallByName(objList, "cmtnum", VbGet)
Next i
Loop
[A3:C3] = Array("日期", "说说", "评论人数")
Application.ScreenUpdating = True
strMsg = "用户:" & strUserName & vbCrLf & "您好!"
strMsg = strMsg & "目标QQ" & vntQQNum
strMsg = strMsg & "的说说数据已抓取完成。"
MsgBox strMsg
Set objIE = Nothing
Set objWINHTTP = Nothing
Set objDOM = Nothing
Set objWindow = Nothing
Set objDIC = Nothing
Set objList = Nothing
End Sub
Function strGetGTK(ByVal strKey As String) As String
Dim objNewDom As Object
Dim objNewWindow As Object
Dim strJSON As String
Set objNewDom = CreateObject("htmlfile")
Set objNewWindow = objNewDom.parentWindow
With objNewWindow
strJSON = "gtk=function(skey)"
strJSON = strJSON & "{for(var hash=5381,i=0,"
strJSON = strJSON & "len=skey.length;i |
|