实例讲解实现抓取网上房产信息的ASP程序

论坛 期权论坛 脚本     
niminba   2021-5-23 05:05   1986   0
<%@LANGUAGE="VBSCRIPT" CODEPAGE="936"%>
<!-- #include file="conn.asp" -->

<!-- #include file="inc/function.asp" -->
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<title>Untitled Document</title>
<meta http-equiv="Content-Type" content="text/html; charset=gb2312">
<meta http-equiv="refresh" content="300;URL=steal_house.asp">
</head>

<body>
<%
on error resume next
'
Server.ScriptTimeout = 999999
'========================================================
' Character-encoding helper
'========================================================
' Decodes a binary payload into text using the requested charset.
'   body - binary data written into the stream (ReadXml passes an
'          XMLHTTP responseBody here)
'   code - charset name assigned to the stream's Charset property,
'          e.g. "gb2312"
' Returns the text read back from the stream after switching it from
' binary to text mode.
Function BytesToBstr(body,code)
        Const STREAM_BINARY = 1
        Const STREAM_TEXT = 2
        Const MODE_READ_WRITE = 3

        Dim decoder
        Set decoder = Server.CreateObject("adodb.stream")
        With decoder
                ' Load the raw bytes while the stream is in binary mode ...
                .Type = STREAM_BINARY
                .Mode = MODE_READ_WRITE
                .Open
                .Write body
                ' ... then rewind and re-read the same bytes as text.
                .Position = 0
                .Type = STREAM_TEXT
                .Charset = code
                BytesToBstr = .ReadText
                .Close
        End With
        Set decoder = Nothing
End Function

' Case-insensitive, 1-based position of strng inside wstr.
' When strng does not occur in wstr, the length of wstr is returned
' instead of 0.
Function Newstring(wstr,strng)
        Dim foundAt
        foundAt = InStr(LCase(wstr), LCase(strng))
        If foundAt <= 0 Then
                Newstring = Len(wstr)
        Else
                Newstring = foundAt
        End If
End Function
' String replacement helper.
' Returns ori with every occurrence of str1 replaced by str2. The search
' starts at the first character, has no replacement limit, and uses a
' binary (case-sensitive) comparison; these are the defaults of the
' built-in Replace, written out explicitly.
Function ReplaceStr(ori,str1,str2)
        ReplaceStr = Replace(ori, str1, str2, 1, -1, vbBinaryCompare)
End Function
'====================================================
' Downloads a page and returns the part of it between two markers.
'   url   - address fetched with a synchronous HTTP GET
'   code  - charset used to decode the response bytes (e.g. "gb2312")
'   start - marker text; the result begins at its first occurrence
'           (the marker itself is included)
'   ends  - marker text; the result stops just before its first
'           occurrence at or after the start marker (marker excluded)
' Returns "" when either marker cannot be found, instead of raising
' "Invalid procedure call" from Mid/Left with a zero or negative argument.
' The start/ends arguments are no longer overwritten: VBScript passes
' parameters ByRef by default, so reusing them for positions would
' replace a caller's variables with integers.
' NOTE(review): Microsoft.XMLHTTP is kept as in the original page;
' MSXML2.ServerXMLHTTP is the server-side variant - confirm before switching.
function ReadXml(url,code,start,ends)
        Dim oSend, pageText, startPos, endPos

        ReadXml = ""

        Set oSend = CreateObject("Microsoft.XMLHTTP")
        ' open returns no value, so nothing is assigned from it.
        oSend.open "GET", url, False
        oSend.send
        pageText = BytesToBstr(oSend.responseBody, code)
        Set oSend = Nothing

        startPos = InStr(pageText, start)
        If startPos <= 0 Then Exit Function
        pageText = Mid(pageText, startPos)

        ' The end marker is searched only in the text from the start marker on.
        endPos = InStr(pageText, ends)
        If endPos <= 0 Then Exit Function
        ReadXml = Left(pageText, endPos - 1)
end function

' Returns the text of body that lies between two markers.
'   body  - text to search
'   start - marker text; the result begins right after its first occurrence
'   ends  - marker text; the result stops just before its first occurrence
'           after the start marker
' Returns "" when either marker is missing.
'
' The skip distance is Len(start), the length of the marker. The previous
' code first overwrote start with the InStr position and then used
' Len(start) + 1, i.e. the number of digits in that position plus one.
' That only matched the marker length for 3-character markers found at
' positions 10-99. Positions are now kept in locals, so the ByRef
' arguments are no longer overwritten either.
function SubStr(body,start,ends)
        Dim startPos, endPos, tail

        SubStr = ""

        startPos = InStr(body, start)
        If startPos <= 0 Then Exit Function

        ' Everything after the start marker.
        tail = Mid(body, startPos + Len(start))

        endPos = InStr(tail, ends)
        If endPos <= 0 Then Exit Function
        SubStr = Left(tail, endPos - 1)
end function

' Page-level working variables for the scrape.
dim getcont,NewsContent
dim url,title
url="http://www.***.com"' News site URL knowsky.com
' Fetch the page decoded as gb2312 and keep the text from the
' <table class=k2 border="0" marker up to (not including) the first </table>.
getcont=ReadXml(url,"gb2312","<table class=k2 border=""0""","</table>")
' RegexHtml is not defined in this part of the file (presumably it comes
' from one of the included files - TODO confirm). The loop that follows
' indexes getcont with ubound(), so an array is expected back.
getcont=RegexHtml(getcont)
' One variable per field extracted from a detail page.
dim KeyId,NewsClass,City,Position,HouseType,Level,Area,Price,Demostra

dim ContactMan,Contact
for i=2 to ubound(getcont)
 response.Write(getcont(i)&"__<br>")

 tempLink=mid(getcont(i),instr(getcont(i),"href=""")+6,instr(getcont(i),""" onClick")-10)
 tempLink=replace(tempLink,"../","")

 response.Write(i&":"&tempLink&"<br>")
 NewsContent=ReadXml(tempLink,"gb2312","<td valign=""bottom"" width=""400"">","<hr width=""760"" noshade size=""1"" color=""#808080""> ")
 NewsContent=RemoveHtml(NewsContent)
 NewsContent=replace(NewsContent,VbCrLf,"")
 NewsContent=replace(NewsContent,vbNewLine,"")
 NewsContent=replace(NewsContent," ","")
 NewsContent=replace(NewsContent," ","")
 NewsContent=replace(NewsContent,"&nbsp;","")
 NewsContent=replace(NewsContent,"\n","")
 NewsContent=replace(NewsContent,chr(10),"")
 NewsContent=replace(NewsContent,chr(13),"")
 '===============get Content=======================
 response.Write(NewsContent)
 KeyId=SubStr(NewsContent,"列号:","信息类别:")
 NewsClass=SubStr(NewsContent,"类别:","所在城市:")
 City=SubStr(NewsContent,"城市:","房屋具体位置:")
 Position=SubStr(NewsContent,"位置:","房屋类型:")
 HouseType=SubStr(NewsContent,"类型:","楼层:")
 Level=SubStr(NewsContent,"楼层:","使用面积:")
 Area=SubStr(NewsContent,"面积:","房价:")
 Price=SubStr(NewsContent,"房价:","其他说明:")
 Demostra=SubStr(NewsContent,"说明:","联系人:")
 ContactMan=SubStr(NewsContent,"联系人:","联系方式:")
 Contact=SubStr(NewsContent,"联系方式:","信息来源:")
 response.Write("总序列号:"&KeyId&"<br>")
 response.Write("信息类别:"&NewsClass&"<br>")
 response.Write("所在城市:"&City&"<br>")
 response.Write("房屋具体位置:"&Position&"<br>")
 response.Write("房屋类型:"&HouseType&"<br>")
 response.Write("楼层:"&Level&"<br>")
 response.Write("使用面积:"&Area&"<br>")
 response.Write("房价:"&Price&"<br>")
 response.Write("其他说明:"&Demostra&"<br>")
 response.Write("联系人:"&ContactMH[YHBHZYHHHLB][YHY`9g*9g ] BZYHHH B[OHPTSQ Lcce  B[YHYH[YHBHZYHHHLB][YHY/mcy ] BZYHHH B[OHPTSQ Lccoz-c/9c  B[YHYH[YHBHZYHHHLB][YHY/n& ] BZYHHH B[OHPTSQ Lam.  B[YHYH[YHBHZYHHHLB][YHYo9l` ] BZYHHH B[OHPTSQ Lakyl`  B[YHYH[YHBHZYHHHLB][YHY/oh ] BZYHHH B[OHPTSQ L; 9nll  B[YHYH[YHBHZYHHHLB][YHY/& ] BZYHHH B[OHPTSQ L8`Na`" 9.l9ce.!ie  B[YHYH[YHBHZYHHHLB][YHYam.# ] BZYHHH B[OHPTSQ Lcc/abyo l`l#%a" ya`nl"{+9o9lgeabybak9z-a9/o#9c*9.bak9ayke#9dj:/z+zod9aj8.:`&/" 8yg*9+9o9bcy/"{aj9. yn +#9o9az+yc!y-8zod9aj;"y#-z+  B[YHYH[YHBHZYHHHLB][YHY e9. ] BZYHHH B[OHPTSQ Llx  B[YHYH[YHBHZYHHHLB][YHY e9yo#] BZYHHH B[OHPTSQ L M LM  B[YHYH[YHBHZYHHHLB][YHY/k; ] BZYHHH B[OHPTSQ L KN MMI&N N M  B[YHYH[YHBHZYHHHLB][YHYa(yl; ] BZYHHH B[OHPTSQ LNO  O XO

分享到 :
0 人收藏
您需要登录后才可以回帖 登录 | 立即注册

本版积分规则

积分:1060120
帖子:212021
精华:0
期权论坛 期权论坛
发布
内容

下载期权论坛手机APP