采集类及例子可灵活结合自定函数
复制内容到剪贴板
- <%
- function getHTTPPage(url,cset)
- dim Http
- set Http=server.createobject("MSXML2.XMLHTTP")
- Http.open "GET",url,false
- Http.send()
- if Http.readystate<>4 then
- exit function
- end if
- getHTTPPage=bytesToBSTR(Http.responseBody,cset)
- set http=nothing
- if err.number<>0 then err.Clear
- end function
- Function BytesToBstr(body,cset)
- dim objstream
- set objstream = Server.CreateObject("adodb.stream")
- objstream.Type = 1
- objstream.Mode =3
- objstream.Open
- objstream.Write body
- objstream.Position = 0
- objstream.Type = 2
- objstream.Charset = cset
- BytesToBstr = objstream.ReadText
- objstream.Close
- set objstream = nothing
- End Function
- Function HTMLEncode(reString) '转换HTML代码
- Dim Str:Str=reString
- If Not IsNull(Str) Then
- 'Str = Replace(Str,CHR(9), "")
- 'Str = Replace(Str,CHR(13), "")
- 'Str = Replace(Str,CHR(10), "")
- 'Str = Replace(Str,CHR(32), " ")
- kongge=""
- 'del=""
- 'for i=1 to 20
- 'kongge=kongge&CHR(32)
- 'del=kongge
- 'Str = Replace(Str,kongge, " ")
- 'next
- Str = Replace(Str,CHR(32)&CHR(32), " ")
- Str = Replace(Str,"> <", "><")
- Str = Replace(Str," <", "<")
- Str = Replace(Str," >", ">")
- Str = Replace(Str," >", ">")
- Str = Replace(Str,CHR(34),CHR(39))
- HTMLEncode = Str
- End If
- End Function
- '==================================================
- '函数名:GetBody
- '作 用:截取字符串
- '参 数:ConStr--将要截取的字符串
- '参 数:StartStr--开始字符串
- '参 数:OverStr--结束字符串
- '参 数:IncluL--是否包含StartStr
- '参 数:IncluR--是否包含OverStr
- '==================================================
- Function GetBody(ConStr,StartStr,OverStr,IncluL,IncluR)
- If ConStr="$Falsecontentquot; or ConStr="" or IsNull(ConStr)=True Or StartStr="" or IsNull(StartStr)=True Or OverStr="" or IsNull(OverStr)=True Then
- GetBody="$Falsecontentquot;
- Exit Function
- End If
- Dim ConStrTemp
- Dim Start,Over
- ConStrTemp=Lcase(ConStr)
- StartStr=Lcase(StartStr)
- OverStr=Lcase(OverStr)
- Start = InStrB(1, ConStrTemp, StartStr, vbBinaryCompare)
- If Start<=0 then
- GetBody=""
- Exit Function
- Else
- If IncluL=False Then
- Start=Start+LenB(StartStr)
- End If
- End If
- Over=InStrB(Start,ConStrTemp,OverStr,vbBinaryCompare)
- If Over<=0 Or Over<=Start then
- GetBody="$Falsecontentquot;
- Exit Function
- Else
- If IncluR=True Then
- Over=Over+LenB(OverStr)
- End If
- End If
- GetBody=MidB(ConStr,Start,Over-Start)
- End Function
- '==================================================
- '函数名:LoseATag
- '作 用:正则表达式过滤 链接<a>标记
- '参 数:ContentStr ------将要截取的字符串
- '==================================================
- Function LoseATag(ContentStr)
- Dim ClsTempLoseStr,regEx
- ClsTempLoseStr = Cstr(ContentStr)
- Set regEx = New RegExp
- regEx.Pattern = "<(\/){0,1}a[^<>]*>"
- regEx.IgnoreCase = True
- regEx.Global = True
- ClsTempLoseStr = regEx.Replace(ClsTempLoseStr,"")
- LoseATag = ClsTempLoseStr
- Set regEx = Nothing
- End Function
- '==================================================
- '函数名:ReplaceText
- '作 用:批量替换字符串
- '参 数:strHTML ------将要截取的字符串
- '==================================================
- function ReplaceText(strHTML,patrn,replStr)
- Set regEx = New RegExp ' 建立正则表达式
- regEx.Pattern = patrn ' 设置模式
- regEx.IgnoreCase = True ' 设置是否区分大小写
- regEx.Global = True ' 设置全局可用性
- ReplaceText = regEx.Replace(""&strHTML"","") ' 作替换
- Set reg=nothing
- End function
- '==================================================
- '函数名:ReadFromUTF
- '作 用:读取文本文件内容,这个函数的优点在于可以读取gb2312编码格式的文件
- '参 数:TempString要读取的模板文件路径; Charset是编码,一般是gb2312或者GB2312
- '==================================================
- function ReadFromUTF (TempString,CharSet)
- dim str
- set stm=server.CreateObject("adodb.stream")
- stm.Type=2
- stm.mode=3
- stm.charset=CharSet
- stm.open
- stm.loadfromfile server.MapPath(TempString)
- str=stm.readtext
- stm.Close
- set stm=nothing
- ReadFromUTF=str
- end function
- '==================================================
- '函数名:字符串截取函数
- '作 用:截取字符串内容
- '参 数:Start要截取字符串开始; Last要截取字符串结束(如果结束为空,只取Start的split);num取第几个内容(从0开始)
- '==================================================
- Function GetKey(HTML,Start,Last,num)
- filearray=split(HTML,Start)
- if Last<>"" then
- if UBound(filearray)>0 and num<UBound(filearray) then
- filearray2=split(filearray(1),Last)
- GetKey=filearray2(num)
- else
- GetKey=UBound(filearray)
- end if
- else
- GetKey=filearray(num)
- end if
- End Function
- '例子
- 'WHtml=ReadFromUTF("101290101.htm","utf-8")
- 'Wurl="http://www.weather.com.cn/html/weather/"&cnid&".shtml"
- 'WHtml=GetHttpPage(Wurl)
- 'WHtml=HTMLEncode(WHtml)
- 'WHtml=Replace(WHtml,"<a class='cyclePrediction'>","<font color=red>") '对星期六、天进行颜色替换
- 'WHtml=LoseATag(WHtml) '去除超链接
- 'dateinfo=GetBody(WHtml,"<h1 class='weatheH1'>","发布",False,True)
- '例子
- 'dim myThief,page,aa,id
- 'aa=request("aa")
- 'id=request("id")
- 'set myThief=new clsThief
- 'myThief.src="http://hd.openv.com/inc/"&aa&"maker.php?id="&id&""
- 'myThief.steal
- 'myThief.cut "<!-- big img end -->","<!-- 合作 开始 -->"
- 'title=GetKey(myThief.value,"<title>","</title>")
- 'myThief.change "tv_show-","show.asp?id="
- 'page=myThief.value '获得偷取的内容
- 'set myThief=nothing '释放对象
- %>
- <%
- 'if request("id")="" then
- 'Response.write("<script >alert('对不起!您未输入相关信息!');history.back(-1);</script>")
- 'response.End()
- 'else
- 'cnid=request("id")
- 'WHtml=ReadFromUTF("101290101.htm","utf-8")
- imgurl="www.qiyipic.com"
- Wurl="http://www.qiyi.com/dianying/index.html"
- WHtml=GetHttpPage(Wurl,"utf-8")
- 'WHtml=HTMLEncode(WHtml)
- 'WHtml=LoseATag(WHtml) '去除超链接
- WHtml=Replace(WHtml,"http://www.qiyipic.com/dianying","/img.asp?img="&imgurl"/dianying")
- WHtml=Replace(WHtml,"http://www.qiyipic.com/common","/img.asp?img="&imgurl"/common")
- WHtml=Replace(WHtml,"http://www.qiyipic.com/thumb","/img.asp?img="&imgurl"/thumb")
- mmm=GetBody(WHtml,"<LINK href=","<!-- 标准尾 begin -->",True,False)
- main=GetBody(WHtml,"<!-- 头部搜索区域 -->","<!-- 标准尾 begin -->",True,False)
- ggg=GetBody(main,"<iframe","</iframe>",True,True)
- main=Replace(main,ggg,"")
- 'end if
- %>
- <%
- 'var IPData = new Array("58.242.83.181","","安徽省","淮北市");
- ipurl="http://fw.qq.com/ipaddress"
- ipHtml=GetHttpPage(ipurl,"gb2312")
- 'WHtml=HTMLEncode(WHtml)
- 'WHtml=LoseATag(WHtml) '去除超链接
- ipHtml=Replace(ipHtml,"http://www.qiyipic.com/dianying","/img.asp?img="&imgurl"/dianying")
- mmm=GetBody(ipHtml,"new Array(",");",False,False)
- mmm=Replace(mmm,"""","")
- 'aa=split(mmm,",")
- For i = 0 To 3
- Response.Write getkey(mmm,",","",i)& "<br>"
- Next
- %>
查看所有评论