本帖最后由 olala 于 2014-4-25 00:04 编辑
好久没来acn了,捣鼓一下代码,在这论坛里可以找到喜悦的感觉(*^__^*) 嘻嘻……
#include <ButtonConstants.au3>
#include <EditConstants.au3>
#include <GUIConstantsEx.au3>
#include <GuiStatusBar.au3>
#include <StaticConstants.au3>
#include <WindowsConstants.au3>
#region ### START Koda GUI section ### Form=
; Main window of the article-scraper test tool.
Global $Form1 = GUICreate("文章采集测试", 580, 448, 192, 124)
; URL entry field, created already holding the sample blog post address.
Global $Input1 = GUICtrlCreateInput("http://saihua870908.blog.163.com/blog/static/1284290382010112211532975/", 56, 14, 385, 22)
Global $Label1 = GUICtrlCreateLabel("URL:", 24, 18, 30, 17)
; Clicking this button triggers the download in the message loop.
Global $Button1 = GUICtrlCreateButton("获取文章", 464, 12, 81, 25)
; Edit control that receives the title, author and article text.
Global $Edit1 = GUICtrlCreateEdit("", 3, 48, 575, 378)
; Status bar used to report the elapsed time of a fetch.
Global $StatusBar1 = _GUICtrlStatusBar_Create($Form1)
_GUICtrlStatusBar_SetMinHeight($StatusBar1, 20)
GUISetState(@SW_SHOW, $Form1)
#endregion ### END Koda GUI section ###
; GUI message pump: poll for events until the window is closed.
While True
	$nMsg = GUIGetMsg()
	If $nMsg = $GUI_EVENT_CLOSE Then
		; Window close button pressed: terminate the script.
		Exit
	ElseIf $nMsg = $Button1 Then
		; Fetch button pressed: download and display the article.
		_GetText()
	EndIf
WEnd
; Downloads the page whose URL is in $Input1, extracts title, author and
; article body, shows them in $Edit1 and reports the elapsed time in the
; status bar.
; Parameters: none (reads $Input1, writes $Edit1 and $StatusBar1).
; Returns: nothing. When the URL is empty or the download yields no content,
;          a message is written to the status bar and the edit box is left
;          unchanged.
Func _GetText()
	Local $hTimer = TimerInit()

	; Strip leading/trailing whitespace (flag 3) so a padded or blank entry
	; is not sent to the HTTP object.
	Local $sUrl = StringStripWS(GUICtrlRead($Input1), 3)
	If $sUrl = "" Then
		_GUICtrlStatusBar_SetText($StatusBar1, " 请先输入URL。")
		Return
	EndIf

	Local $sPage = _XmlHttp($sUrl)
	; Do not report success when nothing was downloaded.
	If @error Or $sPage = "" Then
		_GUICtrlStatusBar_SetText($StatusBar1, " 采集失败:未能获取网页内容。")
		Return
	EndIf

	Local $sTitle = _GetTitle($sPage)
	Local $sAuthor = _GetAuthor($sPage)

	; The article body sits between these two marker <div> elements.
	Local $sBody = _StrCut($sPage, '<div class="nbw-blog-start">', '<div class="nbw-blog-end">')
	$sBody = _FormatText($sBody)

	GUICtrlSetData($Edit1, $sTitle & @CRLF & $sAuthor & @CRLF & $sBody)

	; TimerDiff returns fractional milliseconds; round for a readable display.
	Local $iElapsed = Round(TimerDiff($hTimer))
	_GUICtrlStatusBar_SetText($StatusBar1, " 采集完成!共计耗时:" & $iElapsed & " 毫秒。")
EndFunc   ;==>_GetText
; Performs a synchronous HTTP GET through the "microsoft.xmlhttp" COM object.
; Parameters: $url - address to request.
; Returns: the response body converted with BinaryToString (default flag).
;          On failure returns "" and sets @error:
;            1 = the COM object could not be created
;            2 = a COM error was reported by Open or Send
Func _XmlHttp($url)
	Local $oHTTP = ObjCreate("microsoft.xmlhttp")
	; ObjCreate does not return an object on failure; calling methods on the
	; result would then abort the script.
	If Not IsObj($oHTTP) Then Return SetError(1, 0, "")

	; Third argument False = synchronous request: Send() blocks until done.
	$oHTTP.Open("get", $url, False)
	; NOTE(review): assumes the AutoIt build in use reports COM failures via
	; @error instead of terminating - confirm against the target version.
	If @error Then Return SetError(2, 0, "")

	$oHTTP.Send()
	If @error Then Return SetError(2, 0, "")

	Return BinaryToString($oHTTP.responseBody)
EndFunc   ;==>_XmlHttp
; Extracts the text located between two marker strings.
; Parameters: $Str      - text to search
;             $StartStr - marker that precedes the wanted text
;             $EndStr   - marker that follows the wanted text
; Returns: the text between the first $StartStr and the first $EndStr that
;          occurs after it. On failure returns "" and sets @error:
;            1 = $StartStr not found
;            2 = $EndStr not found after $StartStr
Func _StrCut($Str, $StartStr, $EndStr)
	Local $iMarker = StringInStr($Str, $StartStr)
	If $iMarker = 0 Then Return SetError(1, 0, "")

	; First character after the start marker.
	Local $iFrom = $iMarker + StringLen($StartStr)

	; Begin the search at $iFrom so an end marker that also appears earlier
	; in the text cannot produce a negative length.
	Local $iTo = StringInStr($Str, $EndStr, 0, 1, $iFrom)
	If $iTo = 0 Then Return SetError(2, 0, "")

	Return StringMid($Str, $iFrom, $iTo - $iFrom)
EndFunc   ;==>_StrCut
; Converts an HTML fragment to plain text.
; Parameters: $text - HTML fragment.
; Returns: the text with line-break tags turned into @CRLF, all other tags
;          removed, and spaces / non-breaking spaces deleted.
Func _FormatText($text)
	; Accept <br>, <BR>, <br/> and <br /> alike.
	$text = StringRegExpReplace($text, '(?i)<br\s*/?>', @CRLF)
	; (?s) lets "." match line breaks so tags spanning several lines are
	; removed as well.
	$text = StringRegExpReplace($text, '(?s)<.*?>', '')
	; NOTE(review): the two identical space replacements in the original look
	; like "&nbsp;" entities decoded when the code was posted - confirm.
	; Remove the entity, its decoded character (U+00A0) and plain spaces.
	$text = StringReplace($text, '&nbsp;', '')
	$text = StringReplace($text, ChrW(160), '')
	$text = StringReplace($text, ' ', '')
	Return $text
EndFunc   ;==>_FormatText
; Returns the article title taken from the page's <title> element.
; Parameters: $html - full page source.
; Returns: whatever _StrCut yields for the markers '<title>' and ' - '.
Func _GetTitle($html)
	Return _StrCut($html, '<title>', ' - ')
EndFunc   ;==>_GetTitle
; Returns the author name taken from the page's <title> element.
; Parameters: $html - full page source.
; Returns: the result of two _StrCut passes: first the content of
;          <title>...</title>, then the part of it between '- ' and '的日志'.
Func _GetAuthor($html)
	Local $sTitleText = _StrCut($html, '<title>', '</title>')
	Return _StrCut($sTitleText, '- ', '的日志')
EndFunc   ;==>_GetAuthor
|