通过 topic 的编号,将单篇文章及其附件、缩略图下载到本地。
; Script configuration: every downloaded artefact is stored below $gc_StoreFolder.
; The two calls at the end run at script start-up.
Global Const $gc_StoreFolder = @ScriptDir & "\Autoitscript\";root storage directory
Global Const $gc_HtmlFileFolder = $gc_StoreFolder & "html\";directory for the saved HTML pages
Global Const $gc_TopicList_File = $gc_StoreFolder & "TopicList.ini";INI file holding the topic list
; INI section under which topic titles are stored, keyed by topic number.
Global Const $gc_TopicList_Section = "TopicList"
Global Const $gc_AttachFileExt = "zip|rar|au3";supported attachment extensions, separated by |
; Folder name (and full path) where downloaded attachments are stored.
Global Const $gc_AttachFile_FolderName = "Attach"
Global Const $gc_AttachFile_Folder = $gc_StoreFolder & $gc_AttachFile_FolderName & "\"
_CheckCssStyle();initialisation: download the CSS file; background images referenced by the CSS are not downloaded
_DownLoad_TopicBySN(77503);download the topic whose number is 77503
; Downloads the printable version of one forum topic and stores it locally.
;
; If the topic's HTML file already exists and a title is recorded for it, the
; cached file is re-used; otherwise the page is fetched, its scripts are removed,
; its inline styles are replaced by a link to the local stylesheet and its title
; is recorded. In both cases images and attachments referenced by the page are
; then localised and the resulting HTML is written to
; $gc_HtmlFileFolder & $s_SN & ".html".
;
; Parameters:
;   $s_SN - topic number.
; Return value: 0 (unchanged from the original, which had no Return).
; @error:
;   0 - success
;   1 - the page could not be fetched or no topic title was found in it
;   2 - the cached HTML file was empty / unreadable
;   3 - the output HTML file could not be opened for writing
Func _DownLoad_TopicBySN($s_SN)
	Local $s_HtmlPath = $gc_HtmlFileFolder & $s_SN & ".html"
	Local $s_Return = ""

	If FileExists($s_HtmlPath) And StringLen(_ReadTopicTitleInfoBySN($s_SN)) Then
		; Cached copy with a known title: work on the local file.
		$s_Return = FileRead($s_HtmlPath)
		If StringLen($s_Return) = 0 Then Return SetError(2, 0, 0)
	Else
		$s_Return = _LocalCssStyle(_DeleteJsScript(_INetGetSource("http://www.autoitscript.com/forum/index.php?act=Print&client=html&f=9&t=" & $s_SN)))
		Local $a_Title = StringRegExp($s_Return, "<h3>.*?_.*?_\s*(.*?)\s*</h3>", 3)
		; No title means the download failed or the page is not a topic page.
		; Stop here instead of overwriting the local file with unusable content.
		If UBound($a_Title) = 0 Then Return SetError(1, 0, 0)
		_RecordTopicInfo($a_Title[0], $s_SN)
	EndIf

	$s_Return = _LocalHtmlImages($s_Return)
	$s_Return = _LocalHtmlAttachFile($s_Return)

	; Mode 2+8: overwrite, creating the directory if needed.
	Local $h_File = FileOpen($s_HtmlPath, 2+8)
	If $h_File = -1 Then Return SetError(3, 0, 0)
	FileWrite($h_File, $s_Return)
	FileClose($h_File)
EndFunc
; Fetches a URL through _INetGet_Http and returns the response body as text.
;
; Parameters:
;   $sInetUrl    - URL to fetch.
;   $sCodeFormat - flag passed to BinaryToString (default 1).
; Return value: the decoded text when _INetGet_Http returned binary data,
;               otherwise whatever _INetGet_Http returned, unchanged.
; @error: the @error value set by _INetGet_Http (0 on success).
Func _INetGetSource($sInetUrl ,$sCodeFormat = 1)
	Local $vData = _INetGet_Http($sInetUrl)
	; Capture @error immediately: the next built-in call would reset it.
	Local $iError = @error
	If IsBinary($vData) Then
		$vData = BinaryToString($vData, $sCodeFormat)
	EndIf
	Return SetError($iError, 0, $vData)
EndFunc
; Performs an HTTP GET through wininet.dll and returns the response body.
;
; Parameters:
;   $sInetUrl - URL of the form http://host/path .
; Return value:
;   Success - the response body as a binary variant.
;   Failure - '' (empty string) with @error set:
;             1 - wininet.dll could not be opened
;             2 - InternetOpenW failed
;             3 - InternetConnectW failed
;             4 - HttpOpenRequestW failed
;             5 - HttpSendRequestW failed
;             6 - $sInetUrl does not match http://host/path
; Remarks:
;   Every handle that was opened is closed on all paths, including failures.
;   A read error in the middle of the body ends the transfer; the data
;   received so far is returned.
Func _INetGet_Http($sInetUrl)
	; Split the URL into host and object name; refuse anything that does not match
	; rather than indexing a non-array result.
	Local $aUrlParts = StringRegExp($sInetUrl, 'http://(.*?)/(.*)', 3)
	If @error Or UBound($aUrlParts) < 2 Then Return SetError(6, 0, '')
	Local $sHostName = $aUrlParts[0], $sFileName = $aUrlParts[1]

	Local $hDll = DllOpen("wininet.dll")
	If $hDll = -1 Then Return SetError(1, 0, '')

	Local $iError = 0, $sHex = ''
	Local $hSession = 0, $hConnect = 0, $hRequest = 0
	Local $aCall

	$aCall = DllCall($hDll, "ptr", "InternetOpenW", "wstr", "", "dword", 1, "ptr", 0, "ptr", 0, "dword", 0x04000000)
	If @error Or Not $aCall[0] Then
		$iError = 2
	Else
		$hSession = $aCall[0]
	EndIf

	If Not $iError Then
		; Port 0 with service 3 (HTTP) lets WinINet pick the default port.
		$aCall = DllCall($hDll, "ptr", "InternetConnectW", "ptr", $hSession, "wstr", $sHostName, "dword", 0, "ptr", 0, "ptr", 0, "dword", 3, "dword", 0, "ptr", 0)
		If @error Or Not $aCall[0] Then
			$iError = 3
		Else
			$hConnect = $aCall[0]
		EndIf
	EndIf

	If Not $iError Then
		$aCall = DllCall($hDll, "ptr", "HttpOpenRequestW", "ptr", $hConnect, "wstr", "GET", "wstr", $sFileName, "wstr", "HTTP/1.1", "ptr", 0, "ptr", 0, "dword", 0, "ptr", 0)
		If @error Or Not $aCall[0] Then
			$iError = 4
		Else
			$hRequest = $aCall[0]
		EndIf
	EndIf

	If Not $iError Then
		$aCall = DllCall($hDll, "int", "HttpSendRequestW", "ptr", $hRequest, "ptr", 0, "dword", 0, "ptr", 0, "dword", 0)
		If @error Or Not $aCall[0] Then $iError = 5
	EndIf

	If Not $iError Then
		Local $iChunkSize = 8192
		Local $tBytesRead = DllStructCreate("dword")
		Local $tBuffer = DllStructCreate("byte[" & $iChunkSize & "]")
		Local $iRead
		While 1
			$aCall = DllCall($hDll, "int", "InternetReadFile", "ptr", $hRequest, "ptr", DllStructGetPtr($tBuffer), "dword", $iChunkSize, "ptr", DllStructGetPtr($tBytesRead))
			; Stop on a failed call: the byte counter may be stale in that case,
			; and re-reading it would loop forever on old data.
			If @error Or Not $aCall[0] Then ExitLoop
			$iRead = DllStructGetData($tBytesRead, 1)
			If $iRead <= 0 Then ExitLoop ; end of the response body
			; Accumulate as hex text without the "0x" prefix of each chunk.
			$sHex &= StringTrimLeft(String(BinaryMid(DllStructGetData($tBuffer, 1), 1, $iRead)), 2)
		WEnd
	EndIf

	; Release whatever was opened, innermost handle first.
	If $hRequest Then DllCall($hDll, "int", "InternetCloseHandle", "ptr", $hRequest)
	If $hConnect Then DllCall($hDll, "int", "InternetCloseHandle", "ptr", $hConnect)
	If $hSession Then DllCall($hDll, "int", "InternetCloseHandle", "ptr", $hSession)
	DllClose($hDll)

	If $iError Then Return SetError($iError, 0, '')
	Return Binary('0x' & $sHex)
EndFunc
; Removes every <script ...>...</script> element from an HTML string.
;
; The pattern is case-insensitive and uses [^\xff] so that a match can span
; line breaks; each match is replaced by an empty string.
;
; Parameters:
;   $s_HtmlSource - HTML text to clean.
; Return value: the HTML text without script elements.
Func _DeleteJsScript($s_HtmlSource)
	Local Const $s_ScriptPattern = "(?i)(<script[^\xff]*?</script>)"
	Local $s_Stripped = StringRegExpReplace($s_HtmlSource, $s_ScriptPattern, "")
	Return $s_Stripped
EndFunc
; Replaces every inline <style ...>...</style> element of an HTML string with a
; <link> element that points at the local stylesheet "../css.css".
;
; The pattern is case-insensitive and uses [^\xff] so that a match can span
; line breaks.
;
; Parameters:
;   $s_HtmlSource - HTML text to rewrite.
; Return value: the rewritten HTML text.
Func _LocalCssStyle($s_HtmlSource)
	Local Const $s_StylePattern = "(?i)(<style[^\xff]*?</style>)"
	Local Const $s_LocalLink = '<link rel="stylesheet" type="text/css" href="../css.css" />'
	Local $s_Rewritten = StringRegExpReplace($s_HtmlSource, $s_StylePattern, $s_LocalLink)
	Return $s_Rewritten
EndFunc
; Turns an href value taken from HTML markup into a requestable URL by decoding
; the HTML entity for the ampersand back into a plain "&".
;
; The previous implementation replaced "&" with "&", which changed nothing, so
; links containing encoded ampersands were requested with the entity still in
; the query string.
;
; Parameters:
;   $s_Link - href value as it appears in the HTML source.
; Return value: the link with every encoded ampersand decoded.
Func _ReplaceForLink($s_Link)
	; The entity is assembled from two pieces so that an HTML-unescaping pass
	; over this source file cannot silently collapse it into "&" again.
	Local $s_AmpEntity = "&" & "amp;"
	Return StringReplace($s_Link, $s_AmpEntity, "&")
EndFunc
; Downloads the forum images referenced by an HTML string and rewrites the
; matching <img src="..."> attributes to point at the local copies.
;
; Only images whose src starts with http://www.autoitscript.com/forum/ are
; handled. Each image is stored below $gc_StoreFolder under the same relative
; path it has on the forum, and the tag is rewritten to "../<relative path>".
; An image that is already present locally is not downloaded again. A tag is
; left untouched when its image can be neither found locally nor saved.
;
; Parameters:
;   $s_HtmlSource - HTML text to process.
; Return value: the HTML text with localised image references.
Func _LocalHtmlImages($s_HtmlSource)
	; Flag 4: array of arrays; [0] = matched text, [1] = full URL, [2] = path relative to the forum root.
	Local $a_Matches = StringRegExp($s_HtmlSource, '(?i)<img src="(http://www\.autoitscript\.com/forum/([^"]*))"', 4)
	Local $a_Match, $s_Path, $b_Data, $h_File, $i_Written, $s_LocalTag

	For $i = 0 To UBound($a_Matches) - 1
		$a_Match = $a_Matches[$i]
		$s_Path = StringReplace($gc_StoreFolder & $a_Match[2], "/", "\")

		If Not FileExists($s_Path) Then
			$b_Data = _INetGet_Http($a_Match[1])
			If @error Or BinaryLen($b_Data) = 0 Then ContinueLoop

			; Mode 2+8+16: overwrite, create directories, binary.
			$h_File = FileOpen($s_Path, 2+8+16)
			; e.g. the relative path contains characters that are invalid in a file name
			If $h_File = -1 Then ContinueLoop
			$i_Written = FileWrite($h_File, $b_Data)
			FileClose($h_File)
			If Not $i_Written Then
				; Do not leave a truncated file behind that would later count as downloaded.
				FileDelete($s_Path)
				ContinueLoop
			EndIf
		EndIf

		; The local copy exists: point the tag at it.
		$s_LocalTag = '<img src="../' & $a_Match[2] & '"'
		$s_HtmlSource = StringReplace($s_HtmlSource, $a_Match[0], $s_LocalTag)
		ConsoleWrite($a_Match[0] & @CRLF)
		ConsoleWrite($s_LocalTag & @CRLF)
	Next

	Return $s_HtmlSource
EndFunc
; Downloads the attachments linked from an HTML string and rewrites the links to
; point at the local copies.
;
; A link qualifies when its href starts with http://www.autoitscript.com/forum/
; and its link text ends in one of the extensions listed in $gc_AttachFileExt.
; The link text is used as the local file name inside $gc_AttachFile_Folder and
; the href is rewritten to "../<$gc_AttachFile_FolderName>/<link text>".
; An attachment that is already present locally is not downloaded again. A link
; is left untouched when its file can be neither found locally nor saved.
;
; Parameters:
;   $s_HtmlSource - HTML text to process.
; Return value: the HTML text with localised attachment links.
Func _LocalHtmlAttachFile($s_HtmlSource)
	; Flag 4: array of arrays; [0] = matched text, [1] = href, [2] = link text (file name).
	Local $a_Matches = StringRegExp($s_HtmlSource, '(?i)<a[^>]*href="(http://www\.autoitscript\.com/forum/[^"]*?)"[^>]*>(.*?\.(?i:' & $gc_AttachFileExt & '))</a>', 4)
	Local $a_Match, $s_Path, $b_Data, $h_File, $i_Written, $s_Prefix, $s_LocalLink

	For $i = 0 To UBound($a_Matches) - 1
		$a_Match = $a_Matches[$i]
		$s_Path = $gc_AttachFile_Folder & $a_Match[2]

		If Not FileExists($s_Path) Then
			$b_Data = _INetGet_Http(_ReplaceForLink($a_Match[1]))
			If @error Or BinaryLen($b_Data) = 0 Then ContinueLoop

			; Mode 2+8+16: overwrite, create directories, binary.
			$h_File = FileOpen($s_Path, 2+8+16)
			; e.g. the link text is not a valid file name
			If $h_File = -1 Then ContinueLoop
			$i_Written = FileWrite($h_File, $b_Data)
			FileClose($h_File)
			If Not $i_Written Then
				; Do not leave a truncated file behind that would later count as downloaded.
				FileDelete($s_Path)
				ContinueLoop
			EndIf
		EndIf

		; Everything of the matched tag that precedes the href value, so that only
		; the URL inside this tag is swapped for the local path.
		$s_Prefix = StringLeft($a_Match[0], StringInStr($a_Match[0], $a_Match[1]) - 1)
		$s_LocalLink = $s_Prefix & '../' & $gc_AttachFile_FolderName & "/" & $a_Match[2]
		$s_HtmlSource = StringReplace($s_HtmlSource, $s_Prefix & $a_Match[1], $s_LocalLink)
		ConsoleWrite($s_Prefix & $a_Match[1] & @CRLF)
		ConsoleWrite($s_LocalLink & @CRLF)
	Next

	Return $s_HtmlSource
EndFunc
; Stores a topic title in the topic-list INI file.
;
; The entry is written to section $gc_TopicList_Section of $gc_TopicList_File,
; with the topic number as key and the title as value.
;
; Parameters:
;   $s_Title - topic title to store.
;   $s_SN    - topic number, used as the INI key.
Func _RecordTopicInfo($s_Title, $s_SN)
	Local $s_IniKey = $s_SN
	Local $s_IniValue = $s_Title
	IniWrite($gc_TopicList_File, $gc_TopicList_Section, $s_IniKey, $s_IniValue)
EndFunc
; Looks up the recorded title of a topic in the topic-list INI file.
;
; Parameters:
;   $s_SN - topic number, used as the INI key.
; Return value: the stored title, or "" when no title is recorded.
; @error: 0 when a non-empty title was found, 1 otherwise.
Func _ReadTopicTitleInfoBySN($s_SN)
	Local $s_Title = IniRead($gc_TopicList_File, $gc_TopicList_Section, $s_SN, "")
	; Non-empty title: success.
	If StringLen($s_Title) Then Return SetError(0, 0, $s_Title)
	Return SetError(1, 0, $s_Title)
EndFunc
; Makes sure the local stylesheet $gc_StoreFolder & "css.css" exists.
;
; When the file is missing, the forum stylesheet css_14.css is downloaded and
; written to that path in binary mode. Nothing happens when the file already
; exists or when the download yields no data.
Func _CheckCssStyle()
	Local $s_CssPath = $gc_StoreFolder & "css.css"
	; Already present: nothing to do.
	If FileExists($s_CssPath) Then Return

	Local $b_Css = _INetGet_Http("http://www.autoitscript.com/forum/style_images/css_14.css")
	; Nothing was downloaded: leave the file absent.
	If Not (BinaryLen($b_Css) > 0) Then Return

	; Mode 2+8+16: overwrite, create directories, binary.
	Local $h_Css = FileOpen($s_CssPath, 2+8+16)
	FileWrite($h_Css, $b_Css)
	FileClose($h_Css)
EndFunc
|