#include <IE.au3>
#include <INet.au3>
#include <file.au3>
#Region public var
; Script-wide constants. Later constants are built from earlier ones, so the
; declaration order below matters.
; NOTE(review): "floder" is a misspelling of "folder"; the name is kept because
; the functions in this file reference it.

; Folders
Global Const $gc_floder_ini = @ScriptDir & "\ini" ; folder holding the generated ini files, next to the script
; URLs
Global Const $gc_url_google = 'http://www.google.com' ; Google base URL
Global Const $gc_url_google_dir = $gc_url_google & '/dirhp' ; Google Directory home page
; File names
Global Const $gc_name_index = "Index" ; base name of the index ini file
Global Const $gc_ini_index = $gc_floder_ini & '\' & $gc_name_index & '.ini' ; full path: <script dir>\ini\Index.ini
Global Const $gc_name_myDate = @YEAR & "-" & @MON & "-" & @MDAY & " " & @HOUR ; timestamp captured at startup, e.g. 2009-09-11 03
#EndRegion public var
; Script entry point: make sure the ini output folder exists before anything
; tries to write into it.
_check_Initial()
; Fetch the direct sub-categories of a single category (here: Arts).
$sUrl = 'http://www.google.com/Top/Arts/'
_test($sUrl)
; Scrape one category page and store its sub-category links in an ini file.
;
; Downloads $sUrl, collects every link that is immediately followed by a grey
; "(<count>)" font tag, and writes one "<link text>=<link url>" pair per link to
; <ini folder>\<Category>.ini under the section [<Category>], where <Category>
; is the first path segment after /Top/ in $sUrl.
;
; Parameters: $sUrl - absolute category URL, e.g. 'http://www.google.com/Top/Arts/'
; Return:     none. Every failure is reported to the user through a MsgBox and
;             nothing is written.
Func _test($sUrl)
	; Declare everything Local so the function never reads or overwrites a
	; same-named global variable by accident.
	Local $sText = _INetGetSource($sUrl) ; page source
	If @error Then
		; Report a failed download as such instead of as a regex failure.
		MsgBox(0, 'Wrong', 'download error')
		Return
	EndIf

	; Flag 3 returns a flat array of all captures: href, text, href, text, ...
	Local $sPattern = '<a href="([^"]+)">(?:<b>)?([^<]+)(?:</b>)?</a> <font color=#6f6f6f size=-1>\(\d+\)</font>'
	Local $aDirs = StringRegExp($sText, $sPattern, 3)
	If @error Then
		MsgBox(0, 'Wrong', 'regex error')
		Return
	EndIf

	; Top-level category name taken from the URL. The result must be checked
	; before indexing: when the URL does not match, StringRegExp does not return
	; an array and subscripting it would abort the whole script.
	Local $aCategory = StringRegExp($sUrl, 'http://www\.google\.com/Top/([^/]+)/', 3)
	If @error Then
		MsgBox(0, 'Wrong', 'url error')
		Return
	EndIf
	Local $sMainDirName = $aCategory[0]
	ConsoleWrite('$sMailDirName:' & $sMainDirName & @CRLF)

	; Site-relative path of the page, used as prefix for hrefs lacking '/Top/'.
	Local $sUrl_parent = StringReplace($sUrl, 'http://www.google.com', '')
	; One ini file per top-level category.
	Local $ini_subdir = $gc_floder_ini & '\' & $sMainDirName & '.ini'

	Local $sUrl_sub, $sName_sub
	; Walk the capture pairs: even index = href, odd index = link text.
	For $iPair = 0 To UBound($aDirs) - 2 Step 2
		$sUrl_sub = $aDirs[$iPair]
		$sName_sub = $aDirs[$iPair + 1]
		; Normalize the link: hrefs without '/Top/' get the page path prepended.
		If Not StringInStr($sUrl_sub, '/Top/') Then
			$sUrl_sub = $sUrl_parent & $sUrl_sub
		EndIf
		IniWrite($ini_subdir, $sMainDirName, $sName_sub, $sUrl_sub)
	Next
EndFunc
; Top-level categories of the directory home page.
;
; Downloads the Google Directory home page, extracts every top-level category
; name from links of the form href="/Top/<name>/"> and records each one in the
; index ini file as "<name>=/Top/<name>/" under the [Index] section. The
; category named 'World' is skipped.
;
; Parameters: none
; Return:     none. Every failure is reported to the user through a MsgBox and
;             nothing is written.
Func _index()
	; Local declarations keep the function from touching same-named globals.
	Local $sText = _INetGetSource($gc_url_google_dir)
	If @error Then
		; Report a failed download as such instead of as a regex failure.
		MsgBox(0, 'Wrong', 'download error')
		Return
	EndIf

	; Flag 3 returns a flat array with one capture (the category name) per match.
	Local $aDirs = StringRegExp($sText, 'href="/Top/([^/]+)/">', 3)
	If @error Then
		MsgBox(0, 'Wrong', 'regex error')
		Return
	EndIf

	Local $sDir
	For $iDir = 0 To UBound($aDirs) - 1
		$sDir = $aDirs[$iDir]
		If $sDir <> 'World' Then
			IniWrite($gc_ini_index, $gc_name_index, $sDir, '/Top/' & $sDir & '/')
		EndIf
	Next
EndFunc
Func _check_Initial()
	; Start-up check: the ini output folder must exist before any ini file is
	; written. Nothing to do when the path is already present.
	If FileExists($gc_floder_ini) Then Return
	DirCreate($gc_floder_ini)
EndFunc ;==>_check_Initial
|