求教获取匹配的字符串数量的高效代码(已解决)
本帖最后由 阿福 于 2010-12-21 19:01 编辑。需求:统计字符串$str1中$str2和$str3分别出现多少次。我试了5种写法,其中方法1最快,但结果还是不理想,最理想是用时达到个位数的毫秒。
请教用时更短的写法.效率提高5倍以上的加300以上"金钱",其他有提高效率的加10-100"金钱"不等.恳请各位大侠不吝赐教.
LOG.TXT和au3代码下载地址: http://210.34.80.96/wbwj/ocr/Help.rar(也可在本帖下载,不过会浪费"金钱")
; Benchmark inputs shared by the five counting methods below.
; Haystack: the text that is searched.
$str1 = FileRead("log.txt"); The haystack is too long to inline, so it is kept in a file; loading it is outside the timed sections, so there is nothing to optimise here.
; Needles: the two substrings whose occurrences are counted.
$str2 = "F9F4F0F9F4F00000AA0000AA0000AA0000AA0000AA0000AA0000AA0000AA0000AA0000AA0000AA0000AAF9F4F00000AA"
$str3 = "F9F4F0F9F4F0F9F4F0F9F4F0F9F4F0F9F4F0F9F4F0F9F4F0F9F4F0F9F4F0F9F4F0F9F4F0F9F4F0F9F4F0F9F4F0F9F4F0"
; Method 1: count occurrences with StringReplace().
; The replaced string itself is discarded; only the replacement count that the
; call leaves in @extended is used. Occurrence 0 = replace all, casesense 1.
$hClock = TimerInit()

StringReplace($str1, $str2, "", 0, 1)
$nHits2 = @extended ; captured immediately after the call, as in the original
$msFirst = TimerDiff($hClock)

StringReplace($str1, $str3, "", 0, 1)
$nHits3 = @extended
$msTotal = TimerDiff($hClock)

; Time spent on the second needle alone.
$msSecond = $msTotal - $msFirst

$sReport = "总用时" & $msTotal & ", str2找到" & $nHits2 & "个,用时:" & $msFirst
$sReport &= ",str3找到" & $nHits3 & "个,用时" & $msSecond & @CRLF
ConsoleWrite($sReport)
; Method 2: count occurrences with StringRegExp() in "array of all matches"
; mode (flag 3) and take the size of the returned array.
; The needles consist only of hex digits, so they work unescaped as patterns.
$hClock = TimerInit()

$aHits = StringRegExp($str1, $str2, 3)
$nHits2 = UBound($aHits) ; UBound() of a non-array (no match) is 0
$msFirst = TimerDiff($hClock)

$aHits = StringRegExp($str1, $str3, 3)
$nHits3 = UBound($aHits)
$msTotal = TimerDiff($hClock)

; Time spent on the second needle alone.
$msSecond = $msTotal - $msFirst

$sReport = "总用时" & $msTotal & ", str2找到" & $nHits2 & "个,用时:" & $msFirst
$sReport &= ",str3找到" & $nHits3 & "个,用时" & $msSecond & @CRLF
ConsoleWrite($sReport)
; Method 3: count occurrences with StringRegExpReplace().
; As with method 1 the rewritten string is thrown away and only the
; replacement count reported through @extended is kept.
$hClock = TimerInit()

StringRegExpReplace($str1, $str2, "")
$nHits2 = @extended ; captured immediately after the call, as in the original
$msFirst = TimerDiff($hClock)

StringRegExpReplace($str1, $str3, "")
$nHits3 = @extended
$msTotal = TimerDiff($hClock)

; Time spent on the second needle alone.
$msSecond = $msTotal - $msFirst

$sReport = "总用时" & $msTotal & ", str2找到" & $nHits2 & "个,用时:" & $msFirst
$sReport &= ",str3找到" & $nHits3 & "个,用时" & $msSecond & @CRLF
ConsoleWrite($sReport)
; Method 4: count non-overlapping occurrences by walking the haystack with
; StringInStr(), restarting each search just past the previous match.
;
; Fix: the original seeded $iPos = 1 and $iLen = 1, so the very first search
; started at position 2 and a match beginning at position 1 was never counted.
; The search now starts at position 1. The do/until loop with a trailing
; "count -= 1" correction is replaced by a plain while loop.
$t = TimerInit()

$iNum2 = 0
$iLen = StringLen($str2)
$iPos = StringInStr($str1, $str2, 1, 1, 1)
While $iPos > 0
	$iNum2 += 1
	; Skip the whole match so occurrences are counted without overlap.
	$iPos = StringInStr($str1, $str2, 1, 1, $iPos + $iLen)
WEnd
$t2 = TimerDiff($t)

$iNum3 = 0
$iLen = StringLen($str3)
$iPos = StringInStr($str1, $str3, 1, 1, 1)
While $iPos > 0
	$iNum3 += 1
	$iPos = StringInStr($str1, $str3, 1, 1, $iPos + $iLen)
WEnd
$t3 = TimerDiff($t)

ConsoleWrite("总用时"&$t3&", str2找到" & $iNum2 & "个,用时:" & $t2 &",str3找到"&$iNum3&"个,用时"&$t3-$t2& @CRLF)
; Method 5: count occurrences by splitting the haystack on the needle
; (flag 1 = treat the whole delimiter string as one separator).
; The returned array holds a count in element 0 followed by the pieces, so
; the number of separators is UBound() - 2.
$hClock = TimerInit()

$aPieces = StringSplit($str1, $str2, 1)
$nHits2 = UBound($aPieces) - 2
$msFirst = TimerDiff($hClock)

$aPieces = StringSplit($str1, $str3, 1)
$nHits3 = UBound($aPieces) - 2
$msTotal = TimerDiff($hClock)

; Time spent on the second needle alone.
$msSecond = $msTotal - $msFirst

$sReport = "总用时" & $msTotal & ", str2找到" & $nHits2 & "个,用时:" & $msFirst
$sReport &= ",str3找到" & $nHits3 & "个,用时" & $msSecond & @CRLF
ConsoleWrite($sReport)
传说中的验证码大师?阿福!{:face (461):} 当水平达到某种程度,效率和算法就是其所追求的目标。 这段代码的其中一项应用是提高纯au3搜图的效率,如:
http://210.34.80.96/wbwj/ocr/BitmapSearch.rar
想解决诸如此类用时有较大波动,有时超过10毫秒(不同电脑的用时不同,以我电脑为例)的瓶颈.
#Include <GDIPlus.au3>
Opt('MustDeclareVars', 1)

; Demo driver: searches 00.bmp for occurrences of 11.bmp using ArrayComp(),
; prints the timings, outlines every hit on the big image and saves the
; result as Target.bmp next to the script.
;
; Changes against the published listing:
;  * _GDIPlus_GraphicsDrawRect() received the whole $aPos array four times
;    (the [0]..[3] subscripts had been lost); the fields are passed one by one.
;  * _WinAPI_DeleteObject() was called on GDI+ image handles that had already
;    been released by _GDIPlus_ImageDispose(); those calls are removed.
;  * A failed bitmap load is reported instead of being passed on.

Local $s_dir = @ScriptDir
If StringRight($s_dir, 1) = "\" Then $s_dir = StringTrimRight($s_dir, 1) ; original note: drive-root case
Local $sBmpFile1 = $s_dir & "\00.bmp" ; image that is searched
Local $sBmpFile2 = $s_dir & "\11.bmp" ; image that is looked for

_GDIPlus_Startup()
Local $hBitmap1 = _GDIPlus_BitmapCreateFromFile($sBmpFile1)
Local $hBitmap2 = _GDIPlus_BitmapCreateFromFile($sBmpFile2)
If Not $hBitmap1 Or Not $hBitmap2 Then
	ConsoleWrite("无法加载位图文件: " & $sBmpFile1 & " 或 " & $sBmpFile2 & @CRLF)
	If $hBitmap1 Then _GDIPlus_ImageDispose($hBitmap1)
	If $hBitmap2 Then _GDIPlus_ImageDispose($hBitmap2)
	_GDIPlus_Shutdown()
	Exit 1
EndIf

; One hex string per pixel row for each image (return type 1).
Local $aBmp1 = _myReadBitmapMsg($hBitmap1, 1)
Local $aBmp2 = _myReadBitmapMsg($hBitmap2, 1)

; First hit only.
Local $t = TimerInit()
Local $aPosMsg = ArrayComp($aBmp1, $aBmp2, False, 16)
ConsoleWrite("找到1个图片块用去时间:"&TimerDiff($t)&'毫秒,位置信息(为空未找到):'&$aPosMsg&@CRLF)

; All hits, returned as "x,y,w,h|x,y,w,h|...".
$t = TimerInit()
$aPosMsg = ArrayComp($aBmp1, $aBmp2, True, 16)
ConsoleWrite("找到所有图片块用去时间:"&TimerDiff($t)&'毫秒,位置信息(为空未找到):'&$aPosMsg&@CRLF)

If $aPosMsg <> "" Then
	Local $aPos, $i, $hGraphics = _GDIPlus_ImageGetGraphicsContext($hBitmap1)
	$aPosMsg = StringSplit($aPosMsg, "|", 2) ; flag 2: zero-based array without a count element
	For $i = 0 To UBound($aPosMsg) - 1
		$aPos = StringSplit($aPosMsg[$i], ",", 2)
		If UBound($aPos) < 4 Then ContinueLoop ; malformed entry, nothing to draw
		; $aPos = [x, y, width, height] as produced by ArrayComp()
		_GDIPlus_GraphicsDrawRect($hGraphics, $aPos[0], $aPos[1], $aPos[2], $aPos[3])
	Next
	_GDIPlus_ImageSaveToFile($hBitmap1, $s_dir & "\Target.bmp")
	_GDIPlus_GraphicsDispose($hGraphics)
EndIf

_GDIPlus_ImageDispose($hBitmap1)
_GDIPlus_ImageDispose($hBitmap2)
_GDIPlus_Shutdown()
Exit
; ArrayComp - find a small image inside a big one, both given as arrays of
; per-row hex strings (6 hex characters per pixel).
;
; $array1    - rows of the image that is searched.
; $array2    - rows of the image that is looked for.
; $SearchAll - False: return the first hit; True: return every hit.
; $iY        - index of the row of $array2 used as the search key. Each row
;              of $array1 is scanned for that key row; the remaining rows are
;              only compared once the key row has been found.
;
; Returns "x,y,w,h" for one hit, or "x,y,w,h|x,y,w,h|..." when $SearchAll is
; True (x, y, w in pixels, h in rows). Returns "" when nothing is found or
; the arguments cannot describe a valid search.
;
; Changes against the original:
;  * The row bound was "$y + rows2 <= rows1", which ignores that the top of
;    the candidate is row $y - $iY; placements in the bottom $iY rows of
;    $array1 were never tested. The bound is now rows1 - rows2 + $iY.
;  * Non-array / empty input and an out-of-range $iY return "" instead of
;    failing on the $array2[$iY] subscript.
;  * The loop-invariant row check is hoisted into the For bound, and the key
;    row is not compared a second time.
Func ArrayComp($array1, $array2, $SearchAll=False, $iY=0)
	Local $iRows1 = UBound($array1), $iRows2 = UBound($array2)
	If $iRows2 = 0 Or $iRows1 < $iRows2 Then Return ""
	If $iY < 0 Or $iY >= $iRows2 Then Return ""

	Local $s_re = "", $y, $y2, $iPos
	Local $iW2 = StringLen($array2[$iY]) ; width of the key row in hex characters
	; Last row of $array1 that can hold the key row with the whole of $array2
	; still fitting inside $array1.
	Local $iLastAnchor = $iRows1 - $iRows2 + $iY

	For $y = $iY To $iLastAnchor
		$iPos = 0
		While 1
			; Next case-sensitive occurrence of the key row, searching on from
			; just past the previous candidate.
			$iPos = StringInStr($array1[$y], $array2[$iY], 1, 1, $iPos + 1)
			If $iPos = 0 Then ExitLoop ; no more candidates in this row
			; Hits that do not start on a 6-character (pixel) boundary are ignored.
			If Mod($iPos - 1, 6) <> 0 Then ContinueLoop

			; Compare the rows below the key row first, then the rows above it
			; (same order as the original). Any mismatch moves on to the next
			; candidate in this row.
			For $y2 = $iY + 1 To $iRows2 - 1
				If StringMid($array1[$y + $y2 - $iY], $iPos, $iW2) <> $array2[$y2] Then ContinueLoop 2
			Next
			For $y2 = 0 To $iY - 1
				If StringMid($array1[$y + $y2 - $iY], $iPos, $iW2) <> $array2[$y2] Then ContinueLoop 2
			Next

			If $SearchAll Then
				$s_re &= ($iPos - 1) / 6 & ',' & ($y - $iY) & ',' & $iW2 / 6 & ',' & $iRows2 & "|"
			Else
				Return ($iPos - 1) / 6 & ',' & ($y - $iY) & ',' & $iW2 / 6 & ',' & $iRows2
			EndIf
		WEnd
	Next

	; Drop the trailing separator left by the last appended hit.
	If StringRight($s_re, 1) = "|" Then $s_re = StringTrimRight($s_re, 1)
	Return $s_re
EndFunc
; _myReadBitmapMsg - read the pixels of a GDI+ bitmap as 24-bit RGB and
; return them as hex text.
;
; $hBitmap    - GDI+ bitmap handle.
; $ReturnType - -1: return the array [width, height, stride, raw pixel data];
;                0: return the same array with element 3 replaced by one hex
;                   string from which the per-row padding has been removed;
;               any other value (default 1): return an array holding one hex
;                   string per pixel row (6 hex characters per pixel).
;
; NOTE(review): the published listing had lost every "[n]" subscript and the
; bracketed character class inside both regular expressions, which left the
; function non-functional (the array was overwritten by scalars and "({" is
; not a valid capture). The subscripts below are reconstructed from the order
; of the assignments and from how the values are used; the character class is
; reconstructed as "hex digit" - confirm against the author's original.
; NOTE(review): a negative stride is only made positive with Abs(); whether
; Scan0 then points at the lowest address of the buffer is not verified here.
Func _myReadBitmapMsg($hBitmap, $ReturnType=1)
	Local $aBmpData[4]
	$aBmpData[0] = _GDIPlus_ImageGetWidth($hBitmap)
	$aBmpData[1] = _GDIPlus_ImageGetHeight($hBitmap)

	; Lock the whole image for reading, converted to 24 bits per pixel.
	Local $BitmapData = _GDIPlus_BitmapLockBits($hBitmap, 0, 0, $aBmpData[0], $aBmpData[1], $GDIP_ILMREAD, $GDIP_PXF24RGB)
	$aBmpData[2] = Abs(DllStructGetData($BitmapData, "Stride")) ; bytes per row including padding
	Local $Scan0 = DllStructGetData($BitmapData, "Scan0")

	; Overlay a byte array on the locked pixels and copy it out before unlocking.
	Local $pixelData = DllStructCreate("ubyte lData[" & ($aBmpData[2] * $aBmpData[1]) & "]", $Scan0)
	$aBmpData[3] = DllStructGetData($pixelData, "lData")
	_GDIPlus_BitmapUnlockBits($hBitmap, $BitmapData)

	; In the hex text every byte is two characters: a row is width*6 pixel
	; characters followed by (stride*2 - width*6) padding characters.
	Local $iRowChars = $aBmpData[0] * 6
	Local $iPadChars = $aBmpData[2] * 2 - $iRowChars

	Switch $ReturnType
		Case -1
			Return $aBmpData
		Case 0
			; Drop the leading "0x" and the padding of each row, keep the pixels.
			$aBmpData[3] = StringRegExpReplace($aBmpData[3], "(?:0x)?([[:xdigit:]]{" & $iRowChars & "}).{" & $iPadChars & "}", "$1")
			Return $aBmpData
		Case Else
			; The hex-digit class cannot match the "x" of the leading "0x", so
			; the first row starts right after that prefix.
			Return StringRegExp($aBmpData[3], "([[:xdigit:]]{" & $iRowChars & "}).{" & $iPadChars & "}", 3)
	EndSwitch
EndFunc
我试了好几种方法,结果都不尽如人意,看来只能期待高手出现了。 能想到的方法前辈都试过了,没辙了… 本帖最后由 pusofalse 于 2010-12-21 18:48 编辑
对于没有特殊算法的问题,使用机器语言可能会稍快一些。新开了两个工作线程用于匹配子字符串,测试总用时35ms左右。只能用于32位系统,并且只能搜索纯文本Ansi数据,区分大小写。
#include <Thread.au3>
; Counts two substrings in log.txt with two worker threads that run the
; machine-code routine built by _MatchStringA(). The file is mapped into
; memory and each thread receives a MATCH_STRING block describing its job;
; the number of matches comes back as the thread's exit code.
;
; Changes against the published listing (which had lost its "[n]" subscripts):
;  * the file handle is taken from element 0 of the DllCall() result;
;  * the match counts are taken from element 2 (the "dword*" out parameter)
;    of the GetExitCodeThread results instead of concatenating the arrays;
;  * both job buffers are freed - the original freed the never-assigned
;    $pBuffer and leaked $pBuffer1 / $pBuffer2;
;  * a failed CreateFile call is reported instead of being mapped.
;
; NOTE(review): strstr() stops at the first NUL byte, so the mapped file is
; presumably expected to be followed by zero bytes - confirm for files whose
; size is an exact multiple of the page size.

; Layout of the parameter block handed to each worker thread (32-bit: 12 bytes).
Const $tagMATCH_STRING = "ptr BaseString;ptr SubString;dword SubStringLength"

Local $sFilePath, $aCall, $hFile, $hSection, $pBaseAddress, $pMatchStringA, $iTimer, $iElapsed
Local $sSubString1, $sSubString2, $iLength1, $iLength2
Local $pBuffer1, $pBuffer2, $tBuffer1, $tBuffer2
Local $hThread1, $hThread2, $aExit1, $aExit2, $iCount1, $iCount2

$sFilePath = "log.txt" ; file to search

; Open for read/write (0xC0000000), shared read/write (3), existing file only (3).
$aCall = DllCall("Kernel32.dll", "handle", "CreateFile", "str", $sFilePath, _
		"dword", 0xC0000000, "dword", 3, "dword", 0, "dword", 3, _
		"dword", 0, "handle", 0)
If @error Or $aCall[0] = Ptr(-1) Then ; Ptr(-1) = INVALID_HANDLE_VALUE
	MsgBox(16, "Error", "无法打开文件: " & $sFilePath)
	Exit 1
EndIf
$hFile = $aCall[0]

; Map the file into memory.
$hSection = _RTCreateSection(0, $RT_SEC_COMMIT, $RT_PAGE_READWRITE, $RT_SECTION_ALL_ACCESS, 0, $hFile)
$pBaseAddress = _RTMapViewOfSection($hSection, $RT_SECTION_ALL_ACCESS)

$pMatchStringA = _MatchStringA()

; Substrings to count.
$sSubString1 = "F9F4F0F9F4F00000AA0000AA0000AA0000AA0000AA0000AA0000AA0000AA0000AA0000AA0000AA0000AAF9F4F00000AA"
$sSubString2 = "F9F4F0F9F4F0F9F4F0F9F4F0F9F4F0F9F4F0F9F4F0F9F4F0F9F4F0F9F4F0F9F4F0F9F4F0F9F4F0F9F4F0F9F4F0F9F4F0"
$iLength1 = StringLen($sSubString1)
$iLength2 = StringLen($sSubString2)

; Each job buffer holds the 12-byte MATCH_STRING header followed by the
; substring text at offset 12; 16 + length leaves room for a terminator.
$pBuffer1 = _RTHeapAlloc(16 + $iLength1)
$tBuffer1 = DllStructCreate($tagMATCH_STRING, $pBuffer1)
DllStructSetData($tBuffer1, "BaseString", $pBaseAddress)
DllStructSetData($tBuffer1, "SubString", $pBuffer1 + 12)
DllStructSetData($tBuffer1, "SubStringLength", $iLength1)
_RTWriteBytes($pBuffer1 + 12, $sSubString1, "str", $iLength1)

$pBuffer2 = _RTHeapAlloc(16 + $iLength2)
$tBuffer2 = DllStructCreate($tagMATCH_STRING, $pBuffer2)
DllStructSetData($tBuffer2, "BaseString", $pBaseAddress)
DllStructSetData($tBuffer2, "SubString", $pBuffer2 + 12)
DllStructSetData($tBuffer2, "SubStringLength", $iLength2)
_RTWriteBytes($pBuffer2 + 12, $sSubString2, "str", $iLength2)

$iTimer = TimerInit()

; Start one worker thread per substring.
$hThread1 = _RTCreateThread($pMatchStringA, $pBuffer1)
$hThread2 = _RTCreateThread($pMatchStringA, $pBuffer2)

; Wait for both searches to finish.
_RTWaitForObject($hThread1)
_RTWaitForObject($hThread2)

; The exit code of each thread is its match count; DllCall() returns it in
; element 2, the slot of the "dword*" parameter.
$aExit1 = DllCall("Kernel32.dll", "bool", "GetExitCodeThread", "handle", $hThread1, "dword*", 0)
$iCount1 = @error ? -1 : $aExit1[2]
$aExit2 = DllCall("Kernel32.dll", "bool", "GetExitCodeThread", "handle", $hThread2, "dword*", 0)
$iCount2 = @error ? -1 : $aExit2[2]
$iElapsed = TimerDiff($iTimer)

MsgBox(0, $iCount1 & " " & $iCount2, $iElapsed)

_RTHeapFree($pBuffer1)
_RTHeapFree($pBuffer2)
_RTCloseHandle($hFile)
_RTCloseHandle($hSection)
_RTCloseHandle($hThread1)
_RTCloseHandle($hThread2)
_RTUnmapViewOfSection($pBaseAddress)
; _MatchStringA - build (once) the 32-bit x86 thread routine that counts the
; occurrences of a substring, and return its start address.
;
; The routine takes one stack argument: a pointer to a block laid out as
; { ptr BaseString; ptr SubString; dword SubStringLength }. It repeatedly
; calls strstr(current position, SubString), adds one to a counter for every
; hit, continues SubStringLength bytes past the hit, and returns the counter
; in eax. A NULL argument returns 0.
;
; The address is kept in a Static variable, so later calls return the same
; routine without building it again.
;
; Layout of the code buffer: bytes 0..48 are instructions, 49..51 are nop
; padding and bytes 52..55 hold the address of msvcrt!strstr, which the
; "call dword ptr" instruction reads indirectly ($pProcedures).
; NOTE(review): _RTVirtualAlloc / _RTInject / _RTLongPtrToBytes /
; _RTGetProcAddress come from Thread.au3; presumably the allocation is
; executable memory - confirm in that library.
; Jump distances in the comments below are hexadecimal.
Func _MatchStringA()
Local Static $pStartAddr, $bCode, $pProcedures
If $pStartAddr Then Return $pStartAddr
$pStartAddr = _RTVirtualAlloc(1024)
$pProcedures = $pStartAddr + 52
$bCode = "0x" & _
"55" & _ ; push ebp
"8BEC" & _ ; mov ebp, esp
"8B4508" & _ ; mov eax, dword ptr [ebp+8]   ; eax = pointer to the parameter block
"85C0" & _ ; test eax, eax
"7423" & _ ; jz $+23                       ; NULL parameter: skip to "pop ebp" and return 0
"56" & _ ; push esi
"8BF0" & _ ; mov esi, eax                  ; esi = parameter block
"8B06" & _ ; mov eax, dword ptr [esi]      ; eax = BaseString (start of the text)
"57" & _ ; push edi
"33FF" & _ ; xor edi, edi                  ; edi = match counter
"FF7604" & _ ; push dword ptr [esi+4]        ; loop start: 2nd strstr argument = SubString
"50" & _ ; push eax                      ; 1st strstr argument = current position
"FF15" & _RTLongPtrToBytes($pProcedures) & _ ; call dword ptr [&strstr]
"83C408" & _ ; add esp, 8                    ; strstr is cdecl: caller removes the arguments
"85C0" & _ ; test eax, eax
"7406" & _ ; jz $+6                        ; no further match: leave the loop
"47" & _ ; inc edi
"034608" & _ ; add eax, dword ptr [esi+8]    ; continue SubStringLength bytes past the match
"EBE9" & _ ; jmp $-17                      ; back to loop start
"8BC7" & _ ; mov eax, edi                  ; return value = match count
"5F" & _ ; pop edi
"5E" & _ ; pop esi
"5D" & _ ; pop ebp
"C20400" & _ ; ret 4                         ; stdcall: remove the single argument
"909090" & _ ; nop                           ; padding so the pointer below sits at offset 52
_RTLongPtrToBytes(_RTGetProcAddress("msvcrt.dll", "strstr"))
_RTInject($pStartAddr, $bCode)
Return $pStartAddr
EndFunc ;==>_MatchStringA
Thread.au3 - http://www.autoitx.com/forum.php?mod=viewthread&tid=18153&page=1&extra= 很强大!机器语言确实会快一些!向牛人学习! 本帖最后由 阿福 于 2010-12-21 19:00 编辑
非常感谢pusofalse 及各位前辈.
虽然pusofalse 前辈不缺"钱",我还是要表达一下敬意. 只是我一次加不了那么多,不好意思了. 回复 9# 阿福
客气。 都是大大前辈级,小弟都不好意思露脸!!! 呵呵,正需要这个哈 拜读福大,P大。
马克学习 大人物聚会,露个脸,混个脸熟! 阿福:Func _myReadBitmapMsg()里的
$tBuff = DllStructCreate("byte[" & ($aBmpData*3) & "]", $aBmpData + ($iH-1)*$aBmpData)
$aRet[$iH-1] = StringTrimLeft(DllStructGetData($tBuff, 1), 2)
看不懂,请大家不吝赐教
页:
[1]
2