处男1 发表于 2014-2-8 18:23:05

验证码识别初级

本帖最后由 处男1 于 2014-2-8 18:26 编辑

#include <GDIPlus.au3>

; ---------------------------------------------------------------------------
; Recognise a 4-digit captcha by exact template matching.
;
; Each digit cell is cut out of the source bitmap, its 24bpp pixel bytes are
; read as one hex string by _myReadBitmapMsg(..., 0), and that string is
; compared against ten hard-coded templates (one per digit 0-9).
; ---------------------------------------------------------------------------
_GDIPlus_Startup()

$sBmpFile = @ScriptDir & "\028.bmp" ; image to recognise
$hBitmap = _GDIPlus_BitmapCreateFromFile($sBmpFile)
If @error Or Not $hBitmap Then
	; Without a source bitmap every clone below would fail; stop cleanly.
	ConsoleWrite("Cannot load image: " & $sBmpFile & @CRLF)
	_GDIPlus_Shutdown()
	Exit 1
EndIf

; Geometry of the digit cells inside the captcha image (pixels).
$i_Left = 8     ; x of the first digit cell
$i_Top = 4      ; y of every digit cell
$i_Width = 7    ; horizontal distance from one cell to the next
$i_Height = 11  ; height of a cell
$i_Between = 1  ; columns subtracted from $i_Width to get the cloned width

$sOcr = ""
For $i = 1 To 4
	; Cut out the bitmap of a single digit.
	$hClone = _GDIPlus_BitmapCloneArea($hBitmap, $i_Left + ($i - 1) * $i_Width, $i_Top, $i_Width - $i_Between, $i_Height, $GDIP_PXF24RGB)
	; Read its pixel bytes as one hex string.
	$aBmpData = _myReadBitmapMsg($hClone, 0)
	; Every iteration creates a new GDI+ bitmap, so release it here rather
	; than only releasing the last one after the loop.
	_GDIPlus_ImageDispose($hClone)
	;ConsoleWrite($aBmpData&@crlf)
	Switch $aBmpData
		Case "F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0C86400C86400C86400C86400F5F3F0C86400F5F3F0F5F3F0F5F3F0F5F3F0C86400C86400F5F3F0F5F3F0F5F3F0F5F3F0C86400C86400F5F3F0F5F3F0F5F3F0F5F3F0C86400C86400F5F3F0F5F3F0F5F3F0F5F3F0C86400C86400F5F3F0F5F3F0F5F3F0F5F3F0C86400C86400F5F3F0F5F3F0F5F3F0F5F3F0C86400C86400F5F3F0F5F3F0F5F3F0F5F3F0C86400C86400F5F3F0F5F3F0F5F3F0F5F3F0C86400F5F3F0C86400C86400C86400C86400F5F3F0"
			$sOcr &= "0"
		Case "F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0C86400F5F3F0F5F3F0F5F3F0F5F3F0C86400C86400F5F3F0F5F3F0F5F3F0C86400F5F3F0C86400F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0C86400F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0C86400F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0C86400F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0C86400F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0C86400F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0C86400F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0C86400F5F3F0F5F3F0"
			$sOcr &= "1"
		Case "F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0C86400C86400C86400C86400F5F3F0C86400F5F3F0F5F3F0F5F3F0F5F3F0C86400F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0C86400F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0C86400F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0C86400F5F3F0F5F3F0F5F3F0F5F3F0C86400F5F3F0F5F3F0F5F3F0F5F3F0C86400F5F3F0F5F3F0F5F3F0F5F3F0C86400F5F3F0F5F3F0F5F3F0F5F3F0C86400F5F3F0F5F3F0F5F3F0F5F3F0C86400C86400C86400C86400C86400C86400"
			$sOcr &= "2"
		Case "F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0C86400C86400C86400C86400F5F3F0C86400F5F3F0F5F3F0F5F3F0F5F3F0C86400F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0C86400F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0C86400F5F3F0F5F3F0C86400C86400C86400F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0C86400F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0C86400F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0C86400C86400F5F3F0F5F3F0F5F3F0F5F3F0C86400F5F3F0C86400C86400C86400C86400F5F3F0"
			$sOcr &= "3"
		Case "F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0C86400F5F3F0F5F3F0F5F3F0F5F3F0C86400C86400F5F3F0F5F3F0F5F3F0F5F3F0C86400C86400F5F3F0F5F3F0F5F3F0C86400F5F3F0C86400F5F3F0F5F3F0F5F3F0C86400F5F3F0C86400F5F3F0F5F3F0C86400F5F3F0F5F3F0C86400F5F3F0F5F3F0C86400F5F3F0F5F3F0C86400F5F3F0C86400C86400C86400C86400C86400C86400F5F3F0F5F3F0F5F3F0F5F3F0C86400F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0C86400F5F3F0"
			$sOcr &= "4"
		Case "F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0C86400C86400C86400C86400C86400F5F3F0C86400F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0C86400F5F3F0F5F3F0F5F3F0F5F3F0C86400F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0C86400C86400C86400C86400C86400F5F3F0C86400F5F3F0F5F3F0F5F3F0F5F3F0C86400F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0C86400F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0C86400C86400F5F3F0F5F3F0F5F3F0F5F3F0C86400F5F3F0C86400C86400C86400C86400F5F3F0"
			$sOcr &= "5"
		Case "F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0C86400C86400C86400C86400F5F3F0C86400F5F3F0F5F3F0F5F3F0F5F3F0C86400C86400F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0C86400F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0C86400F5F3F0C86400C86400C86400F5F3F0C86400C86400F5F3F0F5F3F0F5F3F0C86400C86400F5F3F0F5F3F0F5F3F0F5F3F0C86400C86400F5F3F0F5F3F0F5F3F0F5F3F0C86400C86400F5F3F0F5F3F0F5F3F0F5F3F0C86400F5F3F0C86400C86400C86400C86400F5F3F0"
			$sOcr &= "6"
		Case "F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0C86400C86400C86400C86400C86400C86400F5F3F0F5F3F0F5F3F0F5F3F0C86400F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0C86400F5F3F0F5F3F0F5F3F0F5F3F0C86400F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0C86400F5F3F0F5F3F0F5F3F0F5F3F0C86400F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0C86400F5F3F0F5F3F0F5F3F0F5F3F0C86400F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0C86400F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0C86400F5F3F0F5F3F0F5F3F0F5F3F0"
			$sOcr &= "7"
		Case "F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0C86400C86400C86400C86400F5F3F0C86400F5F3F0F5F3F0F5F3F0F5F3F0C86400C86400F5F3F0F5F3F0F5F3F0F5F3F0C86400C86400F5F3F0F5F3F0F5F3F0F5F3F0C86400F5F3F0C86400C86400C86400C86400F5F3F0C86400F5F3F0F5F3F0F5F3F0F5F3F0C86400C86400F5F3F0F5F3F0F5F3F0F5F3F0C86400C86400F5F3F0F5F3F0F5F3F0F5F3F0C86400C86400F5F3F0F5F3F0F5F3F0F5F3F0C86400F5F3F0C86400C86400C86400C86400F5F3F0"
			$sOcr &= "8"
		Case "F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0C86400C86400C86400C86400F5F3F0C86400F5F3F0F5F3F0F5F3F0F5F3F0C86400C86400F5F3F0F5F3F0F5F3F0F5F3F0C86400C86400F5F3F0F5F3F0F5F3F0F5F3F0C86400C86400F5F3F0F5F3F0F5F3F0C86400C86400F5F3F0C86400C86400C86400F5F3F0C86400F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0C86400F5F3F0F5F3F0F5F3F0F5F3F0F5F3F0C86400C86400F5F3F0F5F3F0F5F3F0C86400F5F3F0F5F3F0C86400C86400C86400F5F3F0F5F3F0"
			$sOcr &= "9"
		Case Else
			; Unknown glyph: keep a placeholder so the remaining digits
			; stay at their correct positions in the result.
			$sOcr &= "?"
	EndSwitch
Next
ConsoleWrite("识别结果:" & $sOcr & @crlf)

; GDI+ image handles are released with _GDIPlus_ImageDispose only; they are
; not GDI objects, so _WinAPI_DeleteObject must not be called on them.
_GDIPlus_ImageDispose($hBitmap)
_GDIPlus_Shutdown()
Exit


; #FUNCTION# ====================================================================
; Name ..........: _myReadBitmapMsg
; Description ...: Locks a GDI+ bitmap as 24bpp RGB and returns its geometry
;                  and/or its raw pixel bytes.
; Parameters ....: $hBitmap    - GDI+ bitmap handle.
;                  $ReturnType - Selects the return value (default 1; values
;                                outside -2..1 are treated as 1):
;                                -2 : array [width, height, stride, Scan0]
;                                -1 : array [width, height, stride, Scan0, data]
;                                     where data is Binary, stride*height bytes
;                                     (row padding included)
;                                 0 : one hex string of all rows, width*3 bytes
;                                     per row, row padding excluded
;                                 1 : array with one hex string per row
; Return values .: Success - see $ReturnType.
;                  Failure - "" and @error = 1 (bad size) or 2 (lock failed).
; Remarks .......: The pixel bytes are copied while the bitmap is still locked.
;                  The Scan0 value in the returned arrays is informational
;                  only: it is not valid after this function has unlocked the
;                  bitmap.
; ===============================================================================
Func _myReadBitmapMsg($hBitmap, $ReturnType=1)
	Local $iWidth = _GDIPlus_ImageGetWidth($hBitmap)
	Local $iHeight = _GDIPlus_ImageGetHeight($hBitmap)
	If $iWidth <= 0 Or $iHeight <= 0 Then Return SetError(1, 0, "")

	Local $BitmapData = _GDIPlus_BitmapLockBits($hBitmap, 0, 0, $iWidth, $iHeight, $GDIP_ILMREAD, $GDIP_PXF24RGB)
	If @error Then Return SetError(2, 0, "")
	; Stride is the byte length of one scan line (padded to a multiple of 4).
	Local $iStride = Abs(DllStructGetData($BitmapData, "Stride"))
	; Scan0 is the address of the first scan line of the locked buffer.
	Local $pScan0 = DllStructGetData($BitmapData, "Scan0")

	$ReturnType = Int($ReturnType)
	If $ReturnType < -2 Or $ReturnType > 1 Then $ReturnType = 1

	Local $tBuff, $iH, $vRet
	Switch $ReturnType
		Case -2 ; bitmap information only
			Local $aInfo[4] = [$iWidth, $iHeight, $iStride, $pScan0]
			$vRet = $aInfo

		Case -1 ; bitmap information plus the whole buffer as Binary
			Local $aInfoBin[5] = [$iWidth, $iHeight, $iStride, $pScan0, 0]
			$tBuff = DllStructCreate("byte[" & ($iStride * $iHeight) & "]", $pScan0)
			$aInfoBin[4] = DllStructGetData($tBuff, 1)
			$vRet = $aInfoBin

		Case 0 ; all pixel bytes as a single hex string
			$vRet = ""
			For $iH = 0 To $iHeight - 1
				; Map only the width*3 payload bytes of row $iH; the rows
				; are $iStride bytes apart in memory.
				$tBuff = DllStructCreate("byte[" & ($iWidth * 3) & "]", $pScan0 + $iH * $iStride)
				; Binary converts to "0x...."; drop the "0x" prefix.
				$vRet &= StringTrimLeft(DllStructGetData($tBuff, 1), 2)
			Next

		Case Else ; one hex string per row
			Local $aRows[$iHeight]
			For $iH = 0 To $iHeight - 1
				$tBuff = DllStructCreate("byte[" & ($iWidth * 3) & "]", $pScan0 + $iH * $iStride)
				$aRows[$iH] = StringTrimLeft(DllStructGetData($tBuff, 1), 2)
			Next
			$vRet = $aRows
	EndSwitch

	; Unlock only after every byte has been copied out of the locked buffer.
	_GDIPlus_BitmapUnlockBits($hBitmap, $BitmapData)
	Return $vRet
EndFunc

以上代码为阿福之前发布的,在这里我作为一个例子和初学者一起分析。

$i_Left = 8
$i_Top = 4
$i_Width = 7
$i_Height = 11
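$i_Between = 1

以上几个变量定义了验证码中每个数字的位置和大小($i_Left、$i_Top 是第一个数字的左上角坐标,$i_Width 是相邻两个数字的水平间距,$i_Height 是数字的高度,$i_Between 是克隆时要从 $i_Width 中减去的列数),目的是为了取出验证码上每个数字的位图信息。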

_GDIPlus_BitmapCloneArea:从原位图中克隆出指定的矩形区域,创建一个新的位图,为了之后获取这个数字的位图相关信息。

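为什么要用 _GDIPlus_BitmapLockBits 锁定位图?因为锁定之后,GDI+ 会按照指定的像素格式(这里是 $GDIP_PXF24RGB)把位图数据放到一块内存缓冲区中,并返回这块缓冲区的信息(宽、高、跨度 Stride、首地址 Scan0),程序才能直接读取或修改每个像素的字节(和 Photoshop 里直接编辑像素类似)。本例用 $GDIP_ILMREAD 以只读方式锁定:只有读到这些字节,才能把图片转化成字符。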

锁定之后,就要进行转化了。首先要获取位图的跨度(Stride),即 DllStructGetData($BitmapData, "Stride")。跨度就是图像的一行扫描线在内存中所占的长度,它的单位是字节,而不是像素。比如一个50*50的32位图像,每个像素是4个字节,它的跨度就是 WIDTH*4=200。再比如一个11*20的24位图像,每个像素为3个字节,一行是11*3=33个字节,又因为 stride 的值必须是4的倍数,不够4的倍数要补齐,所以它的跨度就是11*3+3=36。所以是*3还是*4,要看 $hClone = _GDIPlus_BitmapCloneArea($hBitmap, $i_Left + ($i-1) * $i_Width, $i_Top, $i_Width - $i_Between, $i_Height, $GDIP_PXF24RGB) 中指定的像素格式,这里是 $GDIP_PXF24RGB(24位),所以是*3。【本文纯分析阿福这段代码】


获取了每个验证码数字的位图后,就要获取它在内存中的首地址。Scan0 就是该位图的像素数据在内存中的首地址。也就是说,该图像的数据都以字节形式保存在了内存中,找到了首地址,就找到了验证码数字最上面一行的数据。因为数字的大小是 width*height,所以要得到整个数字的数据,不能只读第一行,而应该读取 HEIGHT 行:每行读取 width*3 个字节,相邻两行的起始地址相隔 STRIDE 个字节。


这样,就把一个图像数字化了。

zhongzijie 发表于 2014-2-8 19:23:10

好強大啊,謝謝分享。

lilachue 发表于 2014-12-20 11:29:13

不错哈,虽然没看明白,还是谢谢

jiaosc 发表于 2015-6-3 09:01:36

謝謝分享。
页: [1]
查看完整版本: 验证码识别初级