本帖最后由 gto250 于 2012-6-2 20:38 编辑
先附源码
#Include <Array.au3>
#include <ButtonConstants.au3>
#include <EditConstants.au3>
#include <GUIConstantsEx.au3>
#include <WindowsConstants.au3>
$Form1 = GUICreate("Form1", 968, 304, 342, 220)
$Edit1 = GUICtrlCreateEdit("", 8, 13, 233, 169)
GUICtrlSetData(-1, StringFormat("<Row ss:AutoFitHeight="&Chr(34)&"0"&Chr(34)&" ss:Height="&Chr(34)&"21.9375"&Chr(34)&" ss:StyleID="&Chr(34)&"s30"&Chr(34)&">\r\n <Cell ss:StyleID="&Chr(34)&"s29"&Chr(34)&"><Data ss:Type="&Chr(34)&"Number"&Chr(34)&">1</Data></Cell>\r\n <Cell ss:StyleID="&Chr(34)&"s29"&Chr(34)&"><Data ss:Type="&Chr(34)&"Number"&Chr(34)&">500033659</Data></Cell>\r\n <Cell ss:StyleID="&Chr(34)&"s29"&Chr(34)&"><Data ss:Type="&Chr(34)&"String"&Chr(34)&">电杆</Data></Cell>\r\n <Cell ss:StyleID="&Chr(34)&"s29"&Chr(34)&"><Data ss:Type="&Chr(34)&"String"&Chr(34)&">锥形水泥杆,非预应力,整根杆,10m,190mm,I</Data></Cell>\r\n <Cell ss:StyleID="&Chr(34)&"s29"&Chr(34)&"><Data ss:Type="&Chr(34)&"Number"&Chr(34)&">2</Data></Cell>\r\n <Cell ss:StyleID="&Chr(34)&"s29"&Chr(34)&"><Data ss:Type="&Chr(34)&"String"&Chr(34)&">基</Data></Cell>\r\n <Cell ss:StyleID="&Chr(34)&"s29"&Chr(34)&"><Data ss:Type="&Chr(34)&"String"&Chr(34)&">Ф190*10非</Data></Cell>\r\n </Row> "))
$Edit2 = GUICtrlCreateEdit("", 248, 13, 233, 169)
GUICtrlSetData(-1, StringFormat("<Row ss:AutoFitHeight="&Chr(34)&"0"&Chr(34)&" ss:Height="&Chr(34)&"18.75"&Chr(34)&">\r\n <Cell ss:StyleID="&Chr(34)&"s24"&Chr(34)&" ss:Formula="&Chr(34)&"=ROW()-2"&Chr(34)&"><Data ss:Type="&Chr(34)&"Number"&Chr(34)&">1</Data></Cell>\r\n <Cell ss:StyleID="&Chr(34)&"s24"&Chr(34)&"><Data ss:Type="&Chr(34)&"Number"&Chr(34)&">500034429</Data></Cell>\r\n <Cell ss:StyleID="&Chr(34)&"s24"&Chr(34)&"><Data ss:Type="&Chr(34)&"String"&Chr(34)&">横担</Data></Cell>\r\n <Cell ss:StyleID="&Chr(34)&"s24"&Chr(34)&"><Data ss:Type="&Chr(34)&"String"&Chr(34)&">L6*60*1500</Data></Cell>\r\n <Cell ss:StyleID="&Chr(34)&"s24"&Chr(34)&"><Data ss:Type="&Chr(34)&"Number"&Chr(34)&">6</Data></Cell>\r\n <Cell ss:StyleID="&Chr(34)&"s24"&Chr(34)&"><Data ss:Type="&Chr(34)&"String"&Chr(34)&">块</Data></Cell>\r\n <Cell ss:StyleID="&Chr(34)&"s24"&Chr(34)&"><Data ss:Type="&Chr(34)&"String"&Chr(34)&">线路角铁横担,∠60×6,1500mm,不计孔距,中间</Data></Cell>\r\n <Cell ss:StyleID="&Chr(34)&"s24"&Chr(34)&"><Data ss:Type="&Chr(34)&"String"&Chr(34)&"></Data></Cell>\r\n </Row>"))
$Edit3 = GUICtrlCreateEdit("", 489, 13, 233, 169)
GUICtrlSetData(-1, StringFormat(" <Row ss:AutoFitHeight="&Chr(34)&"0"&Chr(34)&" ss:Height="&Chr(34)&"21.9375"&Chr(34)&" ss:StyleID="&Chr(34)&"s30"&Chr(34)&">\r\n <Cell ss:StyleID="&Chr(34)&"s29"&Chr(34)&"><Data ss:Type="&Chr(34)&"Number"&Chr(34)&">1</Data></Cell>\r\n <Cell ss:StyleID="&Chr(34)&"s29"&Chr(34)&"><Data ss:Type="&Chr(34)&"Number"&Chr(34)&">500033659</Data></Cell>\r\n <Cell ss:StyleID="&Chr(34)&"s29"&Chr(34)&"><Data ss:Type="&Chr(34)&"String"&Chr(34)&">电杆</Data></Cell>\r\n <Cell ss:StyleID="&Chr(34)&"s29"&Chr(34)&"><Data ss:Type="&Chr(34)&"String"&Chr(34)&">锥形水泥杆,非预应力,整根杆,10m,190mm,I</Data></Cell>\r\n <Cell ss:StyleID="&Chr(34)&"s29"&Chr(34)&"><Data ss:Type="&Chr(34)&"Number"&Chr(34)&">2</Data></Cell>\r\n <Cell ss:StyleID="&Chr(34)&"s29"&Chr(34)&"><Data ss:Type="&Chr(34)&"String"&Chr(34)&">基</Data></Cell>\r\n <Cell ss:StyleID="&Chr(34)&"s29"&Chr(34)&">\r\n </Row> "))
$Edit4 = GUICtrlCreateEdit("", 729, 13, 233, 169)
GUICtrlSetData(-1, StringFormat(" <Row ss:AutoFitHeight="&Chr(34)&"0"&Chr(34)&" ss:Height="&Chr(34)&"18.75"&Chr(34)&">\r\n <Cell ss:StyleID="&Chr(34)&"s24"&Chr(34)&" ss:Formula="&Chr(34)&"=ROW()-2"&Chr(34)&"><Data ss:Type="&Chr(34)&"Number"&Chr(34)&">1</Data></Cell>\r\n <Cell ss:StyleID="&Chr(34)&"s24"&Chr(34)&"><Data ss:Type="&Chr(34)&"Number"&Chr(34)&">500034429</Data></Cell>\r\n <Cell ss:StyleID="&Chr(34)&"s24"&Chr(34)&"><Data ss:Type="&Chr(34)&"String"&Chr(34)&">横担</Data></Cell>\r\n <Cell ss:StyleID="&Chr(34)&"s24"&Chr(34)&"><Data ss:Type="&Chr(34)&"String"&Chr(34)&">L6*60*1500</Data></Cell>\r\n <Cell ss:StyleID="&Chr(34)&"s24"&Chr(34)&"><Data ss:Type="&Chr(34)&"Number"&Chr(34)&">6</Data></Cell>\r\n <Cell ss:StyleID="&Chr(34)&"s24"&Chr(34)&"><Data ss:Type="&Chr(34)&"String"&Chr(34)&">块</Data></Cell>\r\n <Cell ss:StyleID="&Chr(34)&"s24"&Chr(34)&"><Data ss:Type="&Chr(34)&"String"&Chr(34)&">线路角铁横担,∠60×6,1500mm,不计孔距,中间</Data></Cell>\r\n <Cell ss:StyleID="&Chr(34)&"s24"&Chr(34)&">\r\n </Row>"))
$Button1 = GUICtrlCreateButton("(1)匹配1和2", 24, 200, 75, 25)
$Button2 = GUICtrlCreateButton("(2)匹配1和2", 118, 202, 75, 25)
$Button3 = GUICtrlCreateButton("Button1", 221, 203, 75, 25)
$Button4 = GUICtrlCreateButton("Button1", 312, 203, 75, 25)
GUISetState(@SW_SHOW)
While 1
$nMsg = GUIGetMsg()
Switch $nMsg
Case $GUI_EVENT_CLOSE
Exit
Case $Button1
$c=str_arr(GUICtrlRead($Edit1))
_ArrayDisplay($c)
Case $Button2
$c=str_arr(GUICtrlRead($Edit2))
_ArrayDisplay($c)
EndSwitch
WEnd
Func str_arr($str)
Local $arr[1]
$arr[0]="序号 物料代码 材料名称 规格型号 数量 单位 备注"
$reg_str_1="<Row.*>\s*<Cell.*><Data.*>(.*)</Data></Cell>\s*<Cell.*><Data.*>(.*)</Data></Cell>\s*<Cell.*><Data.*>(.*)</Data></Cell>\s*<Cell.*><Data.*>(.*)</Data></Cell>\s*<Cell.*><Data.*>(.*)</Data></Cell>\s*<Cell.*><Data.*>(.*)</Data></Cell>\s*<Cell.*><Data.*>(.*)</Data></Cell>\s*</Row>"
$reg_str_2="<Row.*>\s*<Cell.*><Data.*>(.*)</Data></Cell>\s*<Cell.*><Data.*>(.*)</Data></Cell>\s*<Cell.*><Data.*>(.*)</Data></Cell>\s*<Cell.*><Data.*>(.*)</Data></Cell>\s*<Cell.*><Data.*>(.*)</Data></Cell>\s*<Cell.*><Data.*>(.*)</Data></Cell>\s*<Cell.*><Data.*>(.*)</Data></Cell>\s*<Cell.*><Data.*>(.*)</Data></Cell>\s*</Row>"
Local $array
Local $nOffset = 1
While 1
$array = StringRegExp($str, $reg_str_1, 1, $nOffset)
If @error=1 Then
$array = StringRegExp($str, $reg_str_2, 1, $nOffset)
EndIf
If @error = 0 Then
$nOffset = @extended
Else
ExitLoop
EndIf
If $array[0]<>"序号" Then
$c=_ArrayToString($array,@TAB)
_ArrayAdd($arr,$c)
EndIf
WEnd
Return $arr
EndFunc
我想实现的是对一些文本文件的匹配工作,因为格式的问题,正则并不能完全起工作,文本的格式有以下四种
1、
<Row ss:AutoFitHeight="0" ss:Height="21.9375" ss:StyleID="s30">
<Cell ss:StyleID="s29"><Data ss:Type="Number">1</Data></Cell>
<Cell ss:StyleID="s29"><Data ss:Type="Number">500033659</Data></Cell>
<Cell ss:StyleID="s29"><Data ss:Type="String">电杆</Data></Cell>
<Cell ss:StyleID="s29"><Data ss:Type="String">锥形水泥杆,非预应力,整根杆,10m,190mm,I</Data></Cell>
<Cell ss:StyleID="s29"><Data ss:Type="Number">2</Data></Cell>
<Cell ss:StyleID="s29"><Data ss:Type="String">基</Data></Cell>
<Cell ss:StyleID="s29"><Data ss:Type="String">Ф190*10非</Data></Cell>
</Row>
2、
<Row ss:AutoFitHeight="0" ss:Height="18.75">
<Cell ss:StyleID="s24" ss:Formula="=ROW()-2"><Data ss:Type="Number">1</Data></Cell>
<Cell ss:StyleID="s24"><Data ss:Type="Number">500034429</Data></Cell>
<Cell ss:StyleID="s24"><Data ss:Type="String">横担</Data></Cell>
<Cell ss:StyleID="s24"><Data ss:Type="String">L6*60*1500</Data></Cell>
<Cell ss:StyleID="s24"><Data ss:Type="Number">6</Data></Cell>
<Cell ss:StyleID="s24"><Data ss:Type="String">块</Data></Cell>
<Cell ss:StyleID="s24"><Data ss:Type="String">线路角铁横担,∠60×6,1500mm,不计孔距,中间</Data></Cell>
<Cell ss:StyleID="s24"><Data ss:Type="String"></Data></Cell>
</Row>
3、
<Row ss:AutoFitHeight="0" ss:Height="21.9375" ss:StyleID="s30">
<Cell ss:StyleID="s29"><Data ss:Type="Number">1</Data></Cell>
<Cell ss:StyleID="s29"><Data ss:Type="Number">500033659</Data></Cell>
<Cell ss:StyleID="s29"><Data ss:Type="String">电杆</Data></Cell>
<Cell ss:StyleID="s29"><Data ss:Type="String">锥形水泥杆,非预应力,整根杆,10m,190mm,I</Data></Cell>
<Cell ss:StyleID="s29"><Data ss:Type="Number">2</Data></Cell>
<Cell ss:StyleID="s29"><Data ss:Type="String">基</Data></Cell>
<Cell ss:StyleID="s29">
</Row>
4、
<Row ss:AutoFitHeight="0" ss:Height="18.75">
<Cell ss:StyleID="s24" ss:Formula="=ROW()-2"><Data ss:Type="Number">1</Data></Cell>
<Cell ss:StyleID="s24"><Data ss:Type="Number">500034429</Data></Cell>
<Cell ss:StyleID="s24"><Data ss:Type="String">横担</Data></Cell>
<Cell ss:StyleID="s24"><Data ss:Type="String">L6*60*1500</Data></Cell>
<Cell ss:StyleID="s24"><Data ss:Type="Number">6</Data></Cell>
<Cell ss:StyleID="s24"><Data ss:Type="String">块</Data></Cell>
<Cell ss:StyleID="s24"><Data ss:Type="String">线路角铁横担,∠60×6,1500mm,不计孔距,中间</Data></Cell>
<Cell ss:StyleID="s24">
</Row>
当然,这是全部文本就截取的部分,1和3是一样的,只不过<Cell这个标签后面少了<Data的标签,2和4也同样。1和2是不一样的,一个有7行数据,一个有8行数据
我要做的是将<data>标签间的数据提取出来
我写了一个func,就是上面贴的代码,能做到1和2中的数据提取,但是做不了3和4中的数据也提取
想请教各位正则高手,如何写个正则可以对这个4种格式的数据提取,通用的正则
二楼的代码很好,但是我没有表述清楚,<row></row>标签是之内的数据一行数据,<cell></cell>是每个表格的数据,我这里给出的只是截取的一段数据,整个文本中是有很多<row></row>标签的
我只要提取<row></row>标签内有7个或者8个<cell></cell>标签内的<Data></Data>标签中的数据,(因为其他有些<row>标签内是小于7个<cell>标签或者大于8个<cell>的)从我上面给出的4个格式中可以看出,3和4文本中最后一个<cell>标签是不完整的,但是还是符合在<row>标签内有7个或者8个<cell>标签
发现自己说的很啰嗦,不知道大家看懂没!
1、我要找到文本中有7个或者8个<cell>标签的<row>,然后提取里面的全部<Data>标签内的数据
2、能做到以上4种格式的通用正则形式
|