从文本中提取文件名
请教各位大大,如何从下面的文本中提取出 TxT_19430.txt、att000.jpg 等文件名?
<smil>
<head>
<layout>
<root-layout width="176" height="208"/>
<region id="Image" width="56%" height="24%" left="0%" top="0%"/>
<region id="Text" width="56%" height="24%" left="0%" top="24%"/>
</layout>
</head>
<body>
<par dur="180000ms">
<text src="TxT_19430.txt" region="Text"/>
<img src="att000.jpg" region="Image"/>
</par>
<par dur="180000ms">
<text src="TxT_18864.txt" region="Text"/>
</par>
<par dur="180000ms">
<text src="TxT_29443.txt" region="Text"/>
</par>
<par dur="180000ms">
<text src="TxT_21602.txt" region="Text"/>
</par>
<par dur="180000ms">
<text src="TxT_2030.txt" region="Text"/>
</par>
<par dur="10000ms">
<img src="att060.jpg" region="Image"/>
</par>
<par dur="180000ms">
<text src="TxT_22339.txt" region="Text"/>
</par>
<par dur="180000ms">
<text src="TxT_6280.txt" region="Text"/>
</par>
<par dur="180000ms">
<text src="TxT_16223.txt" region="Text"/>
</par>
<par dur="10000ms">
<img src="att100.jpg" region="Image"/>
</par>
<par dur="180000ms">
<text src="TxT_24982.txt" region="Text"/>
</par>
<par dur="10000ms">
<img src="att120.jpg" region="Image"/>
</par>
<par dur="180000ms">
<text src="TxT_29351.txt" region="Text"/>
</par>
<par dur="180000ms">
<text src="TxT_25734.txt" region="Text"/>
</par>
<par dur="180000ms">
<text src="TxT_15145.txt" region="Text"/>
</par>
<par dur="180000ms">
<text src="TxT_8240.txt" region="Text"/>
</par>
<par dur="120000ms">
<text src="TxT_11575.txt" region="Text"/>
</par>
<par dur="10000ms">
<img src="att180.gif" region="Image"/>
</par>
</body>
</smil>
#include<array.au3>
; Sample SMIL document used as the input text. Each <text> and <img> element
; names a file in its src="..." attribute; those values are what we extract.
Local $string = _
'<smil>' & @CRLF & _
' <head>' & @CRLF & _
' <layout>' & @CRLF & _
' <root-layout width="176" height="208"/>' & @CRLF & _
' <region id="Image" width="56%" height="24%" left="0%" top="0%"/>' & @CRLF & _
' <region id="Text" width="56%" height="24%" left="0%" top="24%"/>' & @CRLF & _
' </layout>' & @CRLF & _
' </head>' & @CRLF & _
' <body>' & @CRLF & _
' <par dur="180000ms">' & @CRLF & _
' <text src="TxT_19430.txt" region="Text"/>' & @CRLF & _
' <img src="att000.jpg" region="Image"/>' & @CRLF & _
' </par>' & @CRLF & _
' <par dur="180000ms">' & @CRLF & _
' <text src="TxT_18864.txt" region="Text"/>' & @CRLF & _
' </par>' & @CRLF & _
' <par dur="180000ms">' & @CRLF & _
' <text src="TxT_29443.txt" region="Text"/>' & @CRLF & _
' </par>' & @CRLF & _
' <par dur="180000ms">' & @CRLF & _
' <text src="TxT_21602.txt" region="Text"/>' & @CRLF & _
' </par>' & @CRLF & _
' <par dur="180000ms">' & @CRLF & _
' <text src="TxT_2030.txt" region="Text"/>' & @CRLF & _
' </par>' & @CRLF & _
' <par dur="10000ms">' & @CRLF & _
' <img src="att060.jpg" region="Image"/>' & @CRLF & _
' </par>' & @CRLF & _
' <par dur="180000ms">' & @CRLF & _
' <text src="TxT_22339.txt" region="Text"/>' & @CRLF & _
' </par>' & @CRLF & _
' <par dur="180000ms">' & @CRLF & _
' <text src="TxT_6280.txt" region="Text"/>' & @CRLF & _
' </par>' & @CRLF & _
' <par dur="180000ms">' & @CRLF & _
' <text src="TxT_16223.txt" region="Text"/>' & @CRLF & _
' </par>' & @CRLF & _
' <par dur="10000ms">' & @CRLF & _
' <img src="att100.jpg" region="Image"/>' & @CRLF & _
' </par>' & @CRLF & _
' <par dur="180000ms">' & @CRLF & _
' <text src="TxT_24982.txt" region="Text"/>' & @CRLF & _
' </par>' & @CRLF & _
' <par dur="10000ms">' & @CRLF & _
' <img src="att120.jpg" region="Image"/>' & @CRLF & _
' </par>' & @CRLF & _
' <par dur="180000ms">' & @CRLF & _
' <text src="TxT_29351.txt" region="Text"/>' & @CRLF & _
' </par>' & @CRLF & _
' <par dur="180000ms">' & @CRLF & _
' <text src="TxT_25734.txt" region="Text"/>' & @CRLF & _
' </par>' & @CRLF & _
' <par dur="180000ms">' & @CRLF & _
' <text src="TxT_15145.txt" region="Text"/>' & @CRLF & _
' </par>' & @CRLF & _
' <par dur="180000ms">' & @CRLF & _
' <text src="TxT_8240.txt" region="Text"/>' & @CRLF & _
' </par>' & @CRLF & _
' <par dur="120000ms">' & @CRLF & _
' <text src="TxT_11575.txt" region="Text"/>' & @CRLF & _
' </par>' & @CRLF & _
' <par dur="10000ms">' & @CRLF & _
' <img src="att180.gif" region="Image"/>' & @CRLF & _
' </par>' & @CRLF & _
'</smil>'

; Collect the value of every src="..." attribute into $filename.
;  - [^"]+ stops at the closing quote, so the capture can never run past the
;    attribute value (the previous greedy .+ could swallow later attributes
;    on the same line).
;  - The match no longer depends on a following region attribute, so a src
;    is found whatever comes after it.
;  - Flag 3 asks StringRegExp for an array of all matches; with a capture
;    group the array holds the captured text only, i.e. the bare file names.
; If nothing matches, StringRegExp sets @error and $filename is not an array.
Local $filename = StringRegExp($string, '\bsrc\s*=\s*"([^"]+)"', 3)
_ArrayDisplay($filename)
回复 1# zsltxx
#include <Array.au3>
; Build the sample SMIL document one line at a time. Every line except the
; last is terminated with @CRLF, so the result is a 66-line CRLF-separated text.
Local $Str = '<smil>' & @CRLF
$Str &= ' <head>' & @CRLF
$Str &= ' <layout>' & @CRLF
$Str &= ' <root-layout width="176" height="208"/>' & @CRLF
$Str &= ' <region id="Image" width="56%" height="24%" left="0%" top="0%"/>' & @CRLF
$Str &= ' <region id="Text" width="56%" height="24%" left="0%" top="24%"/>' & @CRLF
$Str &= ' </layout>' & @CRLF
$Str &= ' </head>' & @CRLF
$Str &= ' <body>' & @CRLF
$Str &= ' <par dur="180000ms">' & @CRLF
$Str &= ' <text src="TxT_19430.txt" region="Text"/>' & @CRLF
$Str &= ' <img src="att000.jpg" region="Image"/>' & @CRLF
$Str &= ' </par>' & @CRLF
$Str &= ' <par dur="180000ms">' & @CRLF
$Str &= ' <text src="TxT_18864.txt" region="Text"/>' & @CRLF
$Str &= ' </par>' & @CRLF
$Str &= ' <par dur="180000ms">' & @CRLF
$Str &= ' <text src="TxT_29443.txt" region="Text"/>' & @CRLF
$Str &= ' </par>' & @CRLF
$Str &= ' <par dur="180000ms">' & @CRLF
$Str &= ' <text src="TxT_21602.txt" region="Text"/>' & @CRLF
$Str &= ' </par>' & @CRLF
$Str &= ' <par dur="180000ms">' & @CRLF
$Str &= ' <text src="TxT_2030.txt" region="Text"/>' & @CRLF
$Str &= ' </par>' & @CRLF
$Str &= ' <par dur="10000ms">' & @CRLF
$Str &= ' <img src="att060.jpg" region="Image"/>' & @CRLF
$Str &= ' </par>' & @CRLF
$Str &= ' <par dur="180000ms">' & @CRLF
$Str &= ' <text src="TxT_22339.txt" region="Text"/>' & @CRLF
$Str &= ' </par>' & @CRLF
$Str &= ' <par dur="180000ms">' & @CRLF
$Str &= ' <text src="TxT_6280.txt" region="Text"/>' & @CRLF
$Str &= ' </par>' & @CRLF
$Str &= ' <par dur="180000ms">' & @CRLF
$Str &= ' <text src="TxT_16223.txt" region="Text"/>' & @CRLF
$Str &= ' </par>' & @CRLF
$Str &= ' <par dur="10000ms">' & @CRLF
$Str &= ' <img src="att100.jpg" region="Image"/>' & @CRLF
$Str &= ' </par>' & @CRLF
$Str &= ' <par dur="180000ms">' & @CRLF
$Str &= ' <text src="TxT_24982.txt" region="Text"/>' & @CRLF
$Str &= ' </par>' & @CRLF
$Str &= ' <par dur="10000ms">' & @CRLF
$Str &= ' <img src="att120.jpg" region="Image"/>' & @CRLF
$Str &= ' </par>' & @CRLF
$Str &= ' <par dur="180000ms">' & @CRLF
$Str &= ' <text src="TxT_29351.txt" region="Text"/>' & @CRLF
$Str &= ' </par>' & @CRLF
$Str &= ' <par dur="180000ms">' & @CRLF
$Str &= ' <text src="TxT_25734.txt" region="Text"/>' & @CRLF
$Str &= ' </par>' & @CRLF
$Str &= ' <par dur="180000ms">' & @CRLF
$Str &= ' <text src="TxT_15145.txt" region="Text"/>' & @CRLF
$Str &= ' </par>' & @CRLF
$Str &= ' <par dur="180000ms">' & @CRLF
$Str &= ' <text src="TxT_8240.txt" region="Text"/>' & @CRLF
$Str &= ' </par>' & @CRLF
$Str &= ' <par dur="120000ms">' & @CRLF
$Str &= ' <text src="TxT_11575.txt" region="Text"/>' & @CRLF
$Str &= ' </par>' & @CRLF
$Str &= ' <par dur="10000ms">' & @CRLF
$Str &= ' <img src="att180.gif" region="Image"/>' & @CRLF
$Str &= ' </par>' & @CRLF
$Str &= ' </body>' & @CRLF
$Str &= '</smil>'
; Run three extractions over the sample text and show each result set:
;   1. src values of <text> elements only
;   2. src values of <img> elements only
;   3. src values of both element kinds
; Each pattern captures everything after src=" up to (not including) the next
; double quote. Flag 3 makes StringRegExp return all captures as an array.
Local $aPatterns[3] = [ _
        '<(?i)text\hsrc\="([^"]+)', _
        '<(?i)img\hsrc\="([^"]+)', _
        '<(?i)(?:text|img)\hsrc\="([^"]+)']
Local $Test
For $iPattern = 0 To UBound($aPatterns) - 1
    $Test = StringRegExp($str, $aPatterns[$iPattern], 3)
    ; StringRegExp sets @error when nothing matched; skip the display then.
    If @error Then ContinueLoop
    ; The window title is the number of matches found.
    _ArrayDisplay($Test, UBound($Test))
Next
谢谢!阿杰好强,对正则头疼 不懂正则 学习
处理此类网页数据, 我喜欢用 ([^"]+) 匹配链接、标题等等, 都可以用它, 只是前面的字符要相应修改下.
狂补正则中……向汗颜致敬
#include <Array.au3>
; Sample SMIL document used as the input text. Each <text> and <img> element
; names a file in its src="..." attribute; those values are what we extract.
Local $String = _
'<smil>' & @CRLF & _
' <head>' & @CRLF & _
' <layout>' & @CRLF & _
' <root-layout width="176" height="208"/>' & @CRLF & _
' <region id="Image" width="56%" height="24%" left="0%" top="0%"/>' & @CRLF & _
' <region id="Text" width="56%" height="24%" left="0%" top="24%"/>' & @CRLF & _
' </layout>' & @CRLF & _
' </head>' & @CRLF & _
' <body>' & @CRLF & _
' <par dur="180000ms">' & @CRLF & _
' <text src="TxT_19430.txt" region="Text"/>' & @CRLF & _
' <img src="att000.jpg" region="Image"/>' & @CRLF & _
' </par>' & @CRLF & _
' <par dur="180000ms">' & @CRLF & _
' <text src="TxT_18864.txt" region="Text"/>' & @CRLF & _
' </par>' & @CRLF & _
' <par dur="180000ms">' & @CRLF & _
' <text src="TxT_29443.txt" region="Text"/>' & @CRLF & _
' </par>' & @CRLF & _
' <par dur="180000ms">' & @CRLF & _
' <text src="TxT_21602.txt" region="Text"/>' & @CRLF & _
' </par>' & @CRLF & _
' <par dur="180000ms">' & @CRLF & _
' <text src="TxT_2030.txt" region="Text"/>' & @CRLF & _
' </par>' & @CRLF & _
' <par dur="10000ms">' & @CRLF & _
' <img src="att060.jpg" region="Image"/>' & @CRLF & _
' </par>' & @CRLF & _
' <par dur="180000ms">' & @CRLF & _
' <text src="TxT_22339.txt" region="Text"/>' & @CRLF & _
' </par>' & @CRLF & _
' <par dur="180000ms">' & @CRLF & _
' <text src="TxT_6280.txt" region="Text"/>' & @CRLF & _
' </par>' & @CRLF & _
' <par dur="180000ms">' & @CRLF & _
' <text src="TxT_16223.txt" region="Text"/>' & @CRLF & _
' </par>' & @CRLF & _
' <par dur="10000ms">' & @CRLF & _
' <img src="att100.jpg" region="Image"/>' & @CRLF & _
' </par>' & @CRLF & _
' <par dur="180000ms">' & @CRLF & _
' <text src="TxT_24982.txt" region="Text"/>' & @CRLF & _
' </par>' & @CRLF & _
' <par dur="10000ms">' & @CRLF & _
' <img src="att120.jpg" region="Image"/>' & @CRLF & _
' </par>' & @CRLF & _
' <par dur="180000ms">' & @CRLF & _
' <text src="TxT_29351.txt" region="Text"/>' & @CRLF & _
' </par>' & @CRLF & _
' <par dur="180000ms">' & @CRLF & _
' <text src="TxT_25734.txt" region="Text"/>' & @CRLF & _
' </par>' & @CRLF & _
' <par dur="180000ms">' & @CRLF & _
' <text src="TxT_15145.txt" region="Text"/>' & @CRLF & _
' </par>' & @CRLF & _
' <par dur="180000ms">' & @CRLF & _
' <text src="TxT_8240.txt" region="Text"/>' & @CRLF & _
' </par>' & @CRLF & _
' <par dur="120000ms">' & @CRLF & _
' <text src="TxT_11575.txt" region="Text"/>' & @CRLF & _
' </par>' & @CRLF & _
' <par dur="10000ms">' & @CRLF & _
' <img src="att180.gif" region="Image"/>' & @CRLF & _
' </par>' & @CRLF & _
'</smil>'

; Collect the value of every src="..." attribute into $aFileName.
; The capture is bounded by the attribute's own quotes ([^"]+), which fixes two
; problems of the earlier "up to the first dot, then \w+" pattern:
;  - names containing several dots (e.g. a.tar.gz) were cut after the second
;    component;
;  - a src value without any dot let the match run on across quotes and line
;    breaks until some later dot.
; Flag 3 asks StringRegExp for an array of all matches; with a capture group
; the array holds the captured text only, i.e. the bare file names.
; If nothing matches, StringRegExp sets @error and $aFileName is not an array.
Local $aFileName = StringRegExp($String, '\bsrc\s*=\s*"([^"]+)"', 3)
_ArrayDisplay($aFileName)
结帖,谢谢2#、3#、9#
2#:http://www.autoitx.com/forum.php?mod=redirect&goto=findpost&ptid=31630&pid=423809&fromuid=7658890
3#:http://www.autoitx.com/forum.php?mod=redirect&goto=findpost&ptid=31630&pid=423813&fromuid=7658890
9#:http://www.autoitx.com/forum.php?mod=redirect&goto=findpost&ptid=31630&pid=424058&fromuid=7658890
页:
[1]