newuser 发表于 2011-1-6 10:02:05

[已解决]正则:匹配某部分内容?

本帖最后由 newuser 于 2011-1-7 10:37 编辑

以下是字符串内容:
var $=getObject=function(element){if(arguments.length>1){for(var i=0,elements=[],length=arguments.length;i<length;i++){elements.push($(arguments));}return elements;}if(typeof element=='string'){return document.getElementById(element);}return element;};var createScript=function(cid,url,f,arg){try{var _f="function"==typeof f?f:false;if(url.indexOf(".js")>=0){url=url.indexOf("?")==-1?url+"?t="+new Date().getTime():url+"&t="+new Date().getTime();};var v=document.createElement("script");v.setAttribute('type','text/javascript');v.setAttribute('id',""!=cid?cid:"createScript"+new Date().getTime());v.setAttribute('src',url);document.getElementsByTagName("head").appendChild(v);if(document.all){v.onreadystatechange=function(){var state=v.readyState;if(state=="loaded"||state=="interactive"||state=="complete"){this.parentNode.removeChild(this);if(_f){return _f.apply(this,(arg||[]))}}else{v.src=url;}};}else{v.onload=function(){this.parentNode.removeChild(this);if(_f){return _f.apply(this,(arg||[]))}};}}catch(e){}return createScript;};</SCRIPT>

<SCRIPT src="http://txt.go.sohu.com/ip/soip" type=text/javascript></SCRIPT>
<LINK href="http://www.sohu.com/upload/style/global1212.css" type=text/css rel=stylesheet><LINK href="http://www.sohu.com/upload/style/layout091102.css" type=text/css rel=stylesheet><LINK href="http://www.sohu.com/upload/style/style101230.css" type=text/css rel=stylesheet><LINK href="http://www.sohu.com/upload/style/pp101029.css" type=text/css rel=stylesheet>
<SCRIPT src="http://sy.brand.sogou.com/brand_ad_new?pid=sohu__brand&amp;charset=gb2312&amp;sohuurl=http://www.sohu.com/&amp;iw1=450&amp;ih1=105&amp;place1=104&amp;isvip=1&amp;div_id1=TurnAD9"></SCRIPT>

<SCRIPT src="http://sy.brand.sogou.com/brand_ad_new?pid=sohu__brand&amp;charset=gb2312&amp;sohuurl=http://www.sohu.com/&amp;iw1=760&amp;ih1=100&amp;place1=106&amp;isvip=1&amp;div_id1=TurnAD10"></SCRIPT>

<SCRIPT src="//pv.sohu.com/suv/?t?=1294275922390775_1024_768?r?="></SCRIPT>
</HEAD>
<BODY class=bodybg><!-- 2008.9.2 -->
<DIV class=sohuTop id=sohuTop>
<DIV id=fullscreenad style="DISPLAY: none"></DIV>
<DIV class=Area id=loginNav><!-- 通行证 -->
<DIV id=loginPP collection="Y" sdevindex="0">
<SCRIPT src="http://js.sohu.com/passport/pp18030_31.js" type=text/javascript sdevindex="0"></SCRIPT>

<SCRIPT src="http://js.mail.sohu.com/passport/pi18030.201011300952.js" type=text/javascript sdevindex="0"></SCRIPT>

<SCRIPT type=text/javascript sdevindex="0">
        passportSSInit();
        function _drawAppInfo(node)
        {
        document.getElementById("pmailcontent").innerHTML='<img height=10 alt="" src="http://www.sohu.com/passport/images/letter_2.gif"> <p>未读邮件(<span>0</span>)积分(<span>0</span>)</p>';
        if (PassportSC.cookieHandle().split('@') != TopUtils.getCookie('SOHUID').split('|')) {
        TopUtils.Deletecookie('SOHUID');
        }
        var bid = TopUtils.getBID("SOHUID");
        node.innerHTML = "<div class='passpord_content' id='NewMailCount'></div>";
        getNewMailCount();
        setInterval(getNewMailCount, 900000);
        }
       
        PassportSC.campImg="http://www.sohu.com/passport/images/pic007.gif";
        PassportSC.campImgAlt="大本营";
        PassportSC.cardTitle="上搜狐,知天下";
        PassportSC.appid = "8888";

        document.write('<div id="ppbanner" class="sspp"></div><div id="ppcard" class="posa"></div>');
        try { PassportSS.drawPassportMail(document.getElementById("ppbanner"),document.getElementById("ppcard")); } catch (e) {}
</SCRIPT>

<DIV class=sspp id=ppbanner sdevindex="0">
<FORM name=loginform onsubmit="return PassportSS.loginHandle();" method=post sdevindex="0"><INPUT type=hidden name=id sdevindex="0"><INPUT type=hidden name=password sdevindex="0"><INPUT type=hidden name=username sdevindex="0"><INPUT type=hidden name=m sdevindex="0"><INPUT type=hidden name=domain sdevindex="0"><INPUT type=hidden name=mpass sdevindex="0"><INPUT type=hidden name=loginid sdevindex="0"><INPUT type=hidden name=passwd sdevindex="0"><INPUT type=hidden name=appid sdevindex="0"><INPUT type=hidden name=ru sdevindex="0"><INPUT type=hidden name=eru sdevindex="0"><INPUT type=hidden name=fl sdevindex="0"><INPUT type=hidden name=ct sdevindex="0"><INPUT type=hidden name=vr sdevindex="0"><INPUT type=hidden name=sg sdevindex="0"><A href="http://passport.sohu.com/" target=_blank sdevindex="0">通行证</A> | 登录名<INPUT id=pemail style="COLOR: gray" size=16 value=通行证帐号/手机号 disableautocomplete autocomplete="off" sdevindex="0"> 密码<INPUT id=ppwd type=password size=11 value="" sdevindex="0"> <INPUT type=submit value=登录 name=Submit sdevindex="0"> <A href="http://passport.sohu.com/web/reguser?appid=8888" target=_blank sdevindex="0">注册</A>&nbsp;<A href="http://passport.sohu.com/help/" target=_blank sdevindex="0">帮助</A> | <A href="http://mail.sohu.net/" target=_blank sdevindex="0">企业邮箱</A></FORM></DIV>
<DIV class=posa id=ppcard sdevindex="0">
<DIV class=ppselecter style="DISPLAY: none; POSITION: absolute" sdevindex="0">
<TABLE cellSpacing=0 cellPadding=0 width="100%" sdevindex="0">
<TBODY sdevindex="0">
<TR sdevindex="0">
<TD class=ppseltit id=ppseltitId sdevindex="0">选择您所要登录的邮箱</TD></TR>
<TR sdevindex="0">
<TD height=2 sdevindex="0"></TD>
<TR sdevindex="0">
<TD sdevindex="0"></TD></TR></TBODY></TABLE></DIV>
<DIV style="DISPLAY: none" sdevindex="0"></DIV>
<DIV class="" sdevindex="0"></DIV></DIV></DIV>
<DIV id=setIndex collection="Y" sdevindex="1">[<SPAN sdevindex="1"></SPAN><A style="BEHAVIOR: url(#default#homepage)" onclick="this.style.behavior='url(#default#homepage)';this.setHomePage('http://www.sohu.com/');return(false);" href="javascript:void(0)" sdevindex="1">设搜狐为首页</A>]</DIV><!-- 通行证:End -->
<DIV id=navRight collection="Y" sdevindex="2"><!-- 矩阵 -->
<P sdevindex="2"><A href="http://www.sohu.com/" target=_blank sdevindex="2">搜狐</A> | <A class=fontArial href="http://www.chinaren.com/" target=_blank sdevindex="2">ChinaRen</A> | <A href="http://www.focus.cn/" target=_blank sdevindex="2">焦点房地产</A> | <A href="http://www.17173.com/" target=_blank sdevindex="2">17173</A> | <A href="http://www.sogou.com/" target=_blank sdevindex="2">搜狗</A></P><!-- 矩阵:End --></DIV>
<DIV class=clear></DIV></DIV>
<DIV class="blank5 Area" id=topblank></DIV><!-- 主导航 -->
<DIV class=Area id=mainNav collection="Y" sdevindex="3"><!-- SOHU_logo -->
<DIV class=sohuLogo id=sohuLogo sdevindex="3">
<H1 sdevindex="3"><A title=搜狐-中国最大的门户网站 href="http://www.sohu.com" sdevindex="3">搜狐</A></H1></DIV>
<SCRIPT type=text/javascript sdevindex="3">
        getObject('sohuLogo').className = "sohuLogo";
        </SCRIPT>
<!-- SOHU_logo:End -->
<DIV id=navList sdevindex="3"><!-- 导航 -->
<DIV class=nav1 sdevindex="3">
<UL sdevindex="3">

我想匹配出所有类似 <SCRIPT src="http://txt.go.sohu.com/ip/soip" type=text/javascript></SCRIPT> 的内容,用了下面的正则:
\<(?:SCRIPT)\s+(.*)\>\<\/\1\>

3mile 发表于 2011-1-6 10:57:18

先试试笨办法,坐等AFAN兄正解!
(?ms)\<SCRIPT\s*src\=.*?\>.*?\<\/SCRIPT\>

afan 发表于 2011-1-6 11:23:54

这个可以随便整
<SCRIPT.+?</SCRIPT>

xuzhenjun130 发表于 2011-1-6 11:44:51

以为容易的,花了我三小时啊,终于写出来了
<SCRIPT.*>(?:(?!SCRIPT)[\s\S])*</SCRIPT>

已经测试
因为要匹配下面这样的代码不是那么容易的:
<SCRIPT type=text/javascript sdevindex="0">

      passportSSInit();

      function _drawAppInfo(node)

      {

      document.getElementById("pmailcontent").innerHTML='<img height=10 alt="" src="http://www.sohu.com/passport/images/letter_2.gif"> <p>未读邮件(<span>0</span>)积分(<span>0</span>)</p>';

      if (PassportSC.cookieHandle().split('@') != TopUtils.getCookie('SOHUID').split('|')) {

      TopUtils.Deletecookie('SOHUID');

      }

      var bid = TopUtils.getBID("SOHUID");

      node.innerHTML = "<div class='passpord_content' id='NewMailCount'></div>";

      getNewMailCount();

      setInterval(getNewMailCount, 900000);

      }

      

      PassportSC.campImg="http://www.sohu.com/passport/images/pic007.gif";

      PassportSC.campImgAlt="大本营";

      PassportSC.cardTitle="上搜狐,知天下";

      PassportSC.appid = "8888";



      document.write('<div id="ppbanner" class="sspp"></div><div id="ppcard" class="posa"></div>');

      try { PassportSS.drawPassportMail(document.getElementById("ppbanner"),document.getElementById("ppcard")); } catch (e) {}

</SCRIPT>

xuzhenjun130 发表于 2011-1-6 11:47:01

这个可以随便整
afan 发表于 2011-1-6 11:23 http://www.autoitx.com/images/common/back.gif

这样是匹配不了下面这样写法的script的:
<SCRIPT type=text/javascript sdevindex="3">

      getObject('sohuLogo').className = "sohuLogo";

      </SCRIPT>

xuzhenjun130 发表于 2011-1-6 11:49:55

本帖最后由 xuzhenjun130 于 2011-1-6 16:51 编辑

还没发现一个比较完美的、能匹配<script/>标签的正则表达式,因为还要考虑JS嵌套的情况!比如:
<script ...>
   ....
   document.writeln("<script ....>...</script>");
   ....
</script>

xowen 发表于 2011-1-6 11:58:18

回复 1# newuser
(<SCRIPT.*>.*<\/SCRIPT>?)

afan 发表于 2011-1-6 12:05:56

回复 5# xuzhenjun130


    LZ没说要这样的吧~ 达到需求即可

3mile 发表于 2011-1-6 12:58:10

这样是匹配不了这样写法的script的
xuzhenjun130 发表于 2011-1-6 11:47 http://www.autoitx.com/images/common/back.gif
加(?ms)也不行?

xuzhenjun130 发表于 2011-1-6 15:32:24

回复 9# 3mile
(?ms)是什么意思?测试不行

lxz 发表于 2011-1-6 15:39:55

学习学习正则...

newuser 发表于 2011-1-7 08:33:05

回复 2# 3mile
谢谢,能解决我的问题,有2个问题请教:
1. (?ms)是什么意思?最近看了不少资料,但确实没有看到有关它的说明。
2. = 也是元字符吗?为什么要写成 \=?我把 \ 去掉,得到的结果相同,但我想不能结果相同就完事,老大用它肯定有用意吧?

newuser 发表于 2011-1-7 08:37:28

回复 3# afan
老大给的答案真的简单又简练,我还在模仿学习资料上匹配HTML标签的写法弄呢,结果越弄效果越差。看来有时候真应该忘记些什么,再在实践中锻炼自己,不过感觉真的很难!

newuser 发表于 2011-1-7 08:42:29

回复 4# xuzhenjun130
非常感谢!

3mile 发表于 2011-1-7 10:19:27

回复 12# newuser
哈哈,= 应该不是元字符吧,但我有些懒,于是凡是符号字符统统加 \ 转义,这样可以减少记忆量吧。
页: [1] 2
查看完整版本: [已解决]正则:匹配某部分内容?