|
本帖最后由 maker 于 2010-4-16 10:23 编辑
功能:从文档读取范文,比较每行字串和当前字串的相似度,字串长度不一,望各位前辈赐教优化方法,谢谢
补充一下算法,有的朋友看代码会理解错误,浪费大家的时间很抱歉
相似度我的算法是这样的
因为字符串长度不一,所以要变换位置,比如:
字串1:123a456787
字串2:1234567
这样需要比较4次,每次起始位置不同
因为串1比串2长,所以要移动串2的位置,可比较次数为两串长度差加1(10-7+1=4次),注意短串是整体移动
相似度就是相同字符个数在整个字串中的占有率,取最大相同数和短串长度来计算,比较的时候字串不能穿插字符和删减字符,以短串整行移动步进做比较
比较1(短串右移0位):相同为123, 3除以短串长度为比率
123a456787
1234567
比较2(短串右移1位):相同为4567, 4除以短串长度为比率
123a456787
 1234567
比较3(短串右移2位):无相同
123a456787
  1234567
比较4(短串右移3位):相同为7, 1除以短串长度为比率
123a456787
   1234567
最后取其中比率最大的数值为比较结果
文档范文:
6,0111100111111001100100000110
u,0001001100110011001111110001
f,0000111111001110110011001100
k,1001101111011011100110011000
d,11111111110111011101110111111111
u,10001100011000110001100010101100100
u,01001010010100111001110011111100110
r,111101101011110111001101010001
d,1110101110111011101111010001
a,1010011011111101111100010001
r,1111010001010111110111011100
e,11111100100011111100010001110110
f,11110100011111111111100111000110001
c,11101101111000110001101111111001100
h,1001100111111111100111011000
c,011111001110000100001001001100
5,111101100011111100011000110110
z,110010111100110011001100011110
e,111111100011111110001111111000
f,1111100010001110100010001000
y,1000010101010010001000100010
5,110000011111001100111100
k,010001100001100010101100111000
2,0111000100010011010110010110
w,1000101110111001010101010000
3,011100101000110100101101101110
4,0001001101110011111100100010
h,0010000000011101100110011000
a,001100111001010010101111010011
s,01111110010111000011110110111100110
b,11110110111111011111110111111010000
b,111010011001101010011111
f,01111100110011110100110011001100
e,0111000001001011100010000011
f,1000111111001100111011000100
z,1111001000100100110011111111
a,0100011000101000101110011000
d,11111001100110011001100111111100
f,1110100010001111110011000100
6,0001111110011111101110111110
5,0111011111010011001010100100
6,1100100011111011101111010001
c,1110000100010001000110110100
e,01110100110011111100110010011000
9,11001010101101110011010100010001
e,0111010001111100111110001000
z,1111001000100100000011111000
b,111101111111011111111101111111
k,1011111011101110101110111000
u,0001001100110011001100111100
z,0000011100110011010111110111
z,1100001111011001000111101000
r,111111101111111010110101101000
r,1111010001000111010111000100
9,0111110111011111010111101000
9,1010101010110110101000000001
3,00010101111100110111001110101110
b,11101001101011111001100111111100
e,111111110011111111101111110110
f,1111100111011001100110010001
3,0110000100101001100111011000
a,01100110111010101110111100110001
3,1111001101110011001110110001
x,1011101001101100111010100010
9,0001111110111011011111111100
k,0010010001001100101010011000
4,0101010111011101111100010001
r,1111100110001110100110011000
r,000000111101111111101101001
b,111110110101111011111110111111
q,011111101111001110010111100111
4,0110011010101010111100110001
y,100110101101000001000010000100
w,1011101110111001010001000100
2,1110001000100010010001111111
s,1111100111011111001110111110
k,0100110101011111110101010100
f,011110100001001111111100110001
5,1100111111001111100111011111
5,1100100011010011001011000000
e,1110100111111001100111110001
b,0001111110111110111110111111
8,11001001010110111011010100010001
k,1011101011001110101010110001
s,011110001111001100010111
6,100000111101111101101110
q,011110001000100010010111
w,0100010001100010000110011000
y,100101010100001000100010
5,0100111111101111001111110110
9,111111011101111100010111
x,000011101101100011100101111011
p,111101001110011101001000010000
2,011110011001101011000111
h,0001000100100110001100100010
f,0111110111001110100010001000
8,0010011101010111110111010111
w,1101110111111111011101010000
5,11001000111100111011111100010001
d,1111100110011100100111111110
x,1011101011101110111101010100
r,0111000010001111100110010001
8,011101101101110010111101101110
c,01111100111000110001100111111100000
c,0111100110001000100101110110
k,1101110111111111010101000100
8,1011011001110011111011110011
2,111100010011001001001111
8,1111101101101010101011110001
u,110011100111001110010101101110
f,1111110111011111110011011101
h,100001000011101100011000110001
c,1111100110011001101101110001
3,111100110111001100111111
p,111100110011111100010001
h,0000001100110011001100110001
r,1111101110111110101110110011
p,11110100111001111111100011000100001
x,0011101111100110011111011100
w,0101111111111111101110111001
q,0100000010001000000001010000
8,011110011111011111011111
h,10111011101111111011101110111000
d,00011101111111111101110111111110
c,011110100111000110001100101111
q,0111100110001000101101110000
9,0110100110011111100101011010
2,1111101100100100000011110001
b,1000111010111111011111111110
k,1001101011101100101010100010
f,111010001001111111010101010100010001
e,11111000100011101000100011100010
u,11011100110011000100011101110011
2,100111100110110101111011001
2,011101101101110110111000100100
5,01101100111011110001100101100000
p,1110100110011110100010000000
k,1011101111101111111111011100
d,0111010101000100010111111000
3,11001001010110111011010100010001
b,11101111111111111011111110010001
r,1111111111111110101110111001
r,110101001110011100101001010011
f,11111100011000111101100011000110001
d,11111100111000110001100111111011000
y,1001101111110101010101010001
b,00011101111110111111101110101110
h,1001100110011111100110010001
2,0110100100100010010011111111
g,0111100010000011100110010011
z,111110110011111011111111
5,01110100110010110001000111101000
x,1001100001100110010110001000
f,0111010001001111110011001100
r,111101001010110100101001010001
d,11101100011000110001100011001101100
x,10010001000100001100101000001100001
f,011101000110001101001000010000
b,1111110111111111110111111100
e,11101000100011001000111110001000
5,0011111111001111000100010110
d,11100101101001010010100111110010000
s,011110010110000110010111
z,011110001000100010001000001110
e,111101100011110110000111101100
s,011011000111111000111110100001
h,100111001111111100111001010010
q,111101001000001100011011001101
s,011100100101100000101001101110
8,01100100100111110111101101110001100
w,0101110111011011101110111000
3,0100111000110111001110110110
q,00011110101000010001001111100010
x,0010100001000100001010011000
8,010000001001101100101001100100
4,01101011011110100101111110010100100
p,111110010000100110000000100011
z,11100010000100010000100011111110000
a,011110111110101101111111100010
u,00110011000110011001100111100110
x,11011011110111000110011110100110000
r,0111010001110101010111001000
2,11110100110001000110001001111101000
a,0011011101111111111111001100
8,001110011010111101011110100111
k,10010100101110011100110100100101000
s,011101100011110001110001111110
s,0111000001000011100000010010
s,011101101001110000111101101110
6,0100000010100001010110001000
y,0010101111001100110011011001
3,1110001001000110001111111100
a,0001011101101010111000010001
2,1100001100110101100111110001
z,1111001100100010010001110110
x,0001010101100010010101011000
h,0001110111111111110101000100
q,1000101111101110111011110011
w,01010101110011001110111011001100
2,0111110100110010011101111100
e,01110100011001110100111100010001
z,10001111001100110110110011110111
c,01010100110011000100001110001000
k,111000101101110011111100101001
3,11110100010001000001010011011010000
k,100111011011100111001001010011
s,0010011111000111100111010111
6,0110100011101100010111101000
a,110011100110101101011111100011
5,1011101010101001100001000011
k,0101110101110111110011001000
f,1001011101001011101001100010
4,100000100010111000101010111100100010
c,0010011111001000100001010111
y,10011110100111001111011000110001100
w,01001110011001010110001100011000010
2,011100101100011001100100011110
4,001000111000110101111111000010
c,0011111011001100011000110001
8,01100101111011010101011010001000
x,10010010100110001100010101101110000
y,1000000101100110010001000100
8,11111101010110010100101011110100000
y,110011101010110101001010010100
u,1101110111011101110111110111
4,0110011100111111111000100000
5,0111010001110001000111011110
r,111111001100110111001000
2,1000111111010001101111100111
3,01100010101010001001011010001000
x,10010000011000100100010110001000
y,0100010100110011000100010001
q,100010000100100100100101001000
f,010001001100111011001100110100001000
2,100001000000000100110001001
a,01101110101010101111011100110000
z,01111000100010001001010011100111111
d,0001111110011001000100001110
w,0110011001001100110111000000
q,10001010110010001000100001000011
z,111110011100110011001100111111
k,11001110110111001110010111100110000
e,1000011101100111010011000111
b,01110001111101011101011110001000
9,1010101010000101100101110000
c,1110100110000000100011100100
6,000010001000100110010100
4,00010001000100001100001100100010
4,000110011101111101110011100100
h,00011010101011101011001000100010
z,1110000101010001100111110001
h,0010001011101110101010111001
z,0111000100100100100011000011
6,0111110011111101110101110110
w,10010100110100101001011000110000100
q,01110101110011011101001100010001
6,011101100011111110111101001100
y,00011101001100110011001110010001
h,00001000101110111111101010100010
g,010011000010000100010000100110
x,01010000001010101101100010001000
b,11110010010111001001010011111100000
8,0110110101111110100111110111
z,00001110111100100110110011101111
u,1001100110011001100100010110
9,11011011001110111111001100111101
b,11100011001111110011001010101100
q,11101001000100010001101111110101
2,11101010001111011101111111110001
e,11110100001111010001100011111100000
6,0000011010001110101110110110
3,11101010001100110011011100010001
2,0101000100010010010011000010
f,00011111111110011111100110001000
w,110111101011010111100010000100
u,100011001010010100101001101100
2,111010001000110010101001000001
s,1100001011110011000100010001
w,011111111111111101111011110010
e,1111100011111100110011110000
8,0110010101101110100110101000
9,01010100101001000011000011010100000
w,00100010000100000001100111001000010
q,011001100011000010000100000111
f,00100000000110001000110001100010000
s,011101101111100001101001111110
8,000000000000100101011011000100
w,01001011011110110110101101011000010
3,010100001000011000010001100001
w,001000110001101001011001010010
q,00111110001100001000010000011100010
e,1110110001000100010010010100
3,11010000100001000001000010010010000
f,11111100011010110001100001000010000
w,001000001011010110111001110011
9,001100100111001101111000100110
z,11111110110011001100011011111110000
x,0101011100100011010101001000
x,010010101000110011000101010011
r,11110100011000110110100101001000010
g,01110100011000010011100010111100100
s,011101000011100000010000101111
4,00100110001010111111001100010001
p,111101111111111111101000010000
g,0111110010001011110001110011
a,10000011011111111101111101000100
4,100101001010110110101111110010
z,1110011001000100100011111110
3,1111001100110011001110100100
g,111000010011000100011110
9,01101011101001101010110100010001
z,111110111101101110011100011110
c,01110100111000010000100101111001100
5,01111011101111000111101011110001000
c,111000001100001000001001011100
y,10111010111001000100010001000100
h,01001000011100110011100011001100010
3,1111001101000010001010110001
x,0010110011001100111010101001
2,11111100110001000100010001000011110
5,01101100111110011101111110001000
z,1100000100101100110010000111
6,01100100001110110011100110101101100
8,1110101111111110101011100100
u,010011100110011100111001011110
s,11110110101111001111110111111001100
9,0110111111011111001111110110
x,0100010100100010010010010001
f,0111110001000100000000010000
x,0101010100110010011111010001
p,0111010111110110010001000000
6,1010110110001000100100010110
s,1110100110010011000110010110
d,11110100111101111011110111111010000
6,01111100111011111011111101100000
9,001001010111101101111011101111010
p,11111011101111111100110011001100
f,11111110011111111111110011100110000
u,100111001110011100111011011110
k,01110111001110011100001100011100001
f,01111110110101110100011001000000
z,0111100110011010110011000111
s,1111100101000011000001011010
a,110111101110111101110011100011
u,00100011001110111011111100010001
2,0110000100010011010111000011
q,000110001001001001110000010
e,111000000000000100000000010001
z,01100000000001000000100010001100
s,1110100111100011100101110001
x,1001101011001100110001000010
u,1011101110111001100111100000
2,1110101100100110010111111000
8,01110110110111011110110111111001110
5,0111100110011010000110000110
9,1101001100111111001110100010
9,0110100110010110001110100100
9,1101001100101110001111010001
z,11000100110110011001111100010011
e,001001101111101111100100111
s,1101001111011111001111110100
r,11100001000011100010000110011000
w,100101000010100001010100101001
f,01110101011111111100110010001000
p,011100101111011111101100011000
5,011101100011100001111011101100
4,011110111011111110111111101011
s,111111000011100000101001001110
y,10101011011101110111011100110011
e,0001111110011111110110011110
8,110101110111101111010000000
f,111111111111000111101100010000
s,10011101100011101001100001010010
x,0001110011100010111111001100
2,00000111111110111010011011111100
d,1111110111011101010101101000
f,111100100111100101100000000
9,001001111101111111001111110
y,00110101010110011001100110011000
h,00011001000111111001100110011000
8,11101010101011101001100111101100
a,001100110101010111111000
u,0110011011101110111000110001
3,10010111011100100010001001100100
y,010110110000011011011011001001
q,11101111001100110011011111101110
a,001100011101111011111111111001
9,01010111111101110010111010001000
c,100010001111110111001100110101110010
8,01111110111111101011010111111000000
s,1110100011000001100111100000
r,01111010111101111110110111101100000
k,010010101111110111111101101001
9,01010101010110000001001010001000
4,10011100101001011010111100001000000
2,111101011000110011001111111100
u,100101001110011100111001100001
r,111101011011110111001011010111
8,0110010101100110100110010111
8,0110100101101010101011100100
4,00100001000110000100111111010010000
4,001100111010100101011110101000
s,01111001111010011001011100010001
z,10000111001011101100100011100010
u,110111101111011110111101101111
c,10001111110011001100010001110001
9,1111010011000010000011010010
8,0110111111111001101101110001
d,111101011010011100111111011100
8,000001000101100100101001000001
c,10001000110000100001000111011101000
k,10001001000101010001001100010000
u,00010001101110100010001010101100
9,01000000000100110011101100010001
q,1110101100110011111111110000
y,10010101111100111011101100011000
c,111100001000000000011001001100
g,01101001100010111001011100000001
8,01101010111110110001011100001000
9,011101010101111110010110
b,0111000011011011100111010010
f,00011111100111111111100111001000
y,10010101001100001000110001000010000
5,11110110001111000011000111111000100
2,011100001000010001001100011111
9,011001001110011011110001101100
g,111101001110001101111001001110
6,01101011111110111011010100010001
r,0000111110111110111110111011
k,01100011001100011100101101011010000
x,10010010000010000100010100000110000
h,10101010111011101001100100010001
a,01100111001111011110111111001110000
4,00110001100101001011111111101110010
k,10011101011100110101101010011100010
d,111001001110011100111001011100
y,11001011111111010111101111011000110
4,1011011100011101110001000100
c,011111100111000110000111100110
z,0110001000001100111100000000
c,011101101111001110001111101110
a,001000110101011111111001100000
g,111111001100101000111111001100
w,101001010011111110111101111011
y,110010111101110001100011000111
a,1010011101010101111110010001
x,01000001001000100101100110001000
c,0111110110001000010101111000
f,1111100110001110100110011001
d,0111010001001000100010010110
2,11101010001100010100011000010000
s,01110100101100011111001111110111101
u,011000111000101000101000001111000110
3,10001000111100010010001100010110
s,00101000110000011001011000010001
r,01110100110001010100010010001000
f,101010110011110000001100011000
9,01100101110110110000110010001000
4,001001110111101011110010
4,00100110111011100111001010001000
3,011010100011011100110001011001010000
6,11101000110010101010101110110001
4,000000001011011111111111011010
u,10011001100110011001100111110110
9,01101001100111110111100101100110
h,000010000111111100111001000010
z,000011111100101011010100111110
b,111111011111111100111111110001
s,0010110011101100111101101100
9,00101010100110010111000100010110
5,01111000111000000001100011100100
y,100001011111101101101100100100
2,100111101001001010010100111
h,0110111001100111010001000000
5,1111100011111011001111110110
s,01101000110001100001100101100001
p,11101001110111110110010011001000
2,00110011001100010101100111010001
6,00010111000111101000100000100110
u,00010100111001110001100010111000000
b,11110100111001010010100111001111110
x,0101001110111011101011001000
c,1111101000110001100011111
s,001010100101000000101000101110
w,101101011011110011010110100100
b,111111101111111011010111101110
3,01101001101110011101111100010001
z,11100111111011011100110001111111111
4,00010011100111011001100110001000
3,11110100110011000110100111111001100
k,000011011111110110100100100100
f,111010010011110010000001000
w,010110110101101101000000000
y,100010101100111001000011000010
c,01111100001000010000100010101100110
e,01110000100011100100010011100000
8,0000001110110011010111010011
3,0110100010101001000111011000
w,100101011011111011010110101001
z,01110001011001101100111110001000
4,0000000100110101010111110010
6,0110100111111010100010100110
q,111000001000011000010001011111
9,0101101110111111001110110100
k,0010010011001100001000100001
3,000100111110101101110011001
y,001100000110110110100100100
x,1001001001110101101010100001
c,01111111111100011000110110111100010
x,00010110010011100011100110000000
g,11001111100000001110001011111101100
3,011101000011001001011001001
e,11111001111111111000100011110000
6,110010001001101110111011101101010001
p,11101001100111100000000000010001
2,11101011001100100110110011111111
4,001000111101111101111111100111
w,00101011011111111011110111101111000
2,1101110110011010101011100000
2,11100001110010100101110011111111000
k,110111111111110111111101111001
a,0011101100110011011111001000
8,110101010101101011000000000
d,000000100111101101101111111
k,1001101010101110101010011001
5,11101000111100010001001100010001
f,00101110111010001101100110001000
p,111101101101111011000100001000
g,011111100010000100111101101111
f,011100100110100110100001100
f,000010111100000111001000010000
w,100100001000101011011100101000
w,010010000110110001100010000000
f,1110100010101111101011111001
b,11100001001011101001000111111110
q,0111100110011001110100100011
3,11011011010000110011011100011000
r,00100011000100010011001100010001
3,011110001100110000111101101110
b,011100100111111010011110100110
k,0001101010001000101010000001
k,10011101101110011100101101001000011
d,011101100110001100011000111110
x,1101011101100110111010110001
3,01100000110011100011100111111011100
g,1110100100000011100111110110
q,01101001100110011001011000110011
r,11111001100111101001100100001000
2,1111010100010010010011101000
9,00101110111001101110111111001100
u,0010101010111001100110110110
4,0011011101111010111100100010
r,1110101110111110101010101001
y,000001011001001001000000001
x,00000000101111100110111010101011
5,11101000110100111011010100010000
h,1001100111111111101110110010
3,01110110110011000011110110111000100
8,101010101001001110111010001000010001
5,11011111100011001110001100101100
s,1011000110010111000100001010
9,11101010101111110101001100100100
z,11110011101011001100100010001111
s,01111001100001110001000001110010
3,0110000100101011100111010111
6,111001000111111101111011111101
8,01100110100110111111100111110101100
3,1011101110101000000100000110
s,0111100101000011000110010111
z,11110010001001101100110011111111
3,01110101000100110001110101110011
e,011101001100111101001100111110000000
8,11110101010110110101010011010011
2,1110101000100000010111110001
5,11111110011111110011000110111001000
3,00010111001100110011101111100110
a,001000001001010010101000110001
k,010011101011100010101101111001
d,0101100110011000100010011110
y,0010101110101111111011011001
w,1001110101010100011000100010
k,00100010011101100010100010001000
3,0000101111010101000110100011
d,11010101100001100011000111110011000
f,11110100101111011011100111000110000
3,011110001000110001100001111100
x,101111010101001111011011110001
w,010110101101111011111001110010
g,101111100011000110010110100111
s,0000110000101000110010011000
x,010000001000100001100100111000
a,001100111001111011011111101000
2,0011100100110010010110011111
8,011101010111011010010101
f,11101001100111010001000100010000
3,0010000100010001100101010111
y,10001010100111000100001000010000100
5,1110100111110011001010110101
s,11000001110101110111111100110001
x,00000001000110000100010100000100000
3,011000001000100000110101100101
d,11110100101001110011100111111001100
4,001100101001010111110111100010
x,0010101001000110011000011000
f,0111010001000111110011000000
x,10010010100110001100101001001000010
y,100110101101100011000110101101
p,11110100011001111110100001000010000
4,001000011100111111110011100001
e,1110100011101000111011010000
b,0000011100011110100111110010
w,001001011010110111010110101000
k,110011101111110111101101110011
h,01000100111101000100010000001000
s,01101001111000011001111010001000
y,0001100100100100010001000100
4,001000100010011001111100000100010001
z,01110001001101100100100011100111
3,11101010001001100011001111111110
3,01110001001101100011100111111110
4,00110011010000001010111100100010
4,00010011011101111111111100100000
6,11111001110110111010101010110100
5,00011110101010011111011101111101
e,11110001000111110001000011101100
u,00100010001100010001100110100100
b,1111100110011111100001010111
z,100011111011101011001100100011110011
4,10011011100111010111101110011000
3,101000100010000000100001000110100110
5,100000100111101110111001101111010
2,0111101100110001010111110001
h,00011011101110101110101010100010
w,001000000010010110101100101001
f,011111110011100110001100001001
k,0100000111101110100010010001
6,0001110001010101010100000000
k,0101010101101110100100010000
5,0000011111001110000110011111
z,00010000100010000000010001011110000
a,10110101101011111011111111000100001
6,010100100101101001001010000001001
9,000010010110101110111100100
c,100011111110110110010011011
3,010001101110101101111110100100
y,00010011111111011100110011001100
c,1110000100000001100101100000
r,111000100011110100100010000100010001
x,000000011011111101111110111010100010
6,01110110001111111011110110111101110
5,01101000111010111011011100011001
c,00000110111110001000100111110110
函数模型:
$txt = 'file.txt'
; Driver: look up two sample bit-strings with checkCode() and show how long
; each look-up took (TimerDiff result) in a message box.
; The original author annotated the first sample with "k" and the second with "2".
Local $aSamples[2] = ['1000101011001110101010011000', '11100101100001000100011000111101000']
For $sSample In $aSamples
    $t = TimerInit()
    checkCode($sSample)
    MsgBox(0,'',TimerDiff($t))
Next
; checkCode: find the sample in the file named by the global $txt that is most
; similar to $tz and return that sample's label.
;
; Each usable line of the file has the form "label,pattern". Only the pattern
; part (the text after the first comma) is compared against $tz via execSTR();
; lines without a comma or with an empty pattern are skipped.
;
; Parameters:
;   $tz       - the string to classify.
;   $iAccept  - optional; a similarity strictly above this value (or exactly
;               100) is accepted immediately and ends the scan. Default 93.
;   $iMinimum - optional; a line only becomes the best candidate when its
;               similarity is strictly above this value. Default 85.
;
; Returns:
;   the label of the accepted / best line, '-' when no line qualified, or ''
;   when the file could not be opened (an error box is shown in that case).
;
; Side effects: shows a tray tip with the label and its similarity, and beeps
; when a label was found.
Func checkCode($tz, $iAccept = 93, $iMinimum = 85)
    Local $ret = ''
    Local $mydu = 0
    Local $mfile = FileOpen($txt, 0)
    If $mfile = -1 Then
        MsgBox(16, "错误", "文件打开错误")
        Return $ret
    EndIf

    ; '-' is the "nothing matched" marker that callers already receive.
    $ret = '-'
    Local $line, $iComma, $sPattern, $Nowdu
    While 1
        $line = FileReadLine($mfile)
        ; Stop on end-of-file and on any other read error, so a failing read
        ; cannot keep the loop spinning.
        If @error Then ExitLoop

        ; Separate "label,pattern"; the label and comma must not take part in
        ; the similarity comparison.
        $iComma = StringInStr($line, ",")
        If $iComma = 0 Then ContinueLoop
        $sPattern = StringMid($line, $iComma + 1)
        If StringLen($sPattern) = 0 Then ContinueLoop

        $Nowdu = execSTR($sPattern, $tz)
        If $Nowdu = 100 Or $Nowdu > $iAccept Then
            ; Good enough: take this line and stop scanning. Record the score
            ; so the tray tip reports the similarity of the accepted line.
            $mydu = $Nowdu
            $ret = StringLeft($line, $iComma - 1)
            ExitLoop
        EndIf
        If $Nowdu > $iMinimum And $Nowdu > $mydu Then
            $mydu = $Nowdu
            $ret = StringLeft($line, $iComma - 1)
        EndIf
    WEnd
    FileClose($mfile)

    TrayTip('字串为:'&$ret,'相似度:'&$mydu,1)
    If $ret<>'' And $ret<>'-' Then Beep(1000,100)
    Return $ret
EndFunc
; execSTR: similarity of two strings as a whole-number percentage (0-100).
;
; The shorter string is slid, as one rigid block, along the longer string
; (no insertions or deletions). For every alignment the number of positions
; holding the same character is counted; the best count divided by the length
; of the shorter string, times 100 and rounded, is the result.
;
; Parameters:
;   $string1, $string2 - the strings to compare (order only matters for the
;                        containment shortcut described below).
;
; Returns:
;   100 when the strings are equal or $string2 occurs inside $string1,
;   0 when exactly one of them is empty,
;   otherwise Round(best_match_count / shorter_length * 100).
Func execSTR($string1, $string2)
    If $string1 = $string2 Then Return 100

    Local $len1 = StringLen($string1)
    Local $len2 = StringLen($string2)
    ; Nothing can be compared against an empty string; returning 0 here also
    ; keeps the final division from dividing by zero.
    If $len1 = 0 Or $len2 = 0 Then Return 0

    ; Shortcut: $string2 found verbatim inside $string1 is a perfect match.
    If StringInStr($string1, $string2) Then Return 100

    ; Orient the pair so one code path handles every length combination.
    Local $sShort = $string2, $sLong = $string1
    Local $iShortLen = $len2, $iLongLen = $len1
    If $len1 < $len2 Then
        $sShort = $string1
        $sLong = $string2
        $iShortLen = $len1
        $iLongLen = $len2
    EndIf

    ; Split each string into characters once (element [0] holds the count,
    ; characters start at [1]) instead of calling StringMid for every
    ; character of every alignment.
    Local $aShort = StringSplit($sShort, "")
    Local $aLong = StringSplit($sLong, "")

    Local $du = 0, $ndu
    ; $iShift is how far the short string is moved to the right; there are
    ; (long length - short length + 1) possible alignments.
    For $iShift = 0 To $iLongLen - $iShortLen
        $ndu = 0
        For $i = 1 To $iShortLen
            If $aShort[$i] = $aLong[$i + $iShift] Then $ndu += 1
        Next
        If $ndu > $du Then $du = $ndu
    Next

    Return Round(($du / $iShortLen) * 100)
EndFunc
|
|