本帖最后由 zghwelcome 于 2023-9-19 22:55 编辑
主代码:
import sys
from vector import Vector
from sketch import Sketch
# Compare a set of text documents pairwise and print a similarity table.
#
# Each document is read in full and turned into a Sketch (from the local
# `sketch` module); every pair of sketches is then compared with
# Sketch.similarTo() and the results are printed as a filenames-by-filenames
# grid.
# NOTE(review): the return type/range of similarTo() is defined in sketch.py,
# which is not visible here -- confirm it formats sensibly with '{: <22}'.

# Documents to compare.
filenames = ['a.txt', 'b.txt', 'c.txt']
k = 5      # k-gram length passed to Sketch
d = 10000  # document sketch dimension passed to Sketch

# One placeholder slot per document; each slot is replaced by a Sketch below.
sketches = [0 for _ in filenames]
print("sketches: ", sketches)

for index, path in enumerate(filenames):
    with open(path, 'r', encoding='UTF-8') as f:
        text = f.read()
    # Build the sketch after the file is closed; only the text is needed.
    sketches[index] = Sketch(text, k, d)

# Header row: a blank corner cell followed by one column title per document.
print(' ' * 20, end=' ')
for filename in filenames:
    print('{:>25}'.format(filename), end=' ')
print()

# Detail rows: one row per document, one similarity value per column.
for filename, row_sketch in zip(filenames, sketches):
    print('{:10}'.format(filename), end=' ')
    for column_sketch in sketches:
        print('{: <22}'.format(row_sketch.similarTo(column_sketch)), end=' ')
    print()
用到的2个库文件:
求教下各位懂Python的大佬。
|