grep -v END"
def check_dc_line():
    """Report duplicate "dn,cv" keys found in the output of the shell command ``cmd``.

    Runs the module-level ``cmd`` through ``os.popen`` and, for every output
    line, joins its first two whitespace-separated fields into a "dn,cv" key.
    Each key that has already been seen is printed as a duplicate.

    Returns:
        True if at least one duplicate key was printed, otherwise False.
    """
    has_duplicate = False
    dc_set = set()
    pipe = os.popen(cmd, 'r')
    try:
        # Iterate the pipe lazily instead of readlines(), so the whole command
        # output is never held in memory at once.
        for dc_line in pipe:
            dc_token = dc_line.split()
            if len(dc_token) < 2:
                # Blank or one-field lines carry no (dn, cv) pair; skip them
                # instead of failing with IndexError.
                continue
            dc = dc_token[0] + "," + dc_token[1]
            if dc in dc_set:
                # A single pre-formatted argument prints identically under
                # Python 2 and Python 3.
                print("duplicate dc found: %s" % dc)
                has_duplicate = True
            else:
                dc_set.add(dc)
    finally:
        # Always close the pipe so the child process is waited on, even if the
        # loop raises.
        pipe.close()
    return has_duplicate
# Script entry: run the duplicate scan and say so when nothing was found.
if not check_dc_line():
    # Parenthesized single string prints the same text on Python 2 and 3.
    print("no duplicate dc")
对于 250 个文件、共 60 万行的数据，过滤一遍约需 1.67 秒。
有点不甘心这个效率，于是又写了一个同样功能的 shell 脚本：
#! /bin/bash
cat /home/zhangj/hosts/* 关键词:详细说明小程序的python与bash版本比较