# Dictionaries can be built in several equivalent ways.
a = dict(one=1, two=2, three=3)                    # keyword arguments
b = {'one': 1, 'two': 2, 'three': 3}               # literal syntax
c = dict(zip(['one', 'two', 'three'], [1, 2, 3]))  # parallel keys/values
d = dict([('two', 2), ('one', 1), ('three', 3)])   # iterable of pairs
e = dict({'three': 3, 'one': 1, 'two': 2})         # copy of another mapping

# All five hold the same key/value pairs; dict equality ignores the order
# in which the keys were inserted.
assert a == b == c == d == e
# Build an index mapping each word to the list of (line, column) positions
# at which it occurs in the file named by the first command-line argument.
# NOTE(review): `sys` and `WORD_RE` are not defined in this excerpt; they are
# presumably imported / compiled earlier in the file - confirm.
index = {}
# Fixed: the codec name was misspelled 'uft-8', which makes open() raise
# LookupError (unknown encoding) before a single line is read.
with open(sys.argv[1], encoding='utf-8') as fp:
    for line_no, line in enumerate(fp, 1):  # line numbers start at 1
        for match in WORD_RE.finditer(line):
            word = match.group()
            column_no = match.start() + 1  # columns start at 1 as well
            location = (line_no, column_no)
            # Fetch the occurrences recorded so far, or [] for a new word.
            occurrences = index.get(word, [])
            occurrences.append(location)
            # Store the list back: for a new word the [] handed out by
            # get() is not yet in the dict.
            index[word] = occurrences

# Print the result in case-insensitive alphabetical order.
for word in sorted(index, key=str.upper):
    print(word, index[word])
# Sample run:
#
# $ python index0.py zen.txt
# a [(19, 48), (20, 53)]
# Although [(11, 1), (16, 1), (18, 1)]
# ambiguity [(14, 16)]
# and [(15, 23)]
# are [(21, 12)]
# aren [(10, 15)]
# at [(16, 38)]
# bad [(19, 50)]
# be [(15, 14), (16, 27), (20, 50)]
# beats [(11, 23)]
# Beautiful [(3, 1)]
# better [(3, 14), (4, 13), (5, 11), (6, 12), (7, 9), (8, 11), (17, 8), (18, 25)]
# break [(10, 40)]
# by [(1, 20)]
# cases [(10, 9)]
# complex [(5, 23)]
# ...
# Build an index mapping each word to the list of (line, column) positions
# at which it occurs in the file named by the first command-line argument.
# NOTE(review): `sys` and `WORD_RE` are not defined in this excerpt; they are
# presumably imported / compiled earlier in the file - confirm.
index = {}
# Fixed: the codec name was misspelled 'uft-8', which makes open() raise
# LookupError (unknown encoding) before a single line is read.
with open(sys.argv[1], encoding='utf-8') as fp:
    for line_no, line in enumerate(fp, 1):  # line numbers start at 1
        for match in WORD_RE.finditer(line):
            word = match.group()
            column_no = match.start() + 1  # columns start at 1 as well
            location = (line_no, column_no)
            # setdefault() returns the list stored under `word`, inserting
            # a fresh [] first when the key is missing, so the append
            # always lands in the list held by the dict.
            # Equivalent to:
            #     if key not in my_dict:
            #         my_dict[key] = []
            #     my_dict[key].append(new_value)
            index.setdefault(word, []).append(location)

# Print the result in case-insensitive alphabetical order.
for word in sorted(index, key=str.upper):
    print(word, index[word])
# Word index backed by defaultdict(list): each word maps to the list of
# (line, column) positions where it was found in the file given as the
# first command-line argument.
index = collections.defaultdict(list)
with open(sys.argv[1], encoding='utf-8') as fp:
    for line_no, line in enumerate(fp, 1):
        for match in WORD_RE.finditer(line):
            # If the word has no entry yet, default_factory (list) is
            # called to create an empty list, which is stored and returned;
            # the position is then appended to it. Line and column numbers
            # are both 1-based.
            index[match.group()].append((line_no, match.start() + 1))

# Print in alphabetical order, ignoring case.
for word in sorted(index, key=str.upper):
    print(word, index[word])