"""
text_file_indexer.py
Exercise:
Given a text file somefile.txt, the program will read it completely,
and while doing so, record the occurrences of each unique word,
and the line numbers on which they occur. This information is
then written to an index file somefile.idx, which is also a text
file.
Author: Lauren Gregoire
"""
def create_index(text_filename, delimiter_chars=",.;:!?"):
    """Build a word index for a text file.

    Every line is split on whitespace, and any leading or trailing
    characters contained in ``delimiter_chars`` are stripped from each
    token. Tokens that are empty after stripping (for example a lone
    "!" or "...") are not words and are skipped. Matching is
    case-sensitive.

    Args:
        text_filename: Path of the text file to read.
        delimiter_chars: Characters stripped from both ends of each token.

    Returns:
        A dict mapping each word to the list of 1-based line numbers on
        which it occurs, in ascending order. A word that appears several
        times on one line has that line number repeated.
    """
    index = {}
    with open(text_filename, "r") as fid:
        # enumerate keeps the line counter tied to the loop (lines start at 1).
        for line_num, line in enumerate(fid, start=1):
            for token in line.split():
                word = token.strip(delimiter_chars)
                if not word:
                    # Token consisted only of delimiter characters.
                    continue
                # dict.has_key() no longer exists in Python 3; setdefault
                # creates the list on first sight and appends otherwise.
                index.setdefault(word, []).append(line_num)
    return index
def write_index(index, index_filename):
    """Write a word index to a text file, one word per line.

    Each output line has the form ``word: n1, n2, n3`` where the numbers
    are the entries of ``index[word]`` in their stored order. Words are
    written in ascending sorted order of the keys.

    Args:
        index: Dict mapping each word (str) to an iterable of line numbers.
        index_filename: Path of the file to create or overwrite.
    """
    # The context manager closes the file even if a write fails part-way.
    with open(index_filename, "w") as index_file:
        for word in sorted(index):
            lines_string = ", ".join(str(line_num) for line_num in index[word])
            index_file.write(word + ": " + lines_string + "\n")
def index_text_file(text_filename, index_filename="index.txt", delimiter_chars=",.;:!?"):
    """Create an index of the occurrences of words in a text file.

    The index is built with ``create_index`` and written to
    ``index_filename`` with ``write_index``. The output file has one
    line per word, in the form::

        word: line_num, line_num, line_num

    with the words in sorted order.

    Args:
        text_filename: Path of the text file to index.
        index_filename: Path of the index file to write.
        delimiter_chars: Characters stripped from both ends of each word
            before it is indexed.
    """
    # Forward the caller's delimiter set; previously a hard-coded literal
    # was passed here, so a custom ``delimiter_chars`` was silently ignored.
    index = create_index(text_filename, delimiter_chars=delimiter_chars)
    write_index(index, index_filename)
# Index the example poem. NOTE: this statement sits at module level, so it
# runs whenever the file is executed or imported.
index_text_file(
    text_filename='TheTyger_WilliamBlake.txt',
    index_filename='index_TheTyger_WilliamBlake.txt',
)