diff --git a/sources/06-top-words.py b/sources/06-top-words.py deleted file mode 100644 index 7bdaa8e..0000000 --- a/sources/06-top-words.py +++ /dev/null @@ -1,73 +0,0 @@ -import sys - - -def clean_fragment(fragment): - result = "" - for c in fragment: - if c.isalpha() or c in ["-", "'"]: - result += c - - return result - - -def split_words(text): - fragments = split_fragments(text) - res = list() - for fragment in fragments: - fragment = fragment.lower() - fragment = clean_fragment(fragment) - if fragment: - res.append(fragment) - return res - - -def split_fragments(text): - res = list() - for fragment in text.split(): - if "’" in fragment: - before = fragment.split("’")[0] - after = fragment.split("’")[1] - res.append(before) - res.append(after) - else: - res.append(fragment) - return res - - -def get_frequencies(words): - res = dict() - for word in words: - if word in res: - res[word] += 1 - else: - res[word] = 1 - return res - - -def get_scores(frequencies): - res = list() - for word, count in frequencies.items(): - res.append((count, word)) - res.sort(reverse=True) - return res - - -def print_scores(scores): - for count, word in scores: - print(count, word) - - -def main(): - if len(sys.argv) < 2: - sys.exit("not enough arguments") - filename = sys.argv[1] - file = open(filename) - contents = file.read() - words = split_words(contents) - frequencies = get_frequencies(words) - scores = get_scores(frequencies) - top_words = scores[:20] - print_scores(top_words) - - -main() diff --git a/sources/06-top-words.py b/sources/06-top-words.py new file mode 120000 index 0000000..9137809 --- /dev/null +++ b/sources/06-top-words.py @@ -0,0 +1 @@ +topwords/6.py \ No newline at end of file