You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
This repo is archived. You can view files and clone it, but cannot push or open issues/pull-requests.

06-top-words.py 825 B

6 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445
import sys
from collections import Counter
  2. def get_value(pair):
  3. key, value = pair
  4. return value
  5. def get_word(chunk):
  6. if all(x.isalpha() for x in chunk): # is_alpha()
  7. if len(chunk) < 4:
  8. return None
  9. return chunk.lower() # lower()
  10. else:
  11. return None
  12. def main():
  13. filename = sys.argv[1]
  14. file = open(filename, "r")
  15. lines = file.readlines()
  16. file.close()
  17. scores = {}
  18. for line in lines:
  19. for chunk in line.split():
  20. word = get_word(chunk)
  21. if word:
  22. if not word in scores:
  23. scores[word] = 1
  24. else:
  25. scores[word] += 1
  26. to_sort = []
  27. for k in scores: # iterate on dicts
  28. v = scores[k]
  29. to_sort.append([v, k])
  30. to_sort.sort()
  31. print(to_sort[-10:])
  32. main()