| @@ -1,45 +1,41 @@ | |||
| import sys | |||
| # On veut obtenir la fréquence de chaque mot | |||
def get_freq(nom_fich):
    """Read a text file and return its words, lower-cased and letters-only.

    The file content is split on whitespace; every fragment is lower-cased
    and passed through ``clean``. Despite the name, no counting happens
    here: the result is the flat list of cleaned fragments.

    Args:
        nom_fich: Path of the text file to read.

    Returns:
        A list with one cleaned string per whitespace-separated fragment, in
        file order. Whatever ``clean`` returns is appended as-is, so a
        fragment that cleans down to an empty string stays in the list.
    """
    # The context manager closes the handle even if read() raises.
    with open(nom_fich) as fich:
        contenu = fich.read()
    # split() with no argument cuts on any run of whitespace
    # (spaces, tabs, newlines).
    return [clean(fragment.lower()) for fragment in contenu.split()]
def clean(fragment):
    """Return ``fragment`` with every non-alphabetic character removed.

    A character is kept when ``str.isalpha`` is true for it, so accented
    letters survive while digits, punctuation and apostrophes are dropped.

    Args:
        fragment: The string to filter.

    Returns:
        A new string made of the alphabetic characters of ``fragment`` in
        their original order; empty if there are none.
    """
    # join() builds the result in one pass instead of growing a string
    # one character at a time.
    return "".join(c for c in fragment if c.isalpha())
def tri(d):
    """Print the entries of ``d`` as (value, key) tuples, largest first.

    The tuples are ordered in descending order by value, then by key. The
    sorted list is only printed; the dictionary itself is returned
    untouched.

    Args:
        d: A mapping whose items are to be displayed.

    Returns:
        The same ``d`` object that was passed in.
    """
    pairs_desc = sorted(
        ((valeur, clé) for clé, valeur in d.items()),
        reverse=True,
    )
    print(pairs_desc)
    return d
# Legacy driver: runs at module load, reads the hard-coded input file and
# prints the list of cleaned words returned by get_freq.
nom_fich = "ruffin.txt"
f = get_freq(nom_fich)
print(f)
def get_value(pair):
    """Return the second item of a two-item ``pair``.

    Args:
        pair: An iterable of exactly two items, e.g. a ``(key, value)``
            tuple.

    Returns:
        The second item.

    Raises:
        ValueError: If ``pair`` does not unpack into exactly two items.
    """
    # Unpacking (rather than indexing) keeps the "exactly two items" check.
    _, second = pair
    return second
def get_word(chunk, min_length=4):
    """Normalise ``chunk`` into a countable word, or reject it.

    A chunk is accepted only when every character is alphabetic and it has
    at least ``min_length`` characters. Anything containing a digit or
    punctuation mark is rejected as a whole, not trimmed.

    Args:
        chunk: A whitespace-delimited piece of text.
        min_length: Minimum number of characters a chunk needs to be
            accepted. Defaults to 4, the previously hard-coded threshold.

    Returns:
        The lower-cased chunk, or ``None`` when it is rejected.
    """
    is_alphabetic = all(ch.isalpha() for ch in chunk)
    if not is_alphabetic or len(chunk) < min_length:
        return None
    return chunk.lower()
def main():
    """Print the most frequent words of the file named on the command line.

    The path is read from ``sys.argv[1]``. Each line is split on whitespace
    and every chunk goes through ``get_word``; chunks for which it returns
    a falsy value are skipped. The output is a single ``print`` of a list
    of ``[count, word]`` pairs sorted in ascending order (ties ordered by
    word), cut down to its last ten entries, so the most frequent word
    comes last.

    Raises:
        IndexError: If no file name was given on the command line.
        OSError: If the file cannot be opened.
    """
    filename = sys.argv[1]
    scores = {}
    # The context manager closes the file even if reading fails part-way,
    # which the previous explicit open()/close() pair did not guarantee.
    with open(filename, "r") as file:
        for line in file:
            for chunk in line.split():
                word = get_word(chunk)
                if word:
                    scores[word] = scores.get(word, 0) + 1
    # Count first so that sorting ranks by frequency; lists (not tuples)
    # keep the printed format unchanged.
    to_sort = sorted([count, word] for word, count in scores.items())
    print(to_sort[-10:])
# Script entry point: run the word count as soon as the module is loaded.
main()