From 83c6f5af27f784d7958d558b9a1a57b77cda2911 Mon Sep 17 00:00:00 2001
From: Dimitri Merejkowsky
Date: Sat, 19 Jan 2019 17:26:42 +0100
Subject: [PATCH] WIP atelier

---
 sources/06-top-words.py | 78 +++++++++++++++++++----------------------
 1 file changed, 37 insertions(+), 41 deletions(-)

diff --git a/sources/06-top-words.py b/sources/06-top-words.py
index 8654a0b..199af74 100644
--- a/sources/06-top-words.py
+++ b/sources/06-top-words.py
@@ -1,45 +1,41 @@
-import sys
+# On veut obtenir la fréquence de chaque mot
+
+def get_freq(nom_fich):
+    fich=open(nom_fich)
+    contenu=fich.read()
+    liste_frag=contenu.split() # coupe sur 'espace' et \
+    liste_mot=list()
+    for fragment in liste_frag:
+        fragment_min=fragment.lower()
+        frag_clean=clean(fragment_min)
+        liste_mot.append(frag_clean)
+
+
+    return liste_mot
+
+def clean(fragment):
+
+    result=""
+    for c in fragment:
+        if c.isalpha():
+            result+=c
+
+    return result
+
+
+
+def tri(d):
+    list_tuples=list()
+    for clé, valeur in d.items():
+        list_tuples.append((valeur, clé))
+    list_tuples.sort(reverse=True)
+    print(list_tuples)
+    return d
+
+nom_fich="ruffin.txt"
+f=get_freq(nom_fich)
+print(f)
 
 
-def get_value(pair):
-    key, value = pair
-    return value
 
 
-def get_word(chunk):
-    if all(x.isalpha() for x in chunk): # is_alpha()
-        if len(chunk) < 4:
-            return None
-        return chunk.lower() # lower()
-    else:
-        return None
-
-
-def main():
-    filename = sys.argv[1]
-    file = open(filename, "r")
-    lines = file.readlines()
-    file.close()
-
-    scores = {}
-
-    for line in lines:
-        for chunk in line.split():
-            word = get_word(chunk)
-            if word:
-                if not word in scores:
-                    scores[word] = 1
-                else:
-                    scores[word] += 1
-
-    to_sort = []
-    for k in scores: # iterate on dicts
-        v = scores[k]
-        to_sort.append([v, k])
-    to_sort.sort()
-
-
-    print(to_sort[-10:])
-
-
-main()