|
|
@@ -1,45 +1,41 @@ |
|
|
|
import sys |
|
|
|
# On veut obtenir la fréquence de chaque mot |
|
|
|
|
|
|
|
def get_freq(nom_fich):
    """Return the cleaned, lower-cased fragments of a text file, in file order.

    Despite its name, this function does not count anything: it yields one
    entry per whitespace-separated fragment, so computing frequencies is left
    to the caller.

    Args:
        nom_fich: Path of the text file to read (opened in text mode with the
            platform's default encoding).

    Returns:
        A list of strings, one per fragment, each lower-cased and then passed
        through ``clean``. Duplicates are kept, and a fragment for which
        ``clean`` keeps nothing contributes an empty string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # The context manager guarantees the handle is closed, even if read() fails.
    with open(nom_fich) as fich:
        contenu = fich.read()

    # split() without argument cuts on any run of whitespace (spaces, newlines...).
    return [clean(fragment.lower()) for fragment in contenu.split()]
|
|
|
|
|
|
|
def clean(fragment):
    """Return ``fragment`` with every non-alphabetic character removed.

    Args:
        fragment: The string to filter.

    Returns:
        A new string made of the characters of ``fragment`` for which
        ``str.isalpha()`` is true, in their original order and case. The
        result is empty when no character qualifies.
    """
    # join() builds the result in one pass instead of repeated concatenation.
    return "".join(c for c in fragment if c.isalpha())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def tri(d):
    """Print the ``(value, key)`` pairs of ``d`` from largest to smallest.

    Args:
        d: A mapping exposing ``items()``.

    Returns:
        ``d`` itself, untouched; the sorted list is only printed.
    """
    # NOTE(review): the sorted pairs are printed but never returned -- confirm
    # that callers really expect the original dictionary back.
    paires = sorted(((valeur, nom) for nom, valeur in d.items()), reverse=True)
    print(paires)
    return d
|
|
|
|
|
|
|
# Legacy driver: extract the cleaned words of a hard-coded sample file and
# print the resulting list.
# NOTE: these statements run at import time and open "ruffin.txt" through a
# relative path, so the file must be reachable from the working directory.
nom_fich = "ruffin.txt"
f = get_freq(nom_fich)
print(f)
|
|
|
|
|
|
|
|
|
|
|
def get_value(pair):
    """Return the second element of a two-element ``pair``.

    Args:
        pair: An iterable that unpacks into exactly two items, presumably a
            ``(key, value)`` pair.

    Returns:
        The second item of ``pair``.

    Raises:
        ValueError: If ``pair`` does not contain exactly two items.
    """
    _, second = pair
    return second
|
|
|
|
|
|
|
|
|
|
|
def get_word(chunk):
    """Normalize one whitespace-delimited chunk into a word, or reject it.

    Args:
        chunk: The raw string to examine.

    Returns:
        ``chunk`` lower-cased when it consists solely of alphabetic
        characters and is at least four characters long; ``None`` otherwise
        (this includes chunks carrying punctuation or digits, and the empty
        string).
    """
    only_letters = all(ch.isalpha() for ch in chunk)
    # Guard clause: anything that is not a purely alphabetic word of 4+
    # characters is discarded.
    if not only_letters or len(chunk) < 4:
        return None
    return chunk.lower()
|
|
|
|
|
|
|
|
|
|
|
def main():
    """Print the ten most frequent words of the file named on the command line.

    The path is taken from ``sys.argv[1]``. Every whitespace-separated chunk
    of the file is passed to ``get_word``; chunks it rejects (falsy result)
    are ignored and the others are counted. The output is a single printed
    list of ``[count, word]`` pairs sorted in ascending order, truncated to
    its last ten entries (i.e. the ten highest pairs, biggest last).

    Raises:
        SystemExit: With a usage message when no file path was supplied.
        OSError: If the file cannot be opened or read.
    """
    # Local import so this function brings its own dependency into scope.
    from collections import Counter

    if len(sys.argv) < 2:
        # A clear usage message instead of a bare IndexError traceback.
        sys.exit("usage: pass the path of the text file to analyse as first argument")
    filename = sys.argv[1]

    scores = Counter()
    # The context manager closes the file even if reading fails midway.
    with open(filename, "r") as source:
        for line in source:
            for chunk in line.split():
                word = get_word(chunk)
                if word:
                    scores[word] += 1

    # Pairs are [count, word] so the sort orders by count first, then by word.
    to_sort = sorted([count, word] for word, count in scores.items())

    print(to_sort[-10:])
|
|
|
|
|
|
|
|
|
|
|
# Run the word count only when this file is executed as a script, so that
# importing the module does not read the command line as a side effect.
if __name__ == "__main__":
    main()