qse/ase/cmd/awk/wordfreq.awk
2008-03-21 03:49:53 +00:00

21 lines
351 B
Awk

# wordfreq.awk --- print list of word frequencies
{
    # Fold the whole record to lower case so "The" and "the" count as one word.
    $0 = tolower($0);

    # Replace every character that is not alphanumeric, an underscore, or a
    # blank with a single space. The target is written as $rec with rec == 0,
    # which addresses the entire record ($0); modifying it re-splits the fields.
    rec = 0;
    gsub(/[^[:alnum:]_[:blank:]]/, " ", $rec);

    # Tally each field of the cleaned-up record.
    n = 1;
    while (n <= NF) {
        freq[$n]++;
        n++;
    }
}
#/[^kkka-bcx-dd-y]|abc|def/
END {
    # Emit one "word count" line per distinct word collected in freq.
    # The traversal order of for-in is unspecified, so output is unsorted.
    for (w in freq) {
        count = freq[w];
        print w, count;
    }
}