qse/ase/test/awk/wordfreq.awk

21 lines
351 B
Awk
Raw Normal View History

2006-09-02 15:08:50 +00:00
# wordfreq.awk --- print list of word frequencies
# Per-record action: normalise the line and tally every field in freq[].
{
    # Fold to lower case so "The" and "the" are counted as the same word.
    $0 = tolower($0);

    # Turn every character that is not alphanumeric, underscore or blank
    # into a space.  The target is addressed indirectly as $a with a == 0,
    # i.e. the whole record; the direct form is kept below for comparison.
    a = 0;
    gsub(/[^[:alnum:]_[:blank:]]/, " ", $a);
    #gsub(/[^[:alnum:]_[:blank:]]/, " ");

    # Count each field of the record; i ends at NF + 1.
    i = 1;
    while (i <= NF) {
        freq[$i]++;
        i++;
    }
}
2007-03-01 07:48:51 +00:00
#/[^kkka-bcx-dd-y]|abc|def/
2006-09-02 15:08:50 +00:00
# After all input is read: print each distinct word followed by its count.
END {
    # for-in visits the keys of freq in an unspecified order.
    for (word in freq) {
        print word, freq[word];
    }
}