#!/usr/bin/python
#
# Copyright 2007 William A. Carrel. All Rights Reserved
# BSD Licensed - See bottom of file for exact license information.

"""Summarize the frequency of lower-case trigrams in an input stream.

This script takes text on standard input and outputs the frequency of
appearance of lower-case trigrams.

Input can be any text.

Output is unordered in the form "occurrences : trigram". e.g.
0 : aaa
0 : dks
103 : oos
"""

__author__ = 'william.a@carrel.org (William A. Carrel)'

import sys
import os

# Table of every three-letter combination of 'a'..'z', pre-seeded with a
# count of zero so that trigrams which never occur are still reported.
trigrams = {}
for i in range(ord('a'), ord('z') + 1):
    for j in range(ord('a'), ord('z') + 1):
        for k in range(ord('a'), ord('z') + 1):
            trigrams[chr(i) + chr(j) + chr(k)] = 0


def count_trigrams(lines, counts=None):
    """Add the trigrams found in *lines* to *counts* and return *counts*.

    Args:
        lines: iterable of strings (for example an open text file).  Each
            string is scanned on its own, so trigrams never span two lines.
        counts: dict mapping trigram -> number of occurrences.  Only
            trigrams that are already keys of this dict are counted;
            everything else is skipped.  Defaults to the module-level
            ``trigrams`` table.

    Returns:
        The ``counts`` dict, updated in place.
    """
    if counts is None:
        counts = trigrams
    for line in lines:
        # A string of length n has n - 2 three-character windows; the range
        # is empty for strings shorter than three characters.
        for start in range(len(line) - 2):
            candidate = line[start:start + 3]
            # Membership in the table is the whole filter: upper-case text,
            # digits, punctuation, whitespace and letters outside a-z are
            # simply not keys, so no exception handling is needed.
            if candidate in counts:
                counts[candidate] += 1
    return counts


def main():
    """Count the trigrams on standard input and print one line per trigram.

    Each output line has the form "<count> : <trigram>".  Every trigram in
    the module-level ``trigrams`` table is printed, including those with a
    count of zero.
    """
    count_trigrams(sys.stdin)
    for key in trigrams:
        # Parenthesised single argument: prints the same text whether
        # print is a statement (Python 2) or a function (Python 3).
        print("%d : %s" % (trigrams[key], key))


if __name__ == '__main__':
    main()

# LICENSE:
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
#   list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
#
# * Neither the name of William Carrel nor the names of its contributors
#   may be used to endorse or promote products derived from this software
#   without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.