Markov Text Generator

by Gary Burd · November 10, 2003

Here's a Markov text generator implemented in Python:

"""
The script markov.py reads text from standard input and writes
a paragraph of text to standard output.  Blank lines in the
input are treated as paragraph separators and are represented
as '\n' in the code.
"""
import random

# Paragraph-boundary state.  States are (previous word, current word)
# pairs and '\n' stands for a paragraph break, so this pair is both the
# starting state and the state reached after every paragraph break.
nlnl = ('\n', '\n')

def new_key(key, word):
  """Return the state that follows `key` after `word` has been seen.

  An ordinary word slides the two-word window forward, giving
  (key[1], word).  The paragraph-break marker resets the state to
  the module-level `nlnl` pair.
  """
  if word != '\n':
      return (key[1], word)
  return nlnl

def markov_data_from_words(words):
  """Build the transition table for a stream of words.

  The result maps each two-word state to the list of words observed
  immediately after it.  Repeated followers are kept, so a word that
  follows a state more often appears more often in that state's list.
  The walk starts in the `nlnl` state and advances with `new_key`.
  """
  table = {}
  state = nlnl
  for token in words:
      if state not in table:
          table[state] = []
      table[state].append(token)
      state = new_key(state, token)
  return table

def words_from_markov_data(data):
  """Yield an endless stream of randomly chosen words.

  Starting from the `nlnl` state, each step picks a random follower
  of the current state from `data`, advances the state with `new_key`,
  and yields the chosen word.  A state that is missing from `data`
  falls back to choosing from `nlnl`, which always yields the
  paragraph-break marker.  The generator never stops on its own; the
  consumer decides when to stop reading.
  """
  state = nlnl
  while True:
      candidates = data.get(state, nlnl)
      chosen = random.choice(candidates)
      state = new_key(state, chosen)
      yield chosen
def words_from_file(f):
  """Yield the words of `f`, ending each paragraph with one '\\n' marker.

  `f` is any iterable of text lines (for example an open file).  Each
  line is split on whitespace and its words are yielded in order.  A
  line with no words (empty or whitespace only) ends the current
  paragraph.

  Exactly one marker is yielded per non-empty paragraph.  Leading blank
  lines, runs of several blank lines, and a blank line at the end of the
  input do not produce extra markers.  Back-to-back markers would record
  the marker as a possible first word of a paragraph, which lets the
  generator emit an empty paragraph.  Input that contains no words
  yields nothing.
  """
  in_paragraph = False
  for line in f:
      words = line.split()
      if words:
          in_paragraph = True
          for word in words:
              yield word
      elif in_paragraph:
          # First blank line after some words: close the paragraph once.
          in_paragraph = False
          yield '\n'
  if in_paragraph:
      # Input ended without a trailing blank line; close the last paragraph.
      yield '\n'

def paragraph_from_words(words):
  """Join the words up to the first paragraph-break marker with spaces.

  Reads `words` only as far as the first '\\n' marker (which is consumed
  but not included) or until the iterable is exhausted, and returns the
  words read so far as a single space-separated string.
  """
  def until_break():
      for token in words:
          if token == '\n':
              return
          yield token

  return ' '.join(until_break())

if __name__ == '__main__':
  import sys
  # Pipeline: stdin lines -> word stream -> transition table ->
  # endless random word stream -> first generated paragraph.
  # print(...) with a single argument behaves the same under Python 2
  # and Python 3, whereas the bare print statement is a syntax error
  # on Python 3.
  print(paragraph_from_words(
          words_from_markov_data(
              markov_data_from_words(
                  words_from_file(
                      sys.stdin)))))