字種から作った形態素辞書と書き換え規則を使った文章の生成

形態素の辞書は、「坊っちゃん」のテキストから字種をもとに形態素を判別して作成した。

#! /usr/bin/env python
# -*- coding: utf-8 -*-

import random

# Path of the morpheme dictionary. Each usable line has the form
# "<word>:<part-of-speech tag>".
# To prompt for the path instead:
#   filename = raw_input("Enter dictionary file: ")
filename = "dict_trimmed_bocchan.txt"

# Built-in seed vocabulary; dictionary entries are appended to these lists.
noun = ['私','彼','彼女']
verb = ['歩く','走る','泳ぐ','寝る']
adj = ['赤い','青い']
adjv = ['静かだ','暖かだ']

# Part-of-speech tag found in the dictionary file -> word list to extend.
_POS_LISTS = {
    "meishi": noun,
    "doshi": verb,
    "keiyoshi": adj,
    "keiyodoshi": adjv,
}

# The with-block guarantees the file is closed even if reading fails.
with open(filename) as f:
    for line in f:
        line = line.strip()
        lsplt = line.split(":")
        # Trace of every parsed entry (kept from the original script).
        print(lsplt)
        # Blank lines or lines without a ":" have no tag field; skip them
        # instead of crashing with IndexError.
        if len(lsplt) < 2:
            continue
        words = _POS_LISTS.get(lsplt[1])
        # Entries with an unrecognised tag are ignored.
        if words is not None:
            words.append(lsplt[0])

# Rewrite rule for a sentence
def sentence():
    """Generate one random sentence as a list of tokens.

    One noun-phrase rule (np2 or np3) and one verb-phrase rule
    (vp4, vp5 or vp6) are picked uniformly at random.

    Returns:
        list: the noun-phrase tokens followed by the verb-phrase tokens.
    """
    # Pick the rule first and expand only that one, rather than expanding
    # every alternative and discarding the unused results.
    noun_phrase = random.choice([np2, np3])()
    verb_phrase = random.choice([vp4, vp5, vp6])()
    return noun_phrase + verb_phrase
    
# Rewrite rules for noun phrases
def np2():
    """Build a noun phrase: adjective phrase + noun + particle "は".

    The adjective phrase comes from adj7 or adj8, chosen uniformly at
    random; the noun is drawn uniformly from the module-level ``noun`` list.

    Returns:
        list: adjective tokens, then the noun, then "は".
    """
    # Choose the adjective rule first so that only the selected rule runs.
    adj_phrase = random.choice([adj7, adj8])()
    return adj_phrase + [random.choice(noun), "は"]
def np3():
    """Build a bare noun phrase: noun + particle "は".

    Returns:
        list: a noun drawn uniformly from the module-level ``noun`` list,
        followed by "は".
    """
    # random.choice matches how np2 picks its noun and avoids manual
    # index arithmetic.
    return [random.choice(noun), "は"]
    
# Rewrite rules for verb phrases
def vp4():
    """Build a verb phrase consisting of a single verb.

    Returns:
        list: one verb drawn uniformly from the module-level ``verb`` list.
    """
    # random.choice keeps this rule consistent with vp5 and vp6.
    return [random.choice(verb)]

def vp5():
    """Build a verb phrase consisting of a single adjective.

    Returns:
        list: one adjective drawn at random from the module-level ``adj``
        list.
    """
    predicate = random.choice(adj)
    return [predicate]
def vp6():
    """Build a verb phrase consisting of a single adjectival verb.

    Returns:
        list: one word drawn at random from the module-level ``adjv`` list.
    """
    predicate = random.choice(adjv)
    return [predicate]

# Rewrite rules for adjective phrases
def adj7():
    """Build an adjective phrase of exactly two adjectives.

    Returns:
        list: one adjective drawn at random from the module-level ``adj``
        list, followed by the single-adjective phrase from adj8().
    """
    # An earlier attempt made this rule recursive with
    # random.choice([adj7(), adj8()]), but that calls adj7() while the
    # list is still being built, so it recursed without terminating.
    # The tail is therefore always adj8(); it is called directly because
    # choosing from a one-element list selects nothing.
    return [random.choice(adj)] + adj8()
def adj8():
    """Build an adjective phrase consisting of a single adjective.

    Returns:
        list: one adjective drawn at random from the module-level ``adj``
        list.
    """
    picked = random.choice(adj)
    return [picked]
def adj9():
    """Dummy rule: return a fixed placeholder token list.

    Returns:
        list: always ``['dummy']``.
    """
    return ['dummy']
    
# Print 20 generated sentences, one per line, with the tokens of each
# sentence separated by single spaces.
for x in range(20):
    array = sentence()
    print(" ".join(array))