形態素の辞書は、「坊っちゃん」のテキスト中の語を字種から判別して作成した。
#! /usr/bin/env python
# -*- coding: utf-8 -*-
"""Generate random Japanese sentences from a small phrase-structure grammar.

Each sentence is a noun phrase (a noun, optionally preceded by one or two
adjectives, always followed by the topic particle) plus a one-word predicate.
The vocabulary starts from a few built-in words and is extended from a
dictionary file made of ``word:tag`` lines.

``print`` is always called with a single parenthesised argument so that the
statements parse under both Python 2 (the dialect this script was written in)
and Python 3.
"""
import random

# Path of the dictionary file read when the script is run directly.
filename = "dict_trimmed_bocchan.txt"

# Built-in vocabulary; load_dictionary() appends to these lists in place.
noun = ['私', '彼', '彼女']
verb = ['歩く', '走る', '泳ぐ', '寝る']
adj = ['赤い', '青い']
adjv = ['静かだ', '暖かだ']


def load_dictionary(path, echo=False):
    """Append the entries of a ``word:tag`` dictionary file to the word lists.

    Recognised tags are ``meishi`` (noun), ``doshi`` (verb), ``keiyoshi``
    (adjective) and ``keiyodoshi`` (adjectival verb). Lines carrying any other
    tag, and lines without a ``:`` separator (blank lines included), are
    ignored instead of raising ``IndexError``.

    Args:
        path: Location of the dictionary file.
        echo: When true, print every split line as it is read.

    Raises:
        IOError: (``OSError`` on Python 3) if the file cannot be opened.
    """
    targets = {
        "meishi": noun,
        "doshi": verb,
        "keiyoshi": adj,
        "keiyodoshi": adjv,
    }
    # The context manager closes the file even if processing a line fails.
    with open(path) as handle:
        for line in handle:
            fields = line.strip().split(":")
            if echo:
                print(fields)
            if len(fields) < 2:
                # No separator, so there is no tag to dispatch on.
                continue
            bucket = targets.get(fields[1])
            if bucket is not None:
                bucket.append(fields[0])


# Rewrite rule for sentences.
def sentence():
    """Return one sentence as a list of words: noun phrase + verb phrase.

    The rule function is chosen first and only the chosen one is called, so
    the alternatives that lose the draw are never built.
    """
    noun_phrase = random.choice((np2, np3))()
    verb_phrase = random.choice((vp4, vp5, vp6))()
    return noun_phrase + verb_phrase


# Rewrite rules for noun phrases.
def np2():
    """Return an adjective phrase, a noun and the topic particle."""
    words = random.choice((adj7, adj8))()
    words.append(random.choice(noun))
    words.append("は")
    return words


def np3():
    """Return a bare noun followed by the topic particle."""
    return [random.choice(noun), "は"]


# Rewrite rules for verb phrases (one predicate word each).
def vp4():
    """Return a single verb."""
    return [random.choice(verb)]


def vp5():
    """Return a single adjective used as the predicate."""
    return [random.choice(adj)]


def vp6():
    """Return a single adjectival verb used as the predicate."""
    return [random.choice(adjv)]


# Rewrite rules for adjective phrases.
def adj7():
    """Return two adjectives: one drawn here plus the result of adj8().

    NOTE: the original author wanted this rule to be able to expand into
    itself, but gave that up because calling adj7() while building the list of
    alternatives recursed without end. The two-adjective behaviour is kept.
    """
    return [random.choice(adj)] + adj8()


def adj8():
    """Return a single adjective."""
    return [random.choice(adj)]


def adj9():
    """Return a fixed placeholder phrase (dummy rule, not used by the grammar)."""
    return ['dummy']


def main(count=20):
    """Load the dictionary, then print ``count`` random sentences.

    Each sentence is printed on its own line with the words separated by a
    single space.
    """
    load_dictionary(filename, echo=True)
    for _ in range(count):
        print(" ".join(sentence()))


if __name__ == "__main__":
    main()