ここを参考に
http://aidiary.hatenablog.com/entry/20150225/1424863972
brew install portaudio brew install homebrew/x11/sptk
easy_vc.py
#coding: utf-8 import pyaudio import struct import subprocess import pdb # SPTKを使った簡単なボイスチェンジャー CHANNELS = 1 RATE = 16000 CHUNK = 1024 def record(raw_file, record_seconds=5): """音声ファイルを録音する 録音時間は固定。キーボードを押すとループ終わりができなかった・・・""" fp = open(raw_file, "wb") for _ in range(0, int(RATE / CHUNK * record_seconds)): data = stream.read(CHUNK) fp.write(struct.pack('s' * CHUNK * 2, data)) fp.close() stream.stop_stream() stream.close() p.terminate() def extract_pitch(raw_file, pitch_file): """ピッチパラメータの抽出""" cmd = "x2x +sf %s | pitch -a 1 -s 16 -p 80 > %s" % (raw_file, pitch_file) subprocess.call(cmd, shell=True) def extract_mcep(raw_file, mcep_file): """メルケプストラムパラメータの抽出""" cmd = "x2x +sf %s | frame -p 80 | window | mcep -m 25 -a 0.42 > %s" % (raw_file, mcep_file) subprocess.call(cmd, shell=True) def modify_pitch(m, pitch_file, mcep_file, raw_file): """ピッチを変形して再合成 mが1より大きい => 低い声 mが1より小さい => 高い声""" cmd = "sopr -m %f %s | excite -p 80 | mlsadf -m 25 -a 0.42 -p 80 %s | clip -y -32000 32000 | x2x +fs > %s" % (m, pitch_file, mcep_file, raw_file) subprocess.call(cmd, shell=True) def modify_speed(frame_shift, pitch_file, mcep_file, raw_file): """話速を変形して再合成 frame_shiftが小さい => 早口 frame_shiftが大きい => ゆっくり""" cmd = "excite -p %f %s | mlsadf -m 25 -a 0.42 -p %f %s | clip -y -32000 32000 | x2x +fs > %s" % (frame_shift, pitch_file, frame_shift, mcep_file, raw_file) subprocess.call(cmd, shell=True) def hoarse_voice(pitch_file, mcep_file, raw_file): """ささやき声""" modify_pitch(0, pitch_file, mcep_file, raw_file) def robot_voice(frame_period, record_seconds, mcep_file, raw_file): """ロボット声 frame_periodが小さい => 低い frame_periodが大きい => 高い""" sequence_length = record_seconds * RATE * frame_period cmd = "train -p %d -l %d | mlsadf -m 25 -a 0.42 -p 80 %s | clip -y -32000 32000 | x2x +fs > %s" % (frame_period, sequence_length, mcep_file, raw_file) subprocess.call(cmd, shell=True) def child_voice(pitch_file, mcep_file, raw_file): """子供声""" cmd = "sopr -m 0.4 %s | excite -p 80 | mlsadf -m 25 -a 0.1 -p 80 %s | clip -y -32000 32000 | x2x +fs > %s" % (pitch_file, mcep_file, raw_file) subprocess.call(cmd, shell=True) def deep_voice(pitch_file, mcep_file, raw_file): """太い声""" cmd = "sopr -m 2.0 %s | excite -p 80 | mlsadf -m 25 -a 0.6 -p 80 %s | clip -y -32000 32000 | x2x +fs > %s" % (pitch_file, mcep_file, raw_file) subprocess.call(cmd, shell=True) def raw2wav(raw_file, wav_file): cmd = "sox -e signed-integer -c %d -b 16 -r %d %s %s" % (CHANNELS, RATE, raw_file, wav_file) subprocess.call(cmd, shell=True) def play(raw_file): """rawファイルを再生""" p = pyaudio.PyAudio() stream = p.open(format=p.get_format_from_width(2), channels=CHANNELS, rate=RATE, output=True) f = open(raw_file, "rb") data = f.read(CHUNK) while data != '': stream.write(data) data = f.read(CHUNK) stream.stop_stream() stream.close() p.terminate() if __name__ == "__main__": # 録音時間(固定) record_seconds = 10 p = pyaudio.PyAudio() stream = p.open(format=pyaudio.paInt16, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK) pitch_file = "temp.pitch" mcep_file = "temp.mcep" raw_file = "temp.raw" output_file = "output.raw" # オリジナルの音声を録音してrawファイルとして書き出し print "*** Now recording ... (%d sec)" % record_seconds record(raw_file, record_seconds) # パラメータ抽出 print "*** extract pitch ..." extract_pitch(raw_file, pitch_file) print "*** extract mel cepstrum" extract_mcep(raw_file, mcep_file) # パラメータ変形いろいろ print "*** modify parameters ..." # どれか一つしか有効にできない # modify_pitch(0.3, pitch_file, mcep_file, output_file) # modify_speed(300, pitch_file, mcep_file, output_file) # hoarse_voice(pitch_file, mcep_file, output_file) # robot_voice(100, record_seconds, mcep_file, output_file) # child_voice(pitch_file, mcep_file, output_file) deep_voice(pitch_file, mcep_file, output_file) # 変換した音声を再生 print "*** play!" play("output.raw")