MIKU_TALK 0.2.1 のVSQ生成部ソースを公開します。サーバ上のCGIスクリプトとして動作しますが、ローカルPC上で動くように変更するのは、それほど難しくはないと思います。説明なくとも理解できるよう、できるだけ沢山のコメントを入れました。これを参考に更に良いものが世の中に生み出されればと思います。
#!/usr/local/bin/perl
BEGIN { unshift(@INC, "/cgi-bin/lib"); }
########################################################################
#
# Filename : miku_talk021.cgi @infoseek
# Charcode : UTF-8
#
# Released on 26/Apr/2008, Programmed by A.Izumi
#
#
use strict;
use CGI;
use CGI::Carp qw(fatalsToBrowser);
use Jcode;
use MIDI; # MIDI-Perl のインストールを要する。(非標準モジュール)
#
# Provider-specific setting: relative path of the folder that receives work files.
#
my $myWorkFolderPath = "work";
#
# Provider-specific setting: URL of that folder.
my $myUrl = "work";
my $myAdv = "";
$CGI::POST_MAX = 1024*1024*100;
#
# Create the work folder up front (the call simply fails when it already exists).
mkdir($myWorkFolderPath, 0777);
my $q = CGI->new;
my $mySessionId = GetSessionId($q->remote_host);
my $myPostedString = $q->param("myText1");
if( !defined($myPostedString) ) {
    # Nothing was posted: work on an empty string rather than on undef.
    $myPostedString = "";
}
my $myDebugMode = $q->param("debug");
if( !defined($myDebugMode) ) {
    $myDebugMode = 0;
}
# Normalise line endings: collapse \x0d\x0a to \x0d, then expand every \x0d
# to \x0d\x0a.
$myPostedString =~ s/\x0d\x0a/\x0d/g;
$myPostedString =~ s/\x0d/\x0d\x0a/g;
# Decode %XX escapes that are still present in the posted text.
# NOTE(review): CGI->param normally returns already-decoded values; presumably
# the client double-encodes the text - confirm before removing this step.
$myPostedString =~ s/%([0-9a-f][0-9a-f])/pack("C",hex($1))/egi;
$myPostedString = Jcode->new($myPostedString)->utf8;
#
# Validate the posted morpheme lines.
#
my $maErrorLine = checkMa($myPostedString,$mySessionId);
if( $maErrorLine > 0 ) {
    print $q->header(-type=>'text/html; charset=UTF-8');
    print $q->start_html(-lang=>'ja',
        -charset => 'UTF-8',
        -encoding => 'UTF-8',
        -title => 'MIKU_TALK Ver. 0.2.0');
    print $myAdv, "\n";
    print $q->br;
    print $q->h1('MIKU_TALK はエラーを検出しました。'),
        $q->hr;
    print "MIKU_TALK は、".$maErrorLine."行目の形態素を読み取れません。\n";
    print $q->br;
    # Close the document that start_html opened.
    print $q->end_html;
    exit();
}
if( $myDebugMode != 0 ) {
    print $q->header(-type=>'text/html; charset=UTF-8');
    print $q->start_html(-lang=>'ja',
        -charset => 'UTF-8',
        -encoding => 'UTF-8',
        -title => 'MIKU_TALK Ver. 0.2.0');
    print $myAdv, "\n";
    print $q->h1('MIKU_TALK がVSQファイルを生成しました。'),
        $q->hr,
        $q->br;
}
#
# Directory-based lock: mkdir succeeds for only one process at a time.
#
my $lockdir = "miku_lock";
my $retry = 20; # number of attempts
while (!mkdir($lockdir, 0755)) { # wait while the lock cannot be created
    if (--$retry <= 0) {
        last; # still locked after about 20 seconds: give up and proceed anyway
    }
    sleep(1); # wait one second
}
my $myDebugPath = "./". $myWorkFolderPath. "/". $mySessionId. ".txt";
my $myVsqPath = "./". $myWorkFolderPath. "/". $mySessionId. ".vsq";
my $myVsqUrl = $myUrl. "/". $mySessionId. ".vsq";
#
# Run the conversion pipeline, dumping the text after every stage.
#
WriteMA("ma000.txt",$myPostedString);
$myPostedString = Hankaku2Zenkaku($myPostedString);
WriteMA("ma001.txt",$myPostedString);
$myPostedString = JoinTyouonKigou($myPostedString);
WriteMA("ma002.txt",$myPostedString);
$myPostedString = AddPhonetic($myPostedString);
WriteMA("ma003.txt",$myPostedString);
$myPostedString = AddSeparator($myPostedString);
WriteMA("ma004.txt",$myPostedString);
$myPostedString = ParseSentence($myPostedString);
WriteMA("ma005.txt",$myPostedString);
mecab2VsqGenerate( $myPostedString, $myVsqPath );
rmdir($lockdir); # release the lock
if( $myDebugMode != 0 ) {
    #
    # Debug mode: show a download link instead of sending the file.
    #
    # NOTE(review): source_path hands a server-side path to the download
    # script through the URL; make sure that script validates it.
    #
    my $myVsqCgi = "miku_talk_dl021.cgi";
    $myVsqCgi .= "?download_name=". $mySessionId. ".vsq";
    $myVsqCgi .= "&source_path=" . $myVsqPath;
    $myVsqCgi .= "&debug=" . $myDebugMode;
    print $q->a({href=> $myVsqCgi }, "VSQファイルのダウンロード");
    print $q->br;
    print "↑ ここをクリックしてください。";
    print $q->br;
    print $q->end_html;
} else {
    #
    # Normal mode: send the generated VSQ file as a download.
    #
    # The file is read before any header is printed, so a missing or
    # unreadable file is reported as an error instead of an empty download.
    #
    open(my $vsqHandle, '<', $myVsqPath)
        or die "Cannot open generated VSQ file '$myVsqPath': $!";
    binmode $vsqHandle;
    my $contents = do { local $/; <$vsqHandle> };
    close($vsqHandle);
    if( !defined($contents) ) {
        $contents = "";
    }
    print $q->header(
        -content_type => "audio/midi",
        -content_disposition => "attachment;filename=". $mySessionId. ".vsq"
    );
    binmode STDOUT;
    print $contents;
    #
    # The generated file is no longer needed.
    #
    unlink $myVsqPath;
}
exit;
########################################################################
#
# Subroutine: WriteMA
#
# Dumps intermediate text to a file, one "\n"-terminated line per input line.
#
# Arg 1: path of the file to write
# Arg 2: data to write
#
# Returns arg 2 unchanged. A file that cannot be opened is reported with
# warn() and otherwise ignored, so a failed dump never aborts the caller.
#
sub WriteMA {
    my ($path, $data) = @_;
    my @lines = split(/\n/, defined($data) ? $data : "");
    if( open(my $handle, '>', $path) ) {
        # Write every line, including the last one.
        foreach my $line (@lines) {
            print {$handle} $line."\n";
        }
        close($handle);
    } else {
        warn "WriteMA: cannot write '$path': $!\n";
    }
    return $data;
}
########################################################################
#
# Subroutine: Hankaku2Zenkaku
#
# Passes the reading of every morpheme through Jcode's h2z conversion
# (half-width to full-width kana).
#
# Input: MeCab extended format, one morpheme per line
#   headword ($midasi), a tab, then comma-separated fields:
#     field 0: part of speech ($hinsi)
#     field 8: reading        ($yomi)
#
# Output: every line rewritten as "headword<TAB>hinsi,,,,,,,,yomi\x0d\n";
# lines without a feature column are only re-terminated with "\x0d\n".
#
sub Hankaku2Zenkaku {
    my @converted = ();
    foreach my $rawLine (split(/\n/, $_[0])) {
        my $line = $rawLine;
        chomp($line);
        $line =~ s/\x0d//g;
        my( $midasi, $features ) = (split( /\t/, $line ))[0,1];
        unless( defined($features) && length($features) != 0 ) {
            # No feature column: pass the line through.
            push @converted, sprintf("%s\x0d\n", $line);
            next;
        }
        my( $hinsi, $yomi ) = (split( /,/, $features ))[0,8];
        #
        # MeCab workaround: words written in katakana may come without a
        # reading, so the headword itself is used as the reading.
        #
        $yomi = $midasi if( $hinsi eq "名詞" && $yomi eq "" );
        # Convert via Shift_JIS, apply h2z, and come back to UTF-8.
        my $yomiSjis = Jcode->new($yomi,'utf8')->sjis;
        my $yomiWide = Jcode->new($yomiSjis,'sjis')->h2z->utf8;
        push @converted, sprintf("%s\t%s,,,,,,,,%s\x0d\n", $midasi, $hinsi, $yomiWide );
    }
    return join("", @converted);
}
########################################################################
#
# Subroutine: JoinTyouonKigou
#
# Appends stand-alone long-vowel marks to the preceding word so that its
# pronunciation is fixed.
#
# Input: MeCab extended format, one morpheme per line
#   headword ($midasi), a tab, then comma-separated fields:
#     field 0: part of speech ($hinsi)
#     field 8: reading        ($yomi)
#
# Output: the same format with every line terminated by "\x0d\n". A word
# followed by long-vowel entries gets them appended to both headword and
# reading; the absorbed entries are removed from the output.
#
sub JoinTyouonKigou {
    my @myWords = split(/\n/, $_[0]);
    my %absorbed = ();  # indexes of long-vowel entries merged into a word
    my @joined = ();
    for(my $i=0; $i<@myWords; $i++) {
        # An absorbed entry must vanish, not come back as a blank line.
        next if( $absorbed{$i} );
        my $thisWord = $myWords[$i];
        chomp($thisWord);
        $thisWord =~ s/\x0d//g;
        my( $midasi, $thisWord19 ) = (split( /\t/, $thisWord ))[0,1];
        if( !defined($thisWord19) || length($thisWord19)==0 ) {
            # No feature column: pass the line through.
            push @joined, sprintf("%s\x0d\n",$thisWord);
            next;
        }
        my( $hinsi, $yomi ) = (split( /,/, $thisWord19 ))[0,8];
        #
        # While the following entries are long-vowel marks, append them to
        # the headword and the reading.
        #
        for(my $j=$i+1; $j<@myWords; $j++) {
            my $nextWord = $myWords[$j];
            chomp($nextWord);
            $nextWord =~ s/\x0d//g;
            my( $nextMidasi, $nextWord19 ) = (split( /\t/, $nextWord ))[0,1];
            # Lines without a feature column are looked past, not treated
            # as the end of the run.
            next if( !defined($nextWord19) || length($nextWord19)==0 );
            last if( $nextMidasi ne "ー" && $nextMidasi ne "-" );
            $midasi .= $nextMidasi;
            $yomi .= $nextMidasi;
            $absorbed{$j} = 1;
        }
        push @joined, sprintf("%s\t%s,,,,,,,,%s\x0d\n", $midasi, $hinsi, $yomi );
    }
    return join("", @joined);
}
########################################################################
#
# Subroutine: AddPhonetic
#
# Adds the phonetic transcription to every morpheme.
#
# Input: MeCab extended format, one morpheme per line
#   headword ($midasi), a tab, then comma-separated fields:
#     field 0: part of speech ($hinsi)
#     field 8: reading        ($yomi)
#
# Output: the same lines with two fields appended
#     field  9: "lyric:phonetic;" pairs ($lyricPhonetics); a character
#               without a table entry contributes ":;"
#     field 10: number of sounds        ($phoneticCount)
#
# The reading is scanned as bytes in steps of 3 (one kana in UTF-8), trying
# 9, then 6, then 3 bytes against the table.
# NOTE(review): the scan only runs while at least two bytes remain, so the
# single-byte keys (",", "!", "?", "(") can never match in the lookup, and
# "-" can only match as the very last byte; presumably full-width forms
# were intended - confirm.
#
sub AddPhonetic {
    my @myWords = split(/\n/, $_[0]);
    # Several keys appear twice in this list; the later entry wins.
    my %phonetic = (
        "あ"=>"a", "い"=>"i", "う"=>"M", "え"=>"e", "お"=>"o",
        "ぁ"=>"a", "ぃ"=>"i", "ぅ"=>"M", "ぇ"=>"e", "ぉ"=>"o",
        "か"=>"k a","き"=>"k' i","く"=>"k M", "け"=>"k e", "こ"=>"k o",
        "さ"=>"s a","し"=>"S i", "す"=>"s M", "せ"=>"s e", "そ"=>"s o",
        "た"=>"t a","ち"=>"tS i","つ"=>"ts M", "て"=>"t e", "と"=>"t o",
        "な"=>"n a","に"=>"J i", "ぬ"=>"n M", "ね"=>"n e", "の"=>"n o",
        "は"=>"h a","ひ"=>"C i", "ふ"=>"p\\ M","へ"=>"h e", "ほ"=>"h o",
        "ま"=>"m a","み"=>"m' i","む"=>"m M", "め"=>"m e", "も"=>"m o",
        "や"=>"j a", "ゆ"=>"j M", "いぇ"=>"j e","よ"=>"j o",
        "ら"=>"4 a","り"=>"4' i","る"=>"4 M", "れ"=>"4 e", "ろ"=>"4 o",
        "わ"=>"w a","ゐ"=>"w i",
        # "ゑ"=>"w e",
        "を"=>"o","うぃ"=>"w i", "うぇ"=>"w e",
        "ん"=>"n",
        #"んが"=>"N a", "んぎ"=>"N i", "んぐ"=>"N M", "んげ"=>"N e", "んご"=>"N o",
        #"んにゃ"=>"N' a","んにぃ"=>"N' i","んにゅ"=>"N' M","んにぇ"=>"N' e","んにょ"=>"N' o",
        "が"=>"g a", "ぎ"=>"g' i", "ぐ"=>"g M", "げ"=>"g e", "ご"=>"g o",
        "ざ"=>"dz a", "じ"=>"dZ i", "ず"=>"dz M", "ぜ"=>"dz e", "ぞ"=>"dz o",
        "だ"=>"d a", "ぢ"=>"dZ i", "づ"=>"dz M", "で"=>"d e", "ど"=>"d o",
        "ば"=>"b a", "び"=>"b' i", "ぶ"=>"b M", "べ"=>"b e", "ぼ"=>"b o",
        "ぱ"=>"p a", "ぴ"=>"p' i", "ぷ"=>"p M", "ぺ"=>"p e", "ぽ"=>"p o",
        "きゃ"=>"k' a","きぃ"=>"k' i","きゅ"=>"k' M","きぇ"=>"k' e","きょ"=>"k' o",
        "しゃ"=>"S a", "しぃ"=>"S i", "しゅ"=>"S M", "しぇ"=>"S e", "しょ"=>"S o",
        "ちゃ"=>"tS a","ちぃ"=>"tS i","ちゅ"=>"tS M","ちぇ"=>"tS e","ちょ"=>"tS o",
        "にゃ"=>"J a", "にぃ"=>"J i", "にゅ"=>"J M", "にぇ"=>"J e", "にょ"=>"J o",
        "ひゃ"=>"C a", "ひぃ"=>"C i", "ひゅ"=>"C M", "ひぇ"=>"C e", "ひょ"=>"C o",
        "ふゃ"=>"p\\' a","ふぃ"=>"p\\' i","ふゅ"=>"p\\' M","ふぇ"=>"p\\' e","ふょ"=>"p\\' o",
        "みゃ"=>"m' a","みぃ"=>"m' i","みゅ"=>"m' M","みぇ"=>"m' e","みょ"=>"m' o",
        "りゃ"=>"4' a","りぃ"=>"4' i","りゅ"=>"4' M","りぇ"=>"4' e","りょ"=>"4' o",
        "ぎゃ"=>"g' a","ぎぃ"=>"g' i","ぎゅ"=>"g' M","ぎぇ"=>"g' e","ぎょ"=>"g' o",
        "じゃ"=>"Z a", "じぃ"=>"Z i", "じゅ"=>"Z M", "じぇ"=>"Z e", "じょ"=>"Z o",
        "じゃ"=>"dZ a","じぃ"=>"dZ i","じゅ"=>"dZ M","じぇ"=>"dZ e","じょ"=>"dZ o",
        "ぢゃ"=>"dZ a","ぢぃ"=>"dZ i","ぢゅ"=>"dZ M","ぢぇ"=>"dZ e","ぢょ"=>"dZ o",
        "びゃ"=>"b' a","びぃ"=>"b' i","びゅ"=>"b' M","びぇ"=>"b' e","びょ"=>"b' o",
        "ぴゃ"=>"p' a","ぴぃ"=>"p' i","ぴゅ"=>"p' M","ぴぇ"=>"p' e","ぴょ"=>"p' o",
        "ふぁ"=>"p\\ a","ふぃ"=>"p\\ i","ふぅ"=>"p\\ M","ふぇ"=>"p\\ e","ふぉ"=>"p\\ o",
        "てゃ"=>"t' a","てぃ"=>"t' i","てゅ"=>"t' M","てぇ"=>"t' e","てょ"=>"t' o",
        "でゃ"=>"d' a","でぃ"=>"d' i","でゅ"=>"d' M","でぇ"=>"d' e","でょ"=>"d' o",
        "ぁ"=>"h\\ a", "ぃ"=>"h\\ i", "ぅ"=>"h\\ M","ぇ"=>"h\\ e", "ぉ"=>"h\\ o",
        "っ"=>"tS ",
        "すぃ"=>"s i", "ずぃ"=>"dz i", "とぅ"=>"t M","どぅ"=>"d M",
        "ア"=>"a", "イ"=>"i", "ウ"=>"M", "エ"=>"e", "オ"=>"o",
        "カ"=>"k a","キ"=>"k' i","ク"=>"k M", "ケ"=>"k e", "コ"=>"k o",
        "サ"=>"s a","シ"=>"S i", "ス"=>"s M", "セ"=>"s e", "ソ"=>"s o",
        "タ"=>"t a","チ"=>"tS i","ツ"=>"ts M", "テ"=>"t e", "ト"=>"t o",
        "ナ"=>"n a","ニ"=>"J i", "ヌ"=>"n M", "ネ"=>"n e", "ノ"=>"n o",
        "ハ"=>"h a","ヒ"=>"C i", "フ"=>"p\\ M","ヘ"=>"h e", "ホ"=>"h o",
        "マ"=>"m a","ミ"=>"m' i","ム"=>"m M", "メ"=>"m e", "モ"=>"m o",
        "ヤ"=>"j a", "ユ"=>"j M", "イェ"=>"j e","ヨ"=>"j o",
        "ラ"=>"4 a","リ"=>"4' i","ル"=>"4 M", "レ"=>"4 e", "ロ"=>"4 o",
        "ワ"=>"w a","ヰ"=>"w i",
        # "ヱ"=>"w e",
        "ヲ"=>"o", "ウィ"=>"w i", "ウェ"=>"w e",
        "ン" =>"n",
        #"ンガ" =>"N a", "ンギ"=>"N i", "ング"=>"N M", "ンゲ"=>"N e", "ンゴ"=>"N o",
        #"ンニャ"=>"N' a","ンニィ"=>"N' i","ンニュ"=>"N' M","ンニェ"=>"N' e","ンニョ"=>"N' o",
        "ガ"=>"g a", "ギ"=>"g' i", "グ"=>"g M", "ゲ"=>"g e", "ゴ"=>"g o",
        "ザ"=>"dz a", "ジ"=>"dZ i", "ズ"=>"dz M", "ゼ"=>"dz e", "ゾ"=>"dz o",
        "ダ"=>"d a", "ヂ"=>"dZ i", "ヅ"=>"dz M", "デ"=>"d e", "ド"=>"d o",
        "バ"=>"b a", "ビ"=>"b' i", "ブ"=>"b M", "ベ"=>"b e", "ボ"=>"b o",
        "パ"=>"p a", "ピ"=>"p' i", "プ"=>"p M", "ペ"=>"p e", "ポ"=>"p o",
        # "ヴ"=>""p M",
        "キャ"=>"k' a","キィ"=>"k' i","キュ"=>"k' M","キェ"=>"k' e","キョ"=>"k' o",
        "シャ"=>"S a", "シィ"=>"S i", "シュ"=>"S M", "シェ"=>"S e", "ショ"=>"S o",
        "チャ"=>"tS a","チィ"=>"tS i","チュ"=>"tS M","チェ"=>"tS e","チョ"=>"tS o",
        "ニャ"=>"J a", "ニィ"=>"J i", "ニュ"=>"J M", "ニェ"=>"J e", "ニョ"=>"J o",
        "ヒャ"=>"C a", "ヒィ"=>"C i", "ヒュ"=>"C M", "ヒェ"=>"C e", "ヒョ"=>"C o",
        "フャ"=>"p\\' a","フィ"=>"p\\' i","フュ"=>"p\\' M","フェ"=>"p\\' e","フョ"=>"p\\' o",
        "ミャ"=>"m' a", "ミィ"=>"m' i", "ミュ"=>"m' M", "ミェ"=>"m' e", "ミョ"=>"m' o",
        "リャ"=>"4' a", "リィ"=>"4' i", "リュ"=>"4' M", "リェ"=>"4' e", "リョ"=>"4' o",
        "ギャ"=>"g' a","ギィ"=>"g' i","ギュ"=>"g' M","ギェ"=>"g' e","ギョ"=>"g' o",
        "ジャ"=>"Z a", "ジィ"=>"Z i", "ジュ"=>"Z M", "ジェ"=>"Z e", "ジョ"=>"Z o",
        "ジャ"=>"dZ a","ジィ"=>"dZ i","ジュ"=>"dZ M","ジェ"=>"dZ e","ジョ"=>"dZ o",
        # The last two keys of this row must be the ヂ forms, matching the
        # hiragana ぢ row above.
        "ヂャ"=>"dZ a","ヂィ"=>"dZ i","ヂュ"=>"dZ M","ヂェ"=>"dZ e","ヂョ"=>"dZ o",
        "ビャ"=>"b' a","ビィ"=>"b' i","ビュ"=>"b' M","ビェ"=>"b' e","ビョ"=>"b' o",
        "ピャ"=>"p' a","ピィ"=>"p' i","ピュ"=>"p' M","ピェ"=>"p' e","ピョ"=>"p' o",
        "ファ"=>"p\\ a","フィ"=>"p\\ i","フゥ"=>"p\\ M","フェ"=>"p\\ e","フォ"=>"p\\ o",
        "テャ"=>"t' a", "ティ"=>"t' i", "テゥ"=>"t' M", "テェ"=>"t' e", "テォ"=>"t' o",
        "デャ"=>"d' a", "ディ"=>"d' i", "デゥ"=>"d' M","デェ"=>"d' e", "デォ"=>"d' o",
        "ァ"=>"h\\ a", "ィ"=>"h\\ i", "ゥ"=>"h\\ M", "ェ"=>"h\\ e", "ォ"=>"h\\ o",
        "ッ"=>"tS ",
        "スィ"=>"s i", "ズィ"=>"dz i", "トゥ"=>"t M","ドゥ"=>"d M",
        ","=>"br1","、"=>"br1", "。"=>"br2","!"=>"br2","?"=>"br2",
        "("=>"br4","『"=>"br4","「"=>"br4"
    );
    my @annotated = ();
    foreach my $rawLine (@myWords) {
        my $thisWord = $rawLine;
        chomp($thisWord);
        $thisWord =~ s/\x0d//g;
        my( $midasi, $thisWord19 ) = (split( /\t/, $thisWord ))[0,1];
        if( !defined($thisWord19) || length($thisWord19)==0 ) {
            # No feature column: pass the line through.
            push @annotated, sprintf("%s\x0d\n",$thisWord);
            next;
        }
        my( $hinsi, $yomi ) = (split( /,/, $thisWord19 ))[0,8];
        my $lyricPhonetics = ""; # accumulated "lyric:phonetic;" pairs
        my $phoneticCount = 0;
        #
        # Walk through the reading and collect lyric / phonetic pairs.
        #
        my $idxYomi = 0;
        while( $idxYomi < (length($yomi)-1) ) {
            #
            # Look up 3 characters (9 bytes), then 2, then 1, and take the
            # first slice that has a table entry.
            #
            my $myLyric = "";
            my $myPhonetic = "";
            my $myLyricLen = 0;
            for($myLyricLen=9; $myLyricLen>=3; $myLyricLen-=3) {
                $myLyric = substr($yomi,$idxYomi,$myLyricLen);
                if( exists($phonetic{$myLyric}) ) {
                    $myPhonetic = $phonetic{$myLyric};
                    $phoneticCount++;
                    last;
                } elsif( $myLyricLen<=3 ) {
                    $myPhonetic = "";
                    last;
                }
            }
            $idxYomi += $myLyricLen;
            if( $myPhonetic eq "" ) {
                # No table entry: emit an empty pair.
                $lyricPhonetics .= ":;";
                next;
            }
            #
            # Long-vowel marks directly after the sound are appended to the
            # lyric; each one counts as one more sound.
            #
            for(;;) {
                my $myLyric_appendix = substr($yomi,$idxYomi,3);
                if( $myLyric_appendix ne "ー" && $myLyric_appendix ne "-" ) {
                    last;
                }
                $phoneticCount++;
                $myLyric .= $myLyric_appendix;
                $idxYomi += 3;
            }
            $lyricPhonetics .= $myLyric.":".$myPhonetic.";";
        }
        push @annotated, sprintf("%s\t%s,,,,,,,,%s,%s,%d\x0d\n",
            $midasi, $hinsi, $yomi, $lyricPhonetics, $phoneticCount );
    }
    return join("", @annotated);
}
########################################################################
#
# Subroutine: AddSeparator
#
# Inserts separator lines into the morpheme list.
#
#   phrase break "W575", breath break "BR", end of sentence "EOS"
#
# A separator is a line of its own without a tab or feature column.
# Every emitted line ends with "\x0d\n", so a separator is never fused
# with the line that follows it. Empty lines reset the phrase counter but
# are not emitted.
#
sub AddSeparator {
    my @myWords = split(/\n/, $_[0]);
    # Parts of speech that can start a phrase.
    my %startsPhrase = map { $_ => 1 }
        ( "名詞", "形容詞", "形容動詞", "接頭詞", "感動詞", "接頭辞", "副詞" );
    my $phoneticCountFromW575 = 0; # sounds since the last separator
    my @output = ();
    for(my $i=0; $i<@myWords; $i++) {
        #
        # Split the current word into its parts.
        #
        my $thisWord = $myWords[$i];
        chomp($thisWord);
        $thisWord =~ s/\x0d//g;
        my( $midasi, $thisWord19 ) = (split( /\t/, $thisWord ))[0,1];
        if( !defined($thisWord19) || length($thisWord19) == 0 ) {
            #
            # The line is itself a separator: restart counting.
            #
            $phoneticCountFromW575 = 0;
            if( length($thisWord) != 0 ) {
                push @output, $thisWord."\x0d\n";
            }
            next;
        }
        if( $i == $#myWords ) {
            #
            # The final word always ends a sentence.
            #
            push @output, sprintf("%s\x0d\n%s\x0d\n", $thisWord, "EOS" );
            next;
        }
        my( $hinsi, $lyricphonetics, $phoneticCount )
            = (split( /,/, $thisWord19 ))[0,9,10];
        $hinsi = "" if( !defined($hinsi) );
        $phoneticCountFromW575 += $phoneticCount;
        my $nextWord = $myWords[$i+1];
        chomp($nextWord);
        $nextWord =~ s/\x0d//g;
        my( $nextMidasi, $nextWord19 ) = (split( /\t/, $nextWord ))[0,1];
        my $nextIsSeparator = ( !defined($nextWord19) || length($nextWord19) == 0 );
        if( $nextIsSeparator && defined($nextMidasi) && $nextMidasi eq "EOS" ) {
            #
            # MeCab already marked the end of the sentence; nothing to add.
            #
            push @output, $thisWord."\x0d\n";
            next;
        }
        my $nextHinsi = $nextIsSeparator ? undef : (split( /,/, $nextWord19 ))[0];
        $nextHinsi = "" if( !defined($nextHinsi) );
        my $mySeparator = "";
        if( $midasi eq "。" || $midasi eq "!" || $midasi eq "?" ) {
            $mySeparator = "EOS";
        } elsif( 0 <= index($lyricphonetics,"br") ) {
            $mySeparator = "BR";
        } elsif( 5 <= $phoneticCountFromW575 ) {
            if( $phoneticCountFromW575 <= 7 ) {
                #
                # 5 to 7 sounds so far: break only where this word cannot
                # start a phrase and the next word can.
                #
                if( !$startsPhrase{$hinsi} && $startsPhrase{$nextHinsi} ) {
                    $mySeparator = "W575";
                }
            } else {
                #
                # More than 7 sounds without a break: also break before a verb.
                #
                if( $startsPhrase{$nextHinsi} || $nextHinsi eq "動詞" ) {
                    $mySeparator = "W575";
                }
            }
        }
        push @output, $thisWord."\x0d\n";
        if( length($mySeparator) != 0 ) {
            $phoneticCountFromW575 = 0;
            push @output, $mySeparator."\x0d\n";
        }
    }
    return join("", @output);
}
########################################################################
#
# Subroutine: ParseSentence
#
# Scans the whole text from the last line to the first and appends three
# counters to every morpheme line.
#
# Input: MeCab extended format
#   headword ($midasi), a tab, then comma-separated fields:
#     field  0: part of speech
#     field  8: reading
#     field  9: lyric / phonetic pairs ($lyricphonetics)
#     field 10: number of sounds       ($phoneticCount)
#
# Output: the same lines with three fields appended
#     field 11: counted morphemes up to the next "EOS"
#     field 12: counted morphemes up to the next "BR" or "EOS"
#     field 13: counted morphemes up to the next "W575", "BR" or "EOS"
#   Only morphemes with a non-zero sound count and no "br" in field 9 are
#   counted. Separator lines are only re-terminated with "\x0d\n".
#
sub ParseSentence {
    my @lines = split(/\n/, $_[0]);
    my ($toEos, $toBr, $toW575) = (0, 0, 0);
    foreach my $idx (reverse(0 .. $#lines)) {
        my $line = $lines[$idx];
        chomp($line);
        $line =~ s/\x0d//g;
        my( $midasi, $features ) = (split( /\t/, $line ))[0,1];
        if( defined($features) && length($features) != 0 ) {
            # Morpheme line: record the counters, then count this morpheme.
            my( $lyricphonetics, $phoneticCount ) = (split( /,/, $features ))[9,10];
            $lines[$idx] = sprintf("%s,%d,%d,%d\x0d\n",
                $line, $toEos, $toBr, $toW575 );
            if( $phoneticCount != 0 && index($lyricphonetics,"br") < 0 ) {
                $toEos++;
                $toBr++;
                $toW575++;
            }
            next;
        }
        # Separator line: a stronger separator resets the weaker counters too.
        if( $midasi eq "EOS" ) {
            ($toEos, $toBr, $toW575) = (0, 0, 0);
        } elsif( $midasi eq "BR" ) {
            ($toBr, $toW575) = (0, 0);
        } elsif( $midasi eq "W575" ) {
            $toW575 = 0;
        }
        $lines[$idx] = sprintf("%s\x0d\n", $line);
    }
    return join("", @lines);
}
########################################################################
#
# Subroutine: GetSessionId
#
# Builds a string that identifies the current session from the local date
# and time (year, month, day, hour, minute, second). Eight random digits
# are appended to make the identifier harder to guess.
#
# Arguments passed by the caller are not used.
#
sub GetSessionId {
    my @now = localtime(time);
    my ($second, $minute, $hour, $day, $month, $year) = @now[0 .. 5];
    my $randomSuffix = int(rand(99999999));
    # localtime counts years from 1900 and months from 0.
    return sprintf("%04d%02d%02d%02d%02d%02d%08d",
        $year + 1900, $month + 1, $day, $hour, $minute, $second, $randomSuffix);
}
########################################################################
#
# Subroutine: checkMa
#
# Validates the morpheme text: a line is valid when it starts with "EOS"
# or has a non-empty second tab-separated column.
#
# Arg 1: morpheme text, one morpheme per line
# Arg 2: session id (not used)
#
# Returns 0 when every line is valid, otherwise the 1-based number of the
# first invalid line. EOS lines are counted like any other line.
#
sub checkMa {
    my @myWords = split(/\n/, $_[0]);
    my $lineNumber = 0;
    foreach my $thisWord (@myWords) {
        # Count before any early exit so EOS lines are not skipped.
        $lineNumber++;
        chomp($thisWord);
        next if( substr($thisWord,0,3) eq "EOS" );
        my $features = (split(/\t/, $thisWord))[1];
        if( !defined($features) || length($features) == 0 ) {
            return $lineNumber;
        }
    }
    return 0;
}
########################################################################
#
# Subroutine: mecab2VsqGenerate
#
# Generates a VSQ file from text in the MeCab extended format.
#
# Arg 1: MeCab extended text, one morpheme or separator per line
# Arg 2: path of the VSQ file to write
#
# Morpheme line: headword ($midasi), a tab, then comma-separated fields:
#     field  0: part of speech                        ($hinsi)
#     field  8: reading                               ($yomi)
#     field  9: lyric / phonetic pairs                ($lyricPhonetics)
#     field 10: number of sounds                      ($phoneticCount)
#     field 11: morphemes up to the end of sentence   ($eosCount)
#     field 12: morphemes up to the next breath       ($brCount)
#     field 13: morphemes up to the next phrase break ($w575Count)
# Separator line: "EOS", "BR" or "W575" without a feature column.
#
sub mecab2VsqGenerate {
    my @myWords = split(/\n/, $_[0]);
    my $myVsqFile = $_[1];
    #
    # Events of the master track.
    #
    my @masterEvents = (
        ['track_name', 0, 'Master Track'],
        ['set_tempo', 0, 500_000], # 1qn = .50 seconds
        ['time_signature',0,4,2,24,8 ],
        ['end_track' , 0],
    );
    #
    # Initial contents of the text sections that describe the singing.
    #
    my @myCommonSection = ( "[Common]", "Version=DSB301", "Name=Voice1",
        "Color=181,162,123", "DynamicsMode=1", "PlayMode=1" );
    my @myMasterSection = ( "[Master]", "PreMeasure=4" );
    my @myMixerSection = ( "[Mixer]", "MasterFeder=0", "MasterPanpot=0",
        "MasterMute=0", "OutputMode=0", "Tracks=1", "Feder0=0", "Panpot0=0",
        "Mute0=0", "Solo0=0" );
    my @myEventListSection = ( "[EventList]", "0=ID#0000" );
    my @myIDSection = ( "[ID#0000]", "Type=Singer", "IconHandle=h#0000" );
    my @myHandleSection = ( "[h#0000]", "IconID=\$07010000", "IDS=Miku",
        "Original=0", "Caption=", "Length=1", "Language=0", "Program=0" );
    my @myDynamicsBPListSection = ( "[DynamicsBPList]" );
    my @myPitchBendBPListSection = ( "[PitchBendBPList]" );
    my @myPitchBendSensBPListSection = ( "[PitchBendSensBPList]", "7680=6" );
    my @mikuMidiEvents = ( ['control_change' , 0, 0, 99, 96] );
    #
    # State used while generating the events.
    #
    my $myPhoneticTotalCount = 0;       # number of notes emitted so far
    my $pitch_normal = 120;             # duration of a normal sound
    my $note = 64;                      # note number (pitch)
    my $myIdPos = 7680;                 # position of the next sound
    my $myIdPosLast = $myIdPos;         # position after the previous lyric
    my $myPitchBendInit = 2400;         # initial pitch bend
    my $myPitchBend = $myPitchBendInit; # pitch bend base
    my $myPitchBendHinsi = 0;           # pitch bend offset per part of speech
    my $myPitchBendDelta = 0;           # pitch bend change per step
    my $myProminence = 1;               # 1: the next sound starts a phrase
    my $lastSeparator = "";
    my $nextSeparator = "";
    my $i=0;
    for($i=0; $i<@myWords; $i++) {
        my $thisWord = $myWords[$i];
        chomp($thisWord);
        $thisWord =~ s/\x0d//g;
        my( $midasi, $thisWord19 ) = (split( /\t/, $thisWord ))[0,1];
        if( !defined($thisWord19) || length($thisWord19) == 0 ) {
            #
            # Separator line: insert a pause and restart the intonation.
            #
            $lastSeparator = $midasi;
            if( $lastSeparator eq "EOS" ) {
                $myProminence = 1;
                $myIdPos += 240;                  # gap for breath and sentence end
                $myPitchBend = $myPitchBendInit;
            } elsif( $lastSeparator eq "BR" ) {
                $myProminence = 1;
                $myIdPos += 120;                  # gap for a breath
                $myPitchBend = $myPitchBendInit;
            } elsif( $lastSeparator eq "W575" ) {
                $myProminence = 1;
                $myIdPos += 60;                   # gap for a phrase break
                if( $myPitchBend < -3600 ) {
                    $myPitchBend = 0;
                }
            }
            next;
        }
        my( $hinsi,
            $yomi,
            $lyricPhonetics,
            $phoneticCount,
            $eosCount,
            $brCount,
            $w575Count ) = (split( /,/, $thisWord19 ))[0,8,9,10,11,12,13];
        my $nextWord = $myWords[$i+1];
        chomp($nextWord);
        $nextWord =~ s/\x0d//g;
        my( $nextMidasi, $nextWord19 ) = (split( /\t/, $nextWord ))[0,1];
        if( !defined($nextWord19) || length($nextWord19) == 0 ) {
            $nextSeparator = $nextMidasi;
        } else {
            $nextSeparator = "";
        }
        #
        # A breath or sentence end follows: add a silent period.
        #
        if( $nextSeparator eq "BR" || $nextSeparator eq "EOS" ){
            $myIdPos += 120;
            $myPitchBend = $myPitchBendInit;
        }
        # Opening brackets raise the note by one (up to 70) ...
        if( $midasi eq "「" || $midasi eq "『" || $midasi eq "(" ) {
            if( $note < 70 ) {
                $note += 1;
            }
        }
        # ... and closing brackets lower it again (down to 64).
        if( $midasi eq "」" || $midasi eq "』" || $midasi eq ")" ) {
            if( 64 < $note ) {
                $note -= 1;
            }
        }
        #
        # A symbol without any sound only takes the time of one character.
        #
        if(($hinsi eq "特殊" || $hinsi eq "記号")
            && $phoneticCount == 0 ) {
            $myIdPos += 120;
            next;
        }
        #
        # Volume per part of speech.
        #
        if( $hinsi eq "名詞" ) {
            push @myDynamicsBPListSection, ($myIdPos)."=80";
        } elsif( $hinsi eq "形容詞" || $hinsi eq "形容動詞"
            || $hinsi eq "接頭詞" || $hinsi eq "感動詞" || $hinsi eq "接頭辞"
            || $hinsi eq "副詞" ) {
            push @myDynamicsBPListSection, ($myIdPos)."=72";
        } else {
            push @myDynamicsBPListSection, ($myIdPos)."=64";
        }
        #
        # Emit one note per lyric / phonetic pair of the reading.
        #
        my $lyricCount = 0;
        my @eachLyricPhonetics = split( /;/, $lyricPhonetics );
        foreach my $thisLyricPhonetic (@eachLyricPhonetics) {
            my ($thisLyric, $thisPhonetic ) = split(/:/,$thisLyricPhonetic);
            my $myLength = 120;
            #
            # Sentence-final punctuation gets a long breath. The test is a
            # comparison: $nextSeparator must not be modified here.
            #
            if( $nextSeparator eq "EOS" && substr($thisPhonetic,0,2) eq "br" ) {
                $myLength = 240;
            }
            #
            # Long-vowel marks lengthen the sound and are removed from the lyric.
            #
            if( 0 < index($thisLyric,"ー",0) ) {
                $myLength *= (length($thisLyric) - index($thisLyric,"ー",0))/3 + 1;
                $thisLyric = substr($thisLyric,0,index($thisLyric,"ー",0));
            } elsif( 0 < index($thisLyric,"-",0) ) {
                $myLength *= (length($thisLyric) - index($thisLyric,"-",0))/3 + 1;
                $thisLyric = substr($thisLyric,0,index($thisLyric,"-",0));
            }
            if( $eosCount == 0 ) {
                #
                # End of the sentence: lower the pitch ...
                #
                $myPitchBendDelta = -400;
                $myPitchBendHinsi = -3600;
                if( $nextMidasi eq "?" && $lyricCount == $phoneticCount - 1) {
                    #
                    # ... except on the last sound of a question, which rises.
                    #
                    $myPitchBendDelta = +400;
                }
            } elsif( $brCount == 0 ) {
                #
                # Directly before a breath: lower the tone.
                #
                $myPitchBendHinsi = -3600;
                $myPitchBendDelta = -400;
            } else {
                #
                # These parts of speech get a gentle natural fall.
                #
                if( $hinsi eq "名詞" || $hinsi eq "形容詞" || $hinsi eq "形容動詞"
                    || $hinsi eq "接頭詞" || $hinsi eq "感動詞" || $hinsi eq "接頭辞"
                    || $hinsi eq "副詞" ) {
                    $myPitchBendDelta = -200;
                    $myPitchBendHinsi = 0;
                    if( $hinsi eq "名詞" && $lyricCount == 0 && 2 < $phoneticCount && $myLength == 120) {
                        $myPitchBendDelta = +400;
                    }
                } else {
                    $myPitchBendDelta = -200;
                    $myPitchBendHinsi = -1200;
                    if( $w575Count == 0 ) {
                        $myPitchBendHinsi = -2400;
                    }
                }
            }
            # The first sound after a separator always rises.
            if( $myProminence == 1 ) {
                $myPitchBendDelta = +400;
                $myProminence = 0;
            }
            if( $thisPhonetic ne "" ) {
                push @myEventListSection, sprintf("%d=ID#%04d",$myIdPos,$myPhoneticTotalCount+1);
                push @myIDSection, sprintf("[ID#%04d]",$myPhoneticTotalCount+1);
                push @myIDSection, "Type=Anote";
                push @myIDSection, "Length=".$myLength;
                push @myIDSection, "Note#=".$note;
                #
                # k, g, s, dz, dZ and "ts M": large value (short consonant).
                #
                my @phonetic_array = split(/ /, $thisPhonetic);
                my $myConsonant = $phonetic_array[0];
                if( $myConsonant eq "k" || $myConsonant eq "g"
                    || $myConsonant eq "s" || $myConsonant eq "dz" || $myConsonant eq "dZ"
                    || $thisPhonetic eq "ts M") {
                    push @myIDSection, "Dynamics=80";
                } elsif( $myConsonant eq "4" ) {
                    #
                    # "4" consonants: small value (long consonant).
                    #
                    push @myIDSection, "Dynamics=20";
                } else {
                    push @myIDSection, "Dynamics=50";
                }
                push @myIDSection, "PMBendDepth=6";
                push @myIDSection, "PMBendLength=0";
                push @myIDSection, "PMbPortamentoUse=0";
                push @myIDSection, "DEMdecGainRate=0";
                push @myIDSection, "DEMaccent=4";
                push @myIDSection, sprintf("LyricHandle=h#%04d", $myPhoneticTotalCount+1);
                push @myHandleSection, sprintf("[h#%04d]", $myPhoneticTotalCount+1);
                push @myHandleSection, "L0=\"". Jcode->new($thisLyric,'utf8')->sjis
                    ."\",\"". $thisPhonetic."\",1.000000,64,0,0";
                if( substr($thisPhonetic,0,2) eq "br" ) {
                    # A breath restarts the intonation.
                    $myPitchBend = $myPitchBendInit;
                } else {
                    # One pitch bend point per 120 units of the note; the
                    # base and the output are both clamped to -8192..8191.
                    my $myLocalId = 0;
                    for($myLocalId = 0; $myLocalId<$myLength; $myLocalId += 120 ) {
                        $myPitchBend += $myPitchBendDelta*2;
                        if( $myPitchBend < -8192 ) {
                            $myPitchBend = -8192;
                        } elsif( 8191 < $myPitchBend ) {
                            $myPitchBend = 8191;
                        }
                        my $myPitchBendOut = $myPitchBend + $myPitchBendHinsi;
                        if( $myPitchBendOut < -8192 ) {
                            $myPitchBendOut = -8192;
                        } elsif( 8191 < $myPitchBendOut ) {
                            $myPitchBendOut = 8191;
                        }
                        push @myPitchBendBPListSection, ($myIdPos+$myLocalId)."=".$myPitchBendOut;
                    }
                }
                $myIdPos += $myLength;
                if( $myPhoneticTotalCount == 0 ) {
                    push @mikuMidiEvents,['control_change', 5760, 0, 99, 80];
                } else {
                    push @mikuMidiEvents,['control_change', $myIdPos - $myIdPosLast, 0, 99, 80];
                }
                $myPhoneticTotalCount++;
            } else {
                # A symbol that cannot be sung only takes up time.
                $myIdPos += $myLength;
            }
            $myIdPosLast = $myIdPos;
            $lyricCount++;
        }
    }
    push @mikuMidiEvents,['end_track' , $pitch_normal+1920];
    push @myEventListSection, sprintf("%d=EOS",$myIdPos);
    #
    # Concatenate all text sections, one "\n"-terminated line per entry.
    #
    my $my_miku_phonetics = join("", map { $_."\n" } (
        @myCommonSection,
        @myMasterSection,
        @myMixerSection,
        @myEventListSection,
        @myIDSection,
        @myHandleSection,
        @myDynamicsBPListSection,
        @myPitchBendBPListSection,
        @myPitchBendSensBPListSection,
    ));
    #
    # Events of the Voice1 track: the text is cut into 119-byte chunks,
    # each carried by a text event prefixed with "DM:nnnn:".
    #
    my @voice1Events = (
        ['track_name', 0, 'Voice1'],
    );
    for($i=0; $i*119<length($my_miku_phonetics); $i++) {
        push @voice1Events, ['text_event', 0,
            sprintf("DM:%04d:",$i).substr($my_miku_phonetics,$i*119,119) ];
    }
    push @voice1Events, @mikuMidiEvents;
    #
    # Write the master track and the Voice1 track as the VSQ file.
    #
    my $master_track = MIDI::Track->new({ 'events' => \@masterEvents });
    my $voice1_track = MIDI::Track->new({ 'events' => \@voice1Events });
    my $opus = MIDI::Opus->new(
        { 'format' => 1, 'ticks' => 480, 'tracks' => [ $master_track, $voice1_track ] } );
    $opus->write_to_file( $myVsqFile );
}
なお、サーバー上で同プログラムを動かすためには、サーバー上に MIDI-Perl
のファイル、CGIのモジュール及びJcodeのモジュールが必要です。infoseek はデフォルト状態では MIDI-Perl
が存在しませんので、以下のファイルをサーバ上に転送しました。
最近のコメント