MIKU_TALK 0.2.1 のVSQ生成部ソースを公開します。サーバ上のCGIスクリプトとして動作しますが、ローカルPC上で動くように変更するのは、それほど難しくはないと思います。説明なくとも理解できるよう、できるだけ沢山のコメントを入れました。これを参考に更に良いものが世の中に生み出されればと思います。
#!/usr/local/bin/perl
BEGIN { unshift(@INC, "/cgi-bin/lib"); }
########################################################################
#
# Filename : miku_talk021.cgi @infoseek
# Charcode : UTF-8
#
# Released on 26/Apr/2008, Programmed by A.Izumi
#
#
use strict;
use CGI;
use CGI::Carp qw(fatalsToBrowser);
use Jcode;
use MIDI; # MIDI-Perl のインストールを要する。(非標準モジュール)
#
# Provider-specific setting: relative folder that receives the work files.
#
my $myWorkFolderPath = "work";
#
# Provider-specific setting: URL prefix used to build the VSQ file URL.
my $myUrl = "work";
# Markup printed at the top of every HTML page emitted below (empty here).
my $myAdv = "";
$CGI::POST_MAX = 1024*1024*100;    # accept POST bodies up to 100 MiB
#
# Create the work folder up front. The result is deliberately ignored, so an
# already existing folder is not an error.
mkdir($myWorkFolderPath, 0777);
my $q = CGI->new;
my $mySessionId = GetSessionId($q->remote_host);
my $myPostedString = $q->param("myText1");
# A request without the myText1 field is handled as empty input instead of
# pushing undef through the substitutions below.
if( !defined($myPostedString) ) {
    $myPostedString = "";
}
my $myDebugMode = $q->param("debug");
if( !defined($myDebugMode) ) {
    $myDebugMode = 0;
}
# Normalize line endings: CRLF is first collapsed to a bare CR, then every CR
# is expanded to CRLF, so CR-only input ends up as CRLF. A bare LF is left
# untouched.
$myPostedString =~ s/\x0d\x0a/\x0d/g;
$myPostedString =~ s/\x0d/\x0d\x0a/g;
# Decode any %XX escapes still present in the text.
# NOTE(review): presumably the client percent-encodes the text itself before
# posting it -- confirm against the submitting form.
$myPostedString =~ s/%([0-9a-f][0-9a-f])/pack("C",hex($1))/egi;
$myPostedString = Jcode->new($myPostedString)->utf8;
#
# Validate the posted morphemes. A positive return value is reported to the
# user as the number of the line that could not be read.
#
my $maErrorLine = checkMa($myPostedString,$mySessionId);
if( $maErrorLine > 0 ) {
    print $q->header(-type=>'text/html; charset=UTF-8');
    print $q->start_html(-lang=>'ja',
        -charset => 'UTF-8',
        -encoding => 'UTF-8',
        -title => 'MIKU_TALK Ver. 0.2.0');
    print $myAdv, "\n";
    print $q->br;
    print $q->h1('MIKU_TALK はエラーを検出しました。'),
        $q->hr;
    print "MIKU_TALK は、".$maErrorLine."行目の形態素を読み取れません。\n";
    print $q->br;
    # Close the document opened by start_html before leaving.
    print $q->end_html;
    exit();
}
if( $myDebugMode != 0 ) {
    # Debug mode answers with an HTML page; its head is sent before the
    # conversion starts and the download link is appended afterwards.
    print $q->header(-type=>'text/html; charset=UTF-8');
    print $q->start_html(-lang=>'ja',
        -charset => 'UTF-8',
        -encoding => 'UTF-8',
        -title => 'MIKU_TALK Ver. 0.2.0');
    print $myAdv, "\n";
    print $q->h1('MIKU_TALK がVSQファイルを生成しました。'),
        $q->hr,
        $q->br;
}
#
# Directory-based lock: mkdir succeeds for only one process at a time.
#
my $lockdir = "miku_lock";
my $retry = 20;                     # number of attempts
while (!mkdir($lockdir, 0755)) {    # could not take the lock: wait and retry
    if (--$retry <= 0) {
        last;                       # give up waiting and proceed without the lock
    }
    sleep(1);                       # wait one second between attempts
}
my $myDebugPath = "./". $myWorkFolderPath. "/". $mySessionId. ".txt";
my $myVsqPath = "./". $myWorkFolderPath. "/". $mySessionId. ".vsq";
my $myVsqUrl = $myUrl. "/". $mySessionId. ".vsq";
#
# Conversion pipeline. The text after every stage is dumped to ma00N.txt.
#
WriteMA("ma000.txt",$myPostedString);
$myPostedString = Hankaku2Zenkaku($myPostedString);
WriteMA("ma001.txt",$myPostedString);
$myPostedString = JoinTyouonKigou($myPostedString);
WriteMA("ma002.txt",$myPostedString);
$myPostedString = AddPhonetic($myPostedString);
WriteMA("ma003.txt",$myPostedString);
$myPostedString = AddSeparator($myPostedString);
WriteMA("ma004.txt",$myPostedString);
$myPostedString = ParseSentence($myPostedString);
WriteMA("ma005.txt",$myPostedString);
mecab2VsqGenerate( $myPostedString, $myVsqPath );
my @myStatVsq = ();
@myStatVsq = stat $myVsqPath;
rmdir($lockdir);                    # release the lock
#
# VSQ generation has finished.
#
if( $myDebugMode != 0 ) {
    #
    # Debug mode: show a link to the download script instead of sending the
    # file, and leave the generated file in place for it.
    #
    my $myVsqCgi = "miku_talk_dl021.cgi";
    $myVsqCgi .= "?download_name=". $mySessionId. ".vsq";
    $myVsqCgi .= "&source_path=" . $myVsqPath;
    $myVsqCgi .= "&debug=" . $myDebugMode;
    print $q->a({href=> $myVsqCgi }, "VSQファイルのダウンロード");
    print $q->br;
    print "↑ ここをクリックしてください。";
    print $q->br;
    print $q->end_html;
} else {
    #
    # Normal mode: send the VSQ file itself as a download.
    #
    # The file is opened and read BEFORE any header goes out, so a missing or
    # unreadable file is reported as an error rather than being delivered as
    # an empty "audio/midi" attachment.
    #
    open(my $hVsq, "<", $myVsqPath)
        or die "Cannot open generated VSQ file $myVsqPath: $!";
    binmode $hVsq;
    my $contents = do { local $/; <$hVsq> };    # slurp the whole file
    close($hVsq);
    if( !defined($contents) ) {
        $contents = "";
    }
    print $q->header(
        -content_type => "audio/midi",
        -content_disposition => "attachment;filename=". $mySessionId. ".vsq"
    );
    binmode STDOUT;                 # the payload is binary
    print $contents;
    #
    # The work file is no longer needed once it has been sent.
    #
    unlink $myVsqPath;
}
exit;
########################################################################
#
# Subroutine: WriteMA
#
# Writes one stage of the morpheme text to a file, one "\n"-terminated
# line per line of input. The main script uses it for its ma00N.txt dumps.
#
# arg 1: path of the file to write (truncated if it exists)
# arg 2: text to write
#
# Returns arg 2 unchanged. The dump is best-effort: when the file cannot
# be opened a warning is issued and the text is still returned.
#
sub WriteMA {
    my ( $path, $text ) = @_;
    my $hOut;
    if( !open( $hOut, ">", $path ) ) {
        warn "WriteMA: cannot open $path for writing: $!";
        return $text;
    }
    # Every line is written, including the last one.
    foreach my $line ( split( /\n/, defined($text) ? $text : "" ) ) {
        print {$hOut} $line, "\n";
    }
    close($hOut);
    return $text;
}
########################################################################
#
# Subroutine: Hankaku2Zenkaku
#
# Rewrites the reading of every morpheme line through Jcode's h2z
# (half-width to full-width) conversion.
#
# Input, Mecab extended format, one morpheme per line:
#   headword <TAB> comma-separated features
#     feature 0 : part of speech ($hinsi)
#     feature 8 : reading        ($yomi)
#
# Output: each morpheme line becomes
#   headword <TAB> hinsi,,,,,,,,yomi
# so features 1-7 are emitted empty. Lines without a feature part are
# passed through. Every output line ends in "\x0d\n".
#
# arg 1: the text to convert
# Returns the converted text.
#
sub Hankaku2Zenkaku {
    my @converted;
    foreach my $rawLine ( split( /\n/, $_[0] ) ) {
        my $line = $rawLine;
        chomp($line);
        $line =~ s/\x0d//g;
        my( $midasi, $features ) = (split( /\t/, $line ))[0,1];
        #
        # No feature part: keep the line and only normalize its ending.
        #
        unless( defined($features) && length($features) != 0 ) {
            push( @converted, $line . "\x0d\n" );
            next;
        }
        my( $hinsi, $yomi ) = (split( /,/, $features ))[0,8];
        #
        # Mecab workaround: words written in katakana sometimes come without
        # a reading, so a noun with an empty reading uses its headword.
        #
        $yomi = $midasi if( $hinsi eq "名詞" && $yomi eq "" );
        #
        # Round trip through Shift_JIS, applying h2z on the way back to UTF-8.
        #
        my $yomi_sj = Jcode->new($yomi,'utf8')->sjis;
        $yomi = Jcode->new($yomi_sj,'sjis')->h2z->utf8;
        push( @converted, join( "", $midasi, "\t", $hinsi, ",,,,,,,,", $yomi, "\x0d\n" ) );
    }
    return join( "", @converted );
}
########################################################################
#
# Subroutine: JoinTyouonKigou
#
# Attaches stand-alone long-vowel marks to the word in front of them, so
# that the pronunciation of that word is settled.
#
# Input, Mecab extended format, one morpheme per line:
#   headword <TAB> comma-separated features
#     feature 0 : part of speech ($hinsi)
#     feature 8 : reading        ($yomi)
#
# Output: each morpheme line becomes  headword <TAB> hinsi,,,,,,,,yomi
# with the absorbed marks appended to both headword and reading. The
# absorbed lines are removed. Lines without a feature part are passed
# through. Every output line, including the last, ends in "\x0d\n".
#
# arg 1: the text to process
# Returns the processed text.
#
sub JoinTyouonKigou {
    my @myWords = split(/\n/, $_[0]);
    #
    # $absorbed[$n] is set once line $n has been merged into an earlier word.
    # Such lines must not be reformatted again, otherwise the emptied slot
    # would come back as a stray blank line.
    #
    my @absorbed;
    for(my $i=0; $i<@myWords; $i++) {
        if( $absorbed[$i] ) {
            next;
        }
        #
        # Split the current word into its parts.
        #
        my $thisWord = $myWords[$i];
        chomp($thisWord);
        $thisWord =~ s/\x0d//g;
        my( $midasi, $thisWord19 ) = (split( /\t/, $thisWord ))[0,1];
        if( !defined($thisWord19) || length($thisWord19)==0 ) {
            $myWords[$i] = sprintf("%s\x0d\n",$thisWord);
            next;
        }
        my( $hinsi, $yomi ) = (split( /,/, $thisWord19 ))[0,8];
        #
        # While the following words are long-vowel marks, append them to the
        # headword and the reading. Lines without a feature part are skipped
        # over without ending the search.
        #
        for(my $j=$i+1; $j<@myWords; $j++) {
            my $nextWord = $myWords[$j];
            chomp($nextWord);
            $nextWord =~ s/\x0d//g;
            my( $nextMidasi, $nextWord19 ) = (split( /\t/, $nextWord ))[0,1];
            if( !defined($nextWord19) || length($nextWord19)==0 ) {
                next;
            }
            if( $nextMidasi ne "ー" && $nextMidasi ne "-" ) {
                last;
            }
            $midasi .= $nextMidasi;
            $yomi .= $nextMidasi;
            $myWords[$j] = "";
            $absorbed[$j] = 1;
        }
        $myWords[$i] = sprintf("%s\t%s,,,,,,,,%s\x0d\n", $midasi, $hinsi, $yomi );
    }
    #
    # Absorbed lines are empty by now and drop out here.
    #
    return join( "", grep { length($_) != 0 } @myWords );
}
########################################################################
#
# Subroutine: AddPhonetic
#
# Appends the Miku lyric/phonetic-symbol pairs and their count to every
# morpheme line.
#
# Input, Mecab extended format, one morpheme per line:
#   headword <TAB> comma-separated features
#     feature  0 : part of speech ($hinsi)
#     feature  8 : reading        ($yomi)
#
# Output: each morpheme line becomes
#   headword <TAB> hinsi,,,,,,,,yomi,lyricPhonetics,phoneticCount
#     feature  9 : "lyric:phonetic;" repeated once per matched unit; a
#                  unit with no table entry contributes ":;"
#     feature 10 : number of matched units plus absorbed long-vowel marks
# Lines without a feature part are passed through. Every output line
# ends in "\x0d\n".
#
# The reading is scanned as raw UTF-8 bytes in steps of 3 bytes, i.e. it
# is treated as a sequence of 3-byte characters.
#
# arg 1: the text to process
# Returns the processed text.
#
sub AddPhonetic {
    my @myWords = split(/\n/, $_[0]);
    #
    # Kana (or kana group) => phonetic symbols. Some keys occur twice in
    # this list; as with any Perl hash the LATER entry is the effective one.
    #
    # NOTE(review): the punctuation keys in the last two rows that are
    # single-byte characters as written here can never be matched by the
    # 3-byte scan below -- confirm whether full-width forms were intended.
    #
    my %phonetic = (
        "あ"=>"a", "い"=>"i", "う"=>"M", "え"=>"e", "お"=>"o",
        "ぁ"=>"a", "ぃ"=>"i", "ぅ"=>"M", "ぇ"=>"e", "ぉ"=>"o",
        "か"=>"k a","き"=>"k' i","く"=>"k M", "け"=>"k e", "こ"=>"k o",
        "さ"=>"s a","し"=>"S i", "す"=>"s M", "せ"=>"s e", "そ"=>"s o",
        "た"=>"t a","ち"=>"tS i","つ"=>"ts M", "て"=>"t e", "と"=>"t o",
        "な"=>"n a","に"=>"J i", "ぬ"=>"n M", "ね"=>"n e", "の"=>"n o",
        "は"=>"h a","ひ"=>"C i", "ふ"=>"p\\ M","へ"=>"h e", "ほ"=>"h o",
        "ま"=>"m a","み"=>"m' i","む"=>"m M", "め"=>"m e", "も"=>"m o",
        "や"=>"j a", "ゆ"=>"j M", "いぇ"=>"j e","よ"=>"j o",
        "ら"=>"4 a","り"=>"4' i","る"=>"4 M", "れ"=>"4 e", "ろ"=>"4 o",
        "わ"=>"w a","ゐ"=>"w i",
        # "ゑ"=>"w e",
        "を"=>"o","うぃ"=>"w i", "うぇ"=>"w e",
        "ん"=>"n",
        #"んが"=>"N a", "んぎ"=>"N i", "んぐ"=>"N M", "んげ"=>"N e", "んご"=>"N o",
        #"んにゃ"=>"N' a","んにぃ"=>"N' i","んにゅ"=>"N' M","んにぇ"=>"N' e","んにょ"=>"N' o",
        "が"=>"g a", "ぎ"=>"g' i", "ぐ"=>"g M", "げ"=>"g e", "ご"=>"g o",
        "ざ"=>"dz a", "じ"=>"dZ i", "ず"=>"dz M", "ぜ"=>"dz e", "ぞ"=>"dz o",
        "だ"=>"d a", "ぢ"=>"dZ i", "づ"=>"dz M", "で"=>"d e", "ど"=>"d o",
        "ば"=>"b a", "び"=>"b' i", "ぶ"=>"b M", "べ"=>"b e", "ぼ"=>"b o",
        "ぱ"=>"p a", "ぴ"=>"p' i", "ぷ"=>"p M", "ぺ"=>"p e", "ぽ"=>"p o",
        "きゃ"=>"k' a","きぃ"=>"k' i","きゅ"=>"k' M","きぇ"=>"k' e","きょ"=>"k' o",
        "しゃ"=>"S a", "しぃ"=>"S i", "しゅ"=>"S M", "しぇ"=>"S e", "しょ"=>"S o",
        "ちゃ"=>"tS a","ちぃ"=>"tS i","ちゅ"=>"tS M","ちぇ"=>"tS e","ちょ"=>"tS o",
        "にゃ"=>"J a", "にぃ"=>"J i", "にゅ"=>"J M", "にぇ"=>"J e", "にょ"=>"J o",
        "ひゃ"=>"C a", "ひぃ"=>"C i", "ひゅ"=>"C M", "ひぇ"=>"C e", "ひょ"=>"C o",
        "ふゃ"=>"p\\' a","ふぃ"=>"p\\' i","ふゅ"=>"p\\' M","ふぇ"=>"p\\' e","ふょ"=>"p\\' o",
        "みゃ"=>"m' a","みぃ"=>"m' i","みゅ"=>"m' M","みぇ"=>"m' e","みょ"=>"m' o",
        "りゃ"=>"4' a","りぃ"=>"4' i","りゅ"=>"4' M","りぇ"=>"4' e","りょ"=>"4' o",
        "ぎゃ"=>"g' a","ぎぃ"=>"g' i","ぎゅ"=>"g' M","ぎぇ"=>"g' e","ぎょ"=>"g' o",
        "じゃ"=>"Z a", "じぃ"=>"Z i", "じゅ"=>"Z M", "じぇ"=>"Z e", "じょ"=>"Z o",
        "じゃ"=>"dZ a","じぃ"=>"dZ i","じゅ"=>"dZ M","じぇ"=>"dZ e","じょ"=>"dZ o",
        "ぢゃ"=>"dZ a","ぢぃ"=>"dZ i","ぢゅ"=>"dZ M","ぢぇ"=>"dZ e","ぢょ"=>"dZ o",
        "びゃ"=>"b' a","びぃ"=>"b' i","びゅ"=>"b' M","びぇ"=>"b' e","びょ"=>"b' o",
        "ぴゃ"=>"p' a","ぴぃ"=>"p' i","ぴゅ"=>"p' M","ぴぇ"=>"p' e","ぴょ"=>"p' o",
        "ふぁ"=>"p\\ a","ふぃ"=>"p\\ i","ふぅ"=>"p\\ M","ふぇ"=>"p\\ e","ふぉ"=>"p\\ o",
        "てゃ"=>"t' a","てぃ"=>"t' i","てゅ"=>"t' M","てぇ"=>"t' e","てょ"=>"t' o",
        "でゃ"=>"d' a","でぃ"=>"d' i","でゅ"=>"d' M","でぇ"=>"d' e","でょ"=>"d' o",
        "ぁ"=>"h\\ a", "ぃ"=>"h\\ i", "ぅ"=>"h\\ M","ぇ"=>"h\\ e", "ぉ"=>"h\\ o",
        "っ"=>"tS ",
        "すぃ"=>"s i", "ずぃ"=>"dz i", "とぅ"=>"t M","どぅ"=>"d M",
        "ア"=>"a", "イ"=>"i", "ウ"=>"M", "エ"=>"e", "オ"=>"o",
        "カ"=>"k a","キ"=>"k' i","ク"=>"k M", "ケ"=>"k e", "コ"=>"k o",
        "サ"=>"s a","シ"=>"S i", "ス"=>"s M", "セ"=>"s e", "ソ"=>"s o",
        "タ"=>"t a","チ"=>"tS i","ツ"=>"ts M", "テ"=>"t e", "ト"=>"t o",
        "ナ"=>"n a","ニ"=>"J i", "ヌ"=>"n M", "ネ"=>"n e", "ノ"=>"n o",
        "ハ"=>"h a","ヒ"=>"C i", "フ"=>"p\\ M","ヘ"=>"h e", "ホ"=>"h o",
        "マ"=>"m a","ミ"=>"m' i","ム"=>"m M", "メ"=>"m e", "モ"=>"m o",
        "ヤ"=>"j a", "ユ"=>"j M", "イェ"=>"j e","ヨ"=>"j o",
        "ラ"=>"4 a","リ"=>"4' i","ル"=>"4 M", "レ"=>"4 e", "ロ"=>"4 o",
        "ワ"=>"w a","ヰ"=>"w i",
        # "ヱ"=>"w e",
        "ヲ"=>"o", "ウィ"=>"w i", "ウェ"=>"w e",
        "ン" =>"n",
        #"ンガ" =>"N a", "ンギ"=>"N i", "ング"=>"N M", "ンゲ"=>"N e", "ンゴ"=>"N o",
        #"ンニャ"=>"N' a","ンニィ"=>"N' i","ンニュ"=>"N' M","ンニェ"=>"N' e","ンニョ"=>"N' o",
        "ガ"=>"g a", "ギ"=>"g' i", "グ"=>"g M", "ゲ"=>"g e", "ゴ"=>"g o",
        "ザ"=>"dz a", "ジ"=>"dZ i", "ズ"=>"dz M", "ゼ"=>"dz e", "ゾ"=>"dz o",
        "ダ"=>"d a", "ヂ"=>"dZ i", "ヅ"=>"dz M", "デ"=>"d e", "ド"=>"d o",
        "バ"=>"b a", "ビ"=>"b' i", "ブ"=>"b M", "ベ"=>"b e", "ボ"=>"b o",
        "パ"=>"p a", "ピ"=>"p' i", "プ"=>"p M", "ペ"=>"p e", "ポ"=>"p o",
        # "ヴ"=>""p M",
        "キャ"=>"k' a","キィ"=>"k' i","キュ"=>"k' M","キェ"=>"k' e","キョ"=>"k' o",
        "シャ"=>"S a", "シィ"=>"S i", "シュ"=>"S M", "シェ"=>"S e", "ショ"=>"S o",
        "チャ"=>"tS a","チィ"=>"tS i","チュ"=>"tS M","チェ"=>"tS e","チョ"=>"tS o",
        "ニャ"=>"J a", "ニィ"=>"J i", "ニュ"=>"J M", "ニェ"=>"J e", "ニョ"=>"J o",
        "ヒャ"=>"C a", "ヒィ"=>"C i", "ヒュ"=>"C M", "ヒェ"=>"C e", "ヒョ"=>"C o",
        "フャ"=>"p\\' a","フィ"=>"p\\' i","フュ"=>"p\\' M","フェ"=>"p\\' e","フョ"=>"p\\' o",
        "ミャ"=>"m' a", "ミィ"=>"m' i", "ミュ"=>"m' M", "ミェ"=>"m' e", "ミョ"=>"m' o",
        "リャ"=>"4' a", "リィ"=>"4' i", "リュ"=>"4' M", "リェ"=>"4' e", "リョ"=>"4' o",
        "ギャ"=>"g' a","ギィ"=>"g' i","ギュ"=>"g' M","ギェ"=>"g' e","ギョ"=>"g' o",
        "ジャ"=>"Z a", "ジィ"=>"Z i", "ジュ"=>"Z M", "ジェ"=>"Z e", "ジョ"=>"Z o",
        "ジャ"=>"dZ a","ジィ"=>"dZ i","ジュ"=>"dZ M","ジェ"=>"dZ e","ジョ"=>"dZ o",
        # The last two keys of this row are the ヂ forms, matching the
        # hiragana row "ぢゃ".."ぢょ" above.
        "ヂャ"=>"dZ a","ヂィ"=>"dZ i","ヂュ"=>"dZ M","ヂェ"=>"dZ e","ヂョ"=>"dZ o",
        "ビャ"=>"b' a","ビィ"=>"b' i","ビュ"=>"b' M","ビェ"=>"b' e","ビョ"=>"b' o",
        "ピャ"=>"p' a","ピィ"=>"p' i","ピュ"=>"p' M","ピェ"=>"p' e","ピョ"=>"p' o",
        "ファ"=>"p\\ a","フィ"=>"p\\ i","フゥ"=>"p\\ M","フェ"=>"p\\ e","フォ"=>"p\\ o",
        "テャ"=>"t' a", "ティ"=>"t' i", "テゥ"=>"t' M", "テェ"=>"t' e", "テォ"=>"t' o",
        "デャ"=>"d' a", "ディ"=>"d' i", "デゥ"=>"d' M","デェ"=>"d' e", "デォ"=>"d' o",
        "ァ"=>"h\\ a", "ィ"=>"h\\ i", "ゥ"=>"h\\ M", "ェ"=>"h\\ e", "ォ"=>"h\\ o",
        "ッ"=>"tS ",
        "スィ"=>"s i", "ズィ"=>"dz i", "トゥ"=>"t M","ドゥ"=>"d M",
        ","=>"br1","、"=>"br1", "。"=>"br2","!"=>"br2","?"=>"br2",
        "("=>"br4","『"=>"br4","「"=>"br4"
    );
    for(my $i=0; $i<@myWords; $i++) {
        #
        # The word currently looked at.
        #
        my $thisWord = $myWords[$i];
        chomp($thisWord);
        $thisWord =~ s/\x0d//g;
        my( $midasi, $thisWord19 ) = (split( /\t/, $thisWord ))[0,1];
        if( !defined($thisWord19) || length($thisWord19)==0 ) {
            $myWords[$i] = sprintf("%s\x0d\n",$thisWord);
            next;
        }
        my( $hinsi, $yomi ) = (split( /,/, $thisWord19 ))[0,8];
        my $lyricPhonetics = "";    # accumulated "lyric:phonetic;" pairs
        my $phoneticCount = 0;
        my $yomiLen = defined($yomi) ? length($yomi) : 0;
        #
        # Walk through the reading and collect lyric and phonetic symbols.
        #
        my $idxYomi = 0;
        while( $idxYomi < $yomiLen-1 ) {
            #
            # Try 3 characters (9 bytes), then 2, then 1 against the table;
            # the longest match wins.
            #
            my $myLyric = "";
            my $myPhonetic = "";
            for(my $myLyricLen=9; $myLyricLen>=3; $myLyricLen-=3) {
                my $candidate = substr($yomi,$idxYomi,$myLyricLen);
                if( exists($phonetic{$candidate}) ) {
                    $myLyric = $candidate;
                    $myPhonetic = $phonetic{$candidate};
                    $phoneticCount++;
                    last;
                }
            }
            if( $myPhonetic eq "" ) {
                #
                # No table entry: emit an empty pair and skip one 3-byte unit.
                #
                $lyricPhonetics .= ":;";
                $idxYomi += 3;
                next;
            }
            #
            # Advance by what was actually matched. Near the end of the
            # reading a wide window returns fewer bytes than requested.
            #
            $idxYomi += length($myLyric);
            #
            # Long-vowel marks directly after the matched unit are appended
            # to its lyric; each one also counts as one phonetic unit.
            #
            while( $idxYomi < $yomiLen ) {
                my $myLyric_appendix = substr($yomi,$idxYomi,3);
                if( $myLyric_appendix ne "ー" && $myLyric_appendix ne "-" ) {
                    last;
                }
                $phoneticCount++;
                $myLyric .= $myLyric_appendix;
                $idxYomi += 3;
            }
            $lyricPhonetics .= $myLyric.":".$myPhonetic.";";
        }
        $myWords[$i] = sprintf("%s\t%s,,,,,,,,%s,%s,%d\x0d\n",
            $midasi, $hinsi, $yomi, $lyricPhonetics, $phoneticCount );
    }
    return join( "", @myWords );
}
########################################################################
#
# Subroutine: AddSeparator
#
# Inserts separator lines after morphemes.
#
#   "W575" : pronunciation break
#   "BR"   : breath break
#   "EOS"  : end of sentence
#
# A separator line carries no tab and no features, only its name followed
# by the line ending.
#
# Input: one morpheme per line,
#   headword <TAB> hinsi,,,,,,,,yomi,lyricPhonetics,phoneticCount
# plus separator lines already present (e.g. "EOS" from Mecab).
#
# Output: the same lines, each ending in "\x0d\n", with separator lines
# added. Lines that are empty once "\x0d" is stripped are dropped.
#
# arg 1: the text to process
# Returns the processed text.
#
sub AddSeparator {
    my @myWords = split(/\n/, $_[0]);
    #
    # Parts of speech that may open a new phrase.
    #
    my %startsPhrase = map { $_ => 1 }
        ( "名詞", "形容詞", "形容動詞", "接頭詞", "感動詞", "接頭辞", "副詞" );
    #
    # Phonetic units accumulated since the last separator.
    #
    my $phoneticCountFromW575 = 0;
    for(my $i=0; $i<@myWords; $i++) {
        #
        # Split the current word into its parts.
        #
        my $thisWord = $myWords[$i];
        chomp($thisWord);
        $thisWord =~ s/\x0d//g;
        my( $midasi, $thisWord19 ) = (split( /\t/, $thisWord ))[0,1];
        if( !defined($thisWord19) || length($thisWord19) == 0 ) {
            #
            # The line is a separator itself: restart the count. The line is
            # written back with its line ending so that it cannot run into
            # the following line when the result is joined.
            #
            $phoneticCountFromW575 = 0;
            $myWords[$i] = length($thisWord) != 0 ? $thisWord."\x0d\n" : "";
            next;
        }
        if( $i == $#myWords ) {
            #
            # The final word always ends the sentence.
            #
            $myWords[$i] = sprintf("%s\x0d\n%s\x0d\n", $thisWord, "EOS" );
            next;
        }
        my( $hinsi, $lyricphonetics, $phoneticCount )
            = (split( /,/, $thisWord19 ))[0,9,10];
        if( !defined($hinsi) ) {
            $hinsi = "";
        }
        $phoneticCountFromW575 += defined($phoneticCount) ? $phoneticCount : 0;
        my $nextWord = $myWords[$i+1];
        chomp($nextWord);
        $nextWord =~ s/\x0d//g;
        my( $nextMidasi, $nextWord19 ) = (split( /\t/, $nextWord ))[0,1];
        my $nextHasFeatures = ( defined($nextWord19) && length($nextWord19) != 0 );
        if( !$nextHasFeatures && defined($nextMidasi) && $nextMidasi eq "EOS" ) {
            #
            # Mecab already marked the sentence end after this word, so no
            # separator is added. The word still gets its line ending so the
            # "EOS" stays on a line of its own.
            #
            $myWords[$i] = sprintf("%s\x0d\n", $thisWord );
            next;
        }
        my $nextHinsi = defined($nextWord19) ? (split( /,/, $nextWord19 ))[0] : undef;
        if( !defined($nextHinsi) ) {
            $nextHinsi = "";
        }
        my $mySeparator = "";
        if( $midasi eq "。" || $midasi eq "!" || $midasi eq "?" ) {
            $mySeparator = "EOS";
        } elsif( defined($lyricphonetics) && 0 <= index($lyricphonetics,"br") ) {
            $mySeparator = "BR";
        } elsif( $phoneticCountFromW575 < 5 ) {
            #
            # Too short to break: nothing to do.
            #
            ;
        } elsif( $phoneticCountFromW575 <= 7 ) {
            #
            # 5 to 7 units: break when this word does not open a phrase
            # itself and the next word does.
            #
            if( !exists($startsPhrase{$hinsi}) && exists($startsPhrase{$nextHinsi}) ) {
                $mySeparator = "W575";
            }
        } else {
            #
            # More than 7 units without a break: break before any phrase
            # opener, and before a verb as well.
            #
            if( exists($startsPhrase{$nextHinsi}) || $nextHinsi eq "動詞" ) {
                $mySeparator = "W575";
            }
        }
        $myWords[$i] = sprintf("%s\x0d\n", $thisWord );
        if( length($mySeparator) != 0 ) {
            $phoneticCountFromW575 = 0;
            $myWords[$i] .= sprintf("%s\x0d\n", $mySeparator );
        }
    }
    return join( "", grep { length($_) != 0 } @myWords );
}
########################################################################
#
# サブルーチン名:ParseSentence
#
# 文章全体を解析する。
#
# Mecab拡張形式入力
#
# 意味 :変数
#-------------------------------------------------
# 見出し :$midasi
# タブ区切り
# 0,品詞 :$hinsi
# 8,読み :$yomi
# 9,ミク発音 :$lyricPhonetics
# 10,発音数 :$phoneticCount
#
# Mecab拡張形式出力(●部分)
#
# 意味 :変数
#-------------------------------------------------
# 見出し :$midasi
# タブ区切り
# 0,品詞 :$hinsi
# 8,読み :$yomi
# 9,ミク発音 :$lyricPhonetics
# 10,発音数 :$phoneticCount
# ●11,文章終端までの形態素数 :$eosCount
# ●12,息継ぎまでの形態素数 :$brCount
# ●13,575区切りまでの形態素数:$w575Count
#
#
sub ParseSentence {
my @myWords = split(/\n/, $_[0]);
my $myEosCount = 0;
my $myBrCount = 0;
my $myW575Count = 0;
my $i=0;
for($i=@myWords-1; $i>=0; $i--) {
#
# 現在着目している単語
#
my $thisWord = $myWords[$i];
chomp($thisWord);
最近のコメント