MIKU_TALK 0.2.0 ソース (形態素解析パート)
MIKU_TALK Ver. 0.2.0 の形態素解析パート ActionScript 3.0 のコードを貼ります。Flex SDK 2.0 でコンパイル可能です。ただし"(アプリケーションID)"の部分は、各自 Yahoo にアプリケーションIDを申請の上で、そのID文字列を入力する必要があります。
Yahoo! 形態素解析サービスをWEBアプリケーションで使うときのサンプルとしてお使いください。
<?xml version="1.0"?>
<mx:Application xmlns:mx="http://www.adobe.com/2006/mxml"
creationComplete="init()">
<mx:Script>
<![CDATA[
import mx.controls.Alert;
import flash.net.*;
//import flash.external.*;
import mx.rpc.events.FaultEvent;
import mx.rpc.events.ResultEvent;
// URLLoader field; no function visible in this script block reads or assigns it.
private var loader:URLLoader;
// Sets the default E4X namespace to "urn:yahoo:jp:jlp". setResponse() reads the
// service response with unqualified element names (ma_result, word_list, word, ...),
// so this is presumably the namespace of the response document - TODO confirm.
default xml namespace = new Namespace("urn:yahoo:jp:jlp");
/**
 * Handler for the application's creationComplete event.
 * Pre-fills the input area by delegating to write_sample().
 */
private function init():void
{
    this.write_sample();
}
/**
 * Click handler of the "形態素解析" button.
 *
 * Clears the previous analysis output, rejects empty input and input of
 * 2000 characters or more with an alert, and otherwise sends the sentence
 * to yahoo_ma_service.
 */
private function request():void
{
    yahoo_ma.text = "";

    var inputLength:int = sentence.text.length;
    if( inputLength == 0 ) {
        Alert.show('自然文が入力されていません', 'Message');
        return;
    }
    if( inputLength >= 2000 ) {
        // The current length is appended directly after the message text.
        Alert.show('自然文が2000文字を超えています' + inputLength, 'Message');
        return;
    }

    // Only the sentence is passed as a request parameter; the application ID
    // is carried in the service URL instead.
    var params:Object = { sentence: sentence.text };
    yahoo_ma_service.send(params);
}
/**
 * Click handler of the "括弧削除" button.
 *
 * Removes every parenthesized span (the parentheses and the text between
 * them, nesting included) from the input area. Both half-width "(" ")" and
 * full-width U+FF08 / U+FF09 parentheses are recognized; the full-width forms
 * are written as \u escapes so they cannot be silently folded to ASCII.
 * An unmatched closing parenthesis is dropped; an unmatched opening
 * parenthesis removes everything up to the end of the text.
 */
private function delete_parenthesis():void
{
    var source:String = sentence.text;
    var kept:String = "";
    var depth:int = 0;   // current nesting depth of open parentheses

    for(var i:int = 0; i < source.length; i++) {
        var c:String = source.charAt(i);
        if( c == "(" || c == "\uFF08" ) {
            depth++;
        } else if( c == ")" || c == "\uFF09" ) {
            // Never go negative on a stray closing parenthesis.
            if( depth > 0 ) {
                depth--;
            }
        } else if( depth == 0 ) {
            kept += c;
        }
    }
    sentence.text = kept;
}
/**
 * Result handler of yahoo_ma_service.
 *
 * Converts each <word> element of the response into one output line
 *   surface TAB pos ",,,,,,," reading "," pronounce NEWLINE
 * and appends all lines to the yahoo_ma text area.
 *
 * The loop runs over the <word> elements actually present instead of trusting
 * the total_count value, so a count that disagrees with the list no longer
 * yields empty rows. Lines are collected first and written to the text area
 * once.
 *
 * @param event Result event of the service call (the payload is read from
 *              yahoo_ma_service.lastResult).
 */
private function setResponse(event:ResultEvent):void
{
    var response:XML = yahoo_ma_service.lastResult as XML;
    if( response == null ) {
        // Nothing usable came back (lastResult was not XML); leave the output untouched.
        return;
    }

    var zeroCode:Number = "0".charCodeAt(0);
    var nineCode:Number = "9".charCodeAt(0);
    var lines:Array = [];

    for each( var entry:XML in response.ma_result.word_list.word ) {
        var surface:String = entry.surface;
        var pos:String = entry.pos;
        var reading:String = entry.reading;
        var pronounce:String = reading;

        // Readings that start with an ASCII digit are spelled out as numbers;
        // everything else goes through the alphabet/symbol reader.
        var firstCode:Number = reading.charCodeAt(0);
        if( zeroCode <= firstCode && firstCode <= nineCode ) {
            pronounce = pronounce_number(reading);
        } else {
            pronounce = pronounce_alphabet(reading);
        }

        // The particles へ and は are pronounced え and わ.
        if( pos == "助詞" ) {
            if( reading == "へ" ) {
                pronounce = "え";
            }
            if( reading == "は" ) {
                pronounce = "わ";
            }
        }

        // Nouns whose surface starts with "・" or "…" keep the surface as both
        // reading and pronunciation.
        if( pos == "名詞" ) {
            var head:String = surface.charAt(0);
            if( head == "・" || head == "…" ) {
                reading = surface;
                pronounce = surface;
            }
        }

        pronounce = pronounce_tyouon(pronounce);

        lines.push(surface + "\t"
            + pos + ",,,,,,,"
            + reading + ","
            + pronounce + "\n");
    }

    yahoo_ma.text += lines.join("");
}
/**
 * Spells out a string of ASCII decimal digits in hiragana.
 *
 * Digits are processed left to right; currentFigure is the 1-based place of
 * the current digit counted from the right. Inside each four-digit group the
 * place words じゅう / ひゃく / せん are added, and at the lowest place of a
 * group the group word まん / おく / ちょう / けい is added if the group
 * contained a non-zero digit.
 *
 * Fixes over the previous version:
 *  - a non-zero ones digit of a group now re-arms the group word, so
 *    "100010000" reads "いちおくいちまん" instead of "いちおくいち";
 *  - the 10^16 group word けい is emitted, so 17-digit input (which was
 *    already accepted) no longer picks up ちょう by mistake.
 *
 * @param reading Digit string. Characters other than 0-9 contribute no digit
 *                name but still occupy a place, as before.
 * @return Hiragana reading, or "よめない" when the input has more than 17 characters.
 */
private function pronounce_number(reading:String):String
{
    // Names for the digits 2..9, indexed by their position in DIGITS_2_TO_9.
    var DIGITS_2_TO_9:String = "23456789";
    var digitNames:Array = ["に", "さん", "よん", "ご", "ろく", "しち", "はち", "きゅう"];

    var pronounce:String = "";
    var currentFigure:Number = reading.length;
    // 1 while the current four-digit group still owes its group word.
    // Starts at 1 so the leading group always gets its word.
    var pronounceFigure:Number = 1;

    if( currentFigure > 17 ) return "よめない";

    for(var i:Number = 0; i < reading.length; i++, currentFigure--) {
        var c:String = reading.charAt(i);
        var placeInGroup:Number = currentFigure % 4;   // 1=ones, 2=tens, 3=hundreds, 0=thousands

        if( c == "0" ) {
            // A zero is only spoken when it is the entire number.
            if( reading.length == 1 ) pronounce += "ぜろ";
        } else {
            if( c == "1" ) {
                // "1" is silent before じゅう/ひゃく/せん, except いっせん above 10^4.
                if( placeInGroup == 1 ) pronounce += "いち";
                if( placeInGroup == 0 && 4 < currentFigure ) pronounce += "いっ";
            } else {
                var nameIndex:int = DIGITS_2_TO_9.indexOf(c);
                if( nameIndex >= 0 ) pronounce += digitNames[nameIndex];
            }

            if( placeInGroup == 2 ) pronounce += "じゅう";
            if( placeInGroup == 3 ) pronounce += "ひゃく";
            if( placeInGroup == 0 ) pronounce += "せん";

            // Any non-zero digit (including the ones place) makes the group audible.
            pronounceFigure = 1;
        }

        if( pronounceFigure == 1 ) {
            var groupWord:String = "";
            if( currentFigure == 5 )  groupWord = "まん";
            if( currentFigure == 9 )  groupWord = "おく";
            if( currentFigure == 13 ) groupWord = "ちょう";
            if( currentFigure == 17 ) groupWord = "けい";
            if( groupWord != "" ) {
                pronounce += groupWord;
                pronounceFigure = 0;
            }
        }
    }
    return pronounce;
}
/**
 * Rewrites long vowels in a hiragana string with the prolonged sound mark "ー".
 *
 * A character is replaced by "ー" when it is a vowel that lengthens the
 * preceding kana (the preceding kana is always taken from the input, not from
 * the already rewritten output):
 *   a-row + あ, i-row + い, u-row + う, e-row + い/え, o-row + う/お.
 * Example: ぼおかろいど -> ぼーかろいど.
 *
 * The former rule "u-row + お" was removed: it is not a long vowel and turned
 * words such as かつお into かつー.
 *
 * @param pronounce_in Hiragana pronunciation string (may be empty).
 * @return The string with long vowels replaced by "ー".
 */
private function pronounce_tyouon(pronounce_in:String):String
{
    var a_gyou:String = "あかさたなはまやらわがざだばぱゃ";
    var i_gyou:String = "いきしちにひみりぎじぢびぴ";
    var u_gyou:String = "うくすつぬふむゆるぐずづぶぷゅ";
    var e_gyou:String = "えけせてねへめれげぜでべぺ";
    var o_gyou:String = "おこそとのほもよろをごぞどぼぽょ";

    if( pronounce_in.length == 0 ) {
        return "";
    }

    // The first character has no predecessor and is always kept.
    var pronounce:String = pronounce_in.charAt(0);

    for(var i:int = 1; i < pronounce_in.length; i++) {
        var prev:String = pronounce_in.charAt(i - 1);
        var cur:String = pronounce_in.charAt(i);

        var lengthens:Boolean = false;
        if( cur == "あ" ) {
            lengthens = a_gyou.indexOf(prev) >= 0;
        } else if( cur == "い" ) {
            lengthens = i_gyou.indexOf(prev) >= 0 || e_gyou.indexOf(prev) >= 0;
        } else if( cur == "う" ) {
            lengthens = u_gyou.indexOf(prev) >= 0 || o_gyou.indexOf(prev) >= 0;
        } else if( cur == "え" ) {
            lengthens = e_gyou.indexOf(prev) >= 0;
        } else if( cur == "お" ) {
            lengthens = o_gyou.indexOf(prev) >= 0;
        }

        pronounce += lengthens ? "ー" : cur;
    }
    return pronounce;
}
/**
 * Reads Latin letters and a few symbols in hiragana, character by character.
 *
 * Upper- and lower-case letters are treated alike, and full-width forms
 * (U+FF01..U+FF5E, plus the full-width yen sign U+FFE5) are folded to their
 * half-width equivalents before the lookup, so both widths are read. The
 * previous version compared each character twice against the same half-width
 * literal and therefore never matched full-width input.
 * Characters without an entry are copied through unchanged.
 *
 * @param reading Text to read (typically the reading of one morpheme).
 * @return The text with letters and known symbols replaced by their hiragana reading.
 */
private function pronounce_alphabet(reading:String):String
{
    // Keyed by lower-case half-width character.
    var readings:Object = new Object();
    readings["a"] = "えい";
    readings["b"] = "びい";
    readings["c"] = "しい";
    readings["d"] = "でえ";
    readings["e"] = "いい";
    readings["f"] = "えふ";
    readings["g"] = "じい";
    readings["h"] = "えいち";
    readings["i"] = "あい";
    readings["j"] = "じぇい";
    readings["k"] = "けい";
    readings["l"] = "える";
    readings["m"] = "えむ";
    readings["n"] = "えぬ";
    readings["o"] = "おお";
    readings["p"] = "ぴい";
    readings["q"] = "きゅう";
    readings["r"] = "あある";
    readings["s"] = "えす";
    readings["t"] = "てい";
    readings["u"] = "ゆう";
    readings["v"] = "ぶい";
    readings["w"] = "だぶりゅ";
    readings["x"] = "えっくす";
    readings["y"] = "わい";
    readings["z"] = "ぜっと";
    readings["%"] = "ぱーせんと";
    readings["#"] = "しゃーぷ";
    readings["@"] = "あっと";
    readings["\\"] = "えん";
    readings["\u00A5"] = "えん";   // yen sign
    readings["$"] = "どる";
    readings["&"] = "あんど";

    var pronounce:String = "";
    for(var i:int = 0; i < reading.length; i++) {
        var c:String = reading.charAt(i);
        var code:Number = c.charCodeAt(0);

        // Fold full-width forms onto their half-width counterparts.
        if( 0xFF01 <= code && code <= 0xFF5E ) {
            code -= 0xFEE0;
        } else if( code == 0xFFE5 ) {
            code = 0x00A5;
        }
        // Fold ASCII upper case onto lower case (only A-Z, nothing else).
        if( 0x41 <= code && code <= 0x5A ) {
            code += 0x20;
        }

        var key:String = String.fromCharCode(code);
        pronounce += readings.hasOwnProperty(key) ? readings[key] : c;
    }
    return pronounce;
}
/**
 * Fault handler of yahoo_ma_service: shows the fault event's string form
 * in the analysis output area.
 *
 * @param event Fault event raised by the service call.
 */
private function error(event:FaultEvent):void
{
    var description:String = event.toString();
    yahoo_ma.text = description;
}
/**
 * Click handler of the "消去" button in the upper panel: empties the input area.
 */
private function clear_sentence():void
{
    this.sentence.text = "";
}
/**
 * Puts the sample sentence into the input area. Called from init() and from
 * the "例文" button.
 */
private function write_sample():void
{
    var sample:String = "今日の天気は、一日中晴れ。";
    this.sentence.text = sample;
}
/**
 * Click handler of the "消去" button in the lower panel: empties the
 * analysis output area.
 */
private function clear_yahoo_ma():void
{
    this.yahoo_ma.text = "";
}
/**
 * Click handler of the "VSQ生成" button.
 *
 * Rejects empty output and output containing 1200 or more newline characters
 * with an alert; otherwise POSTs the text (field myText1, plus debug=0) to
 * the miku_talk CGI and navigates the top-level window to the response.
 */
private function submitCgi():void
{
    var morphemes:String = yahoo_ma.text;
    if( morphemes.length == 0 ) {
        Alert.show('形態素が入力されていません', 'Message');
        return;
    }

    // Number of "\n" characters = number of pieces after splitting, minus one.
    var lineCount:int = morphemes.split("\n").length - 1;
    if( lineCount >= 1200 ) {
        Alert.show('形態素が1200個を超えています', 'Message');
        return;
    }

    var form:URLVariables = new URLVariables();
    form.myText1 = morphemes;
    form.debug = "0";

    var cgiRequest:URLRequest = new URLRequest("../cgi-bin/miku_talk/miku_talk020.cgi");
    cgiRequest.method = "POST";
    cgiRequest.data = form;
    navigateToURL(cgiRequest, "_top");
}
]]>
</mx:Script>
<!-- HTTP service used by request() to call the morphological-analysis endpoint.
     The "(アプリケーションID)" part of the URL is a placeholder that must be replaced
     with a real application ID. Results are delivered in e4x format to
     setResponse(); faults are delivered to error(). -->
<mx:HTTPService id="yahoo_ma_service"
url="http://api.jlp.yahoo.co.jp/MAService/V1/parse?appid=(アプリケーションID)"
method="POST" resultFormat="e4x"
result="setResponse(event)"
fault="error(event)">
</mx:HTTPService>
<!-- Upper panel: text area "sentence" for the natural-language input, plus buttons
     wired to request(), delete_parenthesis(), clear_sentence() and write_sample(). -->
<mx:Panel title="自然文を元に形態素解析 MIKU_TALK Ver. 0.2.0"
width="100%" height="50%" id="main_panel" layout="vertical">
<mx:HBox width="100%" height="85%">
<mx:Label text="自然文"/>
<mx:TextArea id="sentence" width="100%" height="100%" backgroundColor="#EEFFEE" wordWrap="true">
<mx:text></mx:text>
</mx:TextArea>
</mx:HBox>
<mx:HBox width="100%" height="15%">
<mx:Button label="形態素解析" click="request()"/>
<mx:Button label="括弧削除" click="delete_parenthesis()"/>
<mx:Button label="消去" click="clear_sentence()"/>
<mx:Button label="例文" click="write_sample()"/>
</mx:HBox>
</mx:Panel>
<!-- Lower panel: text area "yahoo_ma" that receives the analysis output, plus buttons
     wired to submitCgi() and clear_yahoo_ma(). -->
<mx:Panel title="形態素を元にVSQ生成" width="100%" height="50%" id="sub_panel" layout="vertical">
<mx:HBox width="100%" height="85%">
<mx:Label text="形態素"/>
<mx:TextArea id="yahoo_ma" width="100%" height="100%" backgroundColor="#EEFFEE" wordWrap="true">
<mx:text></mx:text>
</mx:TextArea>
</mx:HBox>
<mx:HBox width="100%" height="15%">
<mx:Button label="VSQ生成" click="submitCgi()"/>
<mx:Button label="消去" click="clear_yahoo_ma()"/>
</mx:HBox>
</mx:Panel>
</mx:Application>
関数について解説します。
private function init():void
このswfファイルが起動したときに呼び出され、例文 "今日の天気は、一日中晴れ。" を自然文入力部に設定します。
private function request():void
形態素解析ボタンを押したとき、Yahoo! 形態素解析サービスで、自然文を形態素に解析します。
private function delete_parenthesis():void
自然文入力部から、括弧と括弧で囲まれた部分の文字列を削除します。
private function setResponse(event:ResultEvent):void
形態素解析サービスの処理の後に呼び出され、XML形式をCSV形式に変換します。
private function pronounce_number(reading:String):String
数字を平仮名で読みます。
private function pronounce_tyouon(pronounce_in:String):String
長音で発音する平仮名の並びを検出し、伸ばす側の文字を長音記号「ー」に置き換えます。
(ぼおかろいど→ぼーかろいど)
private function pronounce_alphabet(reading:String):String
アルファベットを平仮名で読みます。
private function error(event:FaultEvent):void
形態素解析のエラーのときの処理です。
private function clear_sentence():void
自然文入力部をクリアします。
private function write_sample():void
自然文入力部に例文を入力します。
private function clear_yahoo_ma():void
形態素解析部をクリアします。
private function submitCgi():void
形態素解析部をCGIに渡します。
| 固定リンク
「初音ミク」カテゴリの記事
- MikuMikuTalk ver.0.2.1c(2012.04.20)
- MikuMikuTalk ver.0.2.1(2012.04.13)
- Tda式Appendミク・ver.βのMMM対応パッチ(2012.04.08)
- Tell Your World MMD-PV(2012.04.04)
- Tda式Appendミクさんの物理演算対応(2012.02.22)
この記事へのコメントは終了しました。
コメント