Difference between revisions of "Ar Tonelico III"
Blutorange (talk | contribs) (→Script) |
|||
Line 19: | Line 19: | ||
• DIAG to contains the main dialogue lines, while CTRL is probably system-related | • DIAG to contains the main dialogue lines, while CTRL is probably system-related | ||
• DIAG files are also usually only a few hundred bytes long | • DIAG files are also usually only a few hundred bytes long | ||
− | • DIAG has a header of | + | • DIAG has a header of 4 bytes, then comes the main part |
• the first 26 bytes of CTRL are as follows (decimal): | • the first 26 bytes of CTRL are as follows (decimal): | ||
Line 47: | Line 47: | ||
• a [LEADING] byte <bh:ff> this line is outside the "normal" dialogue flow, ie a system message ("You got item..") or "Party member xyz joined." or "……。" or "…!?" &c. | • a [LEADING] byte <bh:ff> this line is outside the "normal" dialogue flow, ie a system message ("You got item..") or "Party member xyz joined." or "……。" or "…!?" &c. | ||
• the 13th byte: this indicates the [SPEAKER]. [SPEAKER] is <bh:ff> when there is no speaker | • the 13th byte: this indicates the [SPEAKER]. [SPEAKER] is <bh:ff> when there is no speaker | ||
− | + | • the first byte indicates the [MODE] | |
+ | <bh:00> - talk with speech bubbles at character's 3D models | ||
+ | <bh:01> - talk with 2D character portraits | ||
+ | <bh:02> - item get | ||
TO SUMMARIZE | TO SUMMARIZE | ||
• dialogue in EVENT-MESSAGE file: [3 byte header][36-byte separator][UTF8 byte sequence, terminating on <bh:00>], repeat | • dialogue in EVENT-MESSAGE file: [3 byte header][36-byte separator][UTF8 byte sequence, terminating on <bh:00>], repeat | ||
• 13th byte [SEPARATOR] is speaker, 26th [SEPARATOR] marks "normal" spoken text</pre> | • 13th byte [SEPARATOR] is speaker, 26th [SEPARATOR] marks "normal" spoken text</pre> | ||
+ | |||
+ | And here is an improved lua script I wrote that looks for valid UTF8 sequences in a file, works much better and doesn't need specific information on separators &c.: | ||
+ | <pre> | ||
+ | --true is interpreted as 1, nil or false as 0 | ||
+ | function dec_to_8bit(dec,byte) --byte must point to an initialized table of wrong (false or nil) values | ||
+ | local exp = 128 | ||
+ | for i=1,8 do | ||
+ | if dec >= exp then | ||
+ | byte[i] = true | ||
+ | dec = dec-exp | ||
+ | end | ||
+ | exp = exp*.5 | ||
+ | end | ||
+ | end | ||
+ | |||
+ | function get_utf8(filename, outname) | ||
+ | local infile = io.open(filename,"rb") | ||
+ | if infile then | ||
+ | print("Searching for valid UTF8 in file: " .. filename .. "...") | ||
+ | out = io.open(outname,"a+") --change "w+" to "a+" to append to end of file, not deleting previous data | ||
+ | out:write("#FILE: " .. filename .. "\n") | ||
+ | local occ = 0 --just count how many valid chars we found | ||
+ | local cur_pos | ||
+ | local len = 0 | ||
+ | local len2 | ||
+ | local utf8 = "" | ||
+ | local insert_line_break = false | ||
+ | local file_len = infile:seek("end") | ||
+ | infile:seek("set") | ||
+ | repeat | ||
+ | local dec = string.byte(infile:read(1)) | ||
+ | cur_pos = infile:seek("cur") | ||
+ | local byte = {} | ||
+ | dec_to_8bit(dec,byte) | ||
+ | if len >= 1 then | ||
+ | if dec == 0 then | ||
+ | --zero byte not acceptable, this is not UTF8! | ||
+ | len = 0 | ||
+ | utf8 = "" | ||
+ | --return to where we wrongly assumed UTF8 started... | ||
+ | cur_pos = cur_pos+2-len | ||
+ | infile:seek("set",cur_pos) | ||
+ | else | ||
+ | if not byte[1] or byte[2] then | ||
+ | --UTF8 multibyte chars MUST start with 10! | ||
+ | len = 0 | ||
+ | utf8 = "" | ||
+ | --return to where we wrongly assumed UTF8 started... | ||
+ | cur_pos = cur_pos+2-len | ||
+ | infile:seek("set",cur_pos) | ||
+ | else | ||
+ | --valid utf8 found, dumping... | ||
+ | utf8 = utf8 .. string.char(dec) | ||
+ | len = len - 1 | ||
+ | occ = occ + 1 | ||
+ | if len==1 then | ||
+ | --utf8 sequence end | ||
+ | len = 0 | ||
+ | insert_line_break = true | ||
+ | out:write(utf8) | ||
+ | utf8 = "" | ||
+ | end | ||
+ | end | ||
+ | end | ||
+ | else | ||
+ | if dec == 0 then | ||
+ | if insert_line_break then | ||
+ | --zero terminated :) | ||
+ | out:write("\n") | ||
+ | end | ||
+ | end | ||
+ | insert_line_break = false | ||
+ | if byte[1] and byte[2] then --we are not interested in ASCII chars... otherwise allow b2=="0" | ||
+ | -- now determine byte length of glyph | ||
+ | len = 2 | ||
+ | repeat | ||
+ | len = len+1 | ||
+ | until not byte[len] | ||
+ | len = len-1 | ||
+ | if len > 6 then | ||
+ | --UTF8 only allows for 6byte chars at most | ||
+ | len = 0 | ||
+ | else | ||
+ | utf8 = utf8 .. string.char(dec) | ||
+ | len2 = len | ||
+ | end | ||
+ | end | ||
+ | end | ||
+ | until cur_pos >= file_len | ||
+ | out:write("\n") | ||
+ | infile:close() | ||
+ | out:close() | ||
+ | print("Found " .. occ .. " valid UTF8 chars, except ASCII.\nWritten to " .. outname .. ".\nDone.") | ||
+ | end | ||
+ | return occ | ||
+ | end | ||
+ | |||
+ | get_utf8(arg[1],arg[2])</pre> |
Revision as of 20:17, 8 February 2011
Lots and lots of text and lots of obscure kanji! First I'll finish the game, then I can re-watch the cosmosphere events from the extra menu. I will probably do the cosmospheres (they're ridiculously funny), perhaps some talk events, and if there is any interest, we'll see about the rest.
Translations
(or should I translate to German??)
Tips
I used the Japanese IME "canna" under ubuntu, compiled from source, and changed kana-kanji dictionaries, so that kanji+furigana is written upon entering and converting Japanese te
Script
Now I managed to dump the script. Much better than having to write by hand:) And I also ripped the voice clips and bgm, character poses, the textures, I can view a few models (not the character models though)... The dump can be found here: [1] or [2] or [3] Now updated with speaker information! For anyone who's interested, what I found out...
AT3 ebd script files • consists of EVENT_MESSAGE_SW[2digit-NUMBER]_[3digit-NUMBER].ebm (called DIAG from now on) and EVENT_SW[2digit-NUMBER]_[3digit-NUMBER].ebm (called CTRL from now on) • each DIAG corresponds to a CTRL file with the same NUMBERs • DIAG contains the main dialogue lines, while CTRL is probably system-related • DIAG files are also usually only a few hundred bytes long • DIAG has a header of 4 bytes, then comes the main part • the first 26 bytes of CTRL are as follows (decimal): [#1] 000 000 000 000 000 000 000 005 000 000 000 110 097 109 101 000 005 000 000 000 144 224 150 190 000 [#2] [#3] 000 000 [#4] 000 [#5] [#6] [#7] [#8] [#8] 000 [#9] 000 where the [#n] are -- #1 takes many different values, 001 is very frequent (~50%) -- #2 takes many different values -- #3 mostly 000, a few times 001, 002, 4 times 003, 3 times 004 -- #4 mostly small bytes <=021, 021 and 00x frequently occur in adjacent files together, takes 044 in two instances -- #5 either 000, 016, 049, or 064 -- #6 always either 113, 116, 117, 119, or 127 -- #7 bytes <= 025, either 00x or 021x with x<=5 except a handful of times -- #8 almost always 000, except 10 and 7 files respectively -- #9 either 000, 001, 002, 003, 004, 005, 017, 019, 021, 025 with the lower bytes much more common • the byte of CTRL always seems to be <bh:7f>, the last 26 bytes only being somewhat similar • in general, CTRL displays a high ratio of <bh:00> • CTRL contains no UTF8 chars • the main part of CTRL, apart from the many 0's, contains only ASCII chars, most of which are LATIN characters and punctuation, with a few special chars such as <bh:f4>, <bh:dc> (Ü) • the main part of DIAG is in the following format, after the 3-byte header comes: [SEPARATOR] [UTF8-sequence][SEPARATOR][UTF8-sequence] ... 
[UTF8-sequence][SEPARATOR] • as the text is Japanese, [UTF8-sequence] is usually a multiple of 3-byte blocks, each block being the multi-byte sequence for one Japanese character; it terminates on a zero-byte • the main text may contain a ※削除※ line, [LEADING] is then <bh:ff> • [SEPARATOR] always consists of 36 bytes, each byte smaller than <bd:192>, with the only exception it may also contain <bh:ff>. Not counting the <bh:00> UTF8 terminating byte. • [SEPARATOR]: most bytes are constant, except the following meaningful bytes • the 25th byte: it is a [LEADING] number, counting the dialogue lines • a [LEADING] byte <bh:ff> this line is outside the "normal" dialogue flow, ie a system message ("You got item..") or "Party member xyz joined." or "……。" or "…!?" &c. • the 13th byte: this indicates the [SPEAKER]. [SPEAKER] is <bh:ff> when there is no speaker • the first byte indicates the [MODE] <bh:00> - talk with speech bubbles at character's 3D models <bh:01> - talk with 2D character portraits <bh:02> - item get TO SUMMARIZE • dialogue in EVENT-MESSAGE file: [3 byte header][36-byte separator][UTF8 byte sequence, terminating on <bh:00>], repeat • 13th byte [SEPARATOR] is speaker, 26th [SEPARATOR] marks "normal" spoken text
And here is an improved lua script I wrote that looks for valid UTF8 sequences in a file, works much better and doesn't need specific information on separators &c.:
-- Extracts runs of multi-byte UTF-8 characters from an arbitrary binary file.
-- Usage: lua <script> <infile> <outfile>
--
-- The script scans the input byte by byte, keeps every well-formed multi-byte
-- UTF-8 sequence (ASCII is deliberately ignored) and appends the result to the
-- output file. A zero byte directly following a kept character is treated as a
-- string terminator and produces a line break in the output.

-- Expands a byte value into its eight bits, most significant bit first.
--   dec  : integer in the range 0..255
--   byte : table whose entries 1..8 are all nil/false on entry; entry i is set
--          to true when bit i (1 = MSB, 8 = LSB) of dec is 1. Entries for
--          0-bits are left untouched, hence the requirement on the input table.
-- Kept as a global helper for backward compatibility; get_utf8 itself no
-- longer needs it.
function dec_to_8bit(dec, byte)
  local weight = 128
  for i = 1, 8 do
    if dec >= weight then
      byte[i] = true
      dec = dec - weight
    end
    weight = weight / 2
  end
end

-- Returns the total length (2..4) of the UTF-8 sequence introduced by the
-- given lead byte, or nil when the byte cannot start a multi-byte sequence.
-- Follows RFC 3629: 0xC0/0xC1 (overlong) and 0xF5..0xFF are never valid leads.
-- Note: second-byte range restrictions (overlong E0/F0 forms, surrogates) are
-- not checked here; only the lead byte and the 10xxxxxx shape of the
-- continuation bytes are validated.
local function utf8_sequence_length(lead)
  if lead >= 0xC2 and lead <= 0xDF then
    return 2
  elseif lead >= 0xE0 and lead <= 0xEF then
    return 3
  elseif lead >= 0xF0 and lead <= 0xF4 then
    return 4
  end
  return nil
end

-- Scans `filename` for multi-byte UTF-8 characters and appends them to
-- `outname` (opened in "a+" mode, so earlier dumps are preserved; use "w+"
-- instead to overwrite).
--   filename : path of the binary file to scan
--   outname  : path of the text file to append to
-- Returns the number of multi-byte characters written, or nil (plus an error
-- message) when either file cannot be opened.
function get_utf8(filename, outname)
  local infile, in_err = io.open(filename, "rb")
  if not infile then
    return nil, in_err
  end
  -- The files are small, so read everything at once instead of seeking
  -- around byte by byte. "*a" is accepted by Lua 5.1 through 5.4.
  local data = infile:read("*a") or ""
  infile:close()

  local out, out_err = io.open(outname, "a+")
  if not out then
    return nil, out_err
  end

  print("Searching for valid UTF8 in file: " .. filename .. "...")
  out:write("#FILE: " .. filename .. "\n")

  local occ = 0            -- number of complete multi-byte characters found
  local after_char = false -- true when the previous byte ended a kept character
  local size = #data
  local pos = 1

  while pos <= size do
    local lead = data:byte(pos)
    local len = utf8_sequence_length(lead)

    -- A candidate is only accepted when all of its continuation bytes are
    -- present and have the form 10xxxxxx (0x80..0xBF).
    local complete = false
    if len and pos + len - 1 <= size then
      complete = true
      for k = pos + 1, pos + len - 1 do
        local c = data:byte(k)
        if c < 0x80 or c > 0xBF then
          complete = false
          break
        end
      end
    end

    if complete then
      out:write(data:sub(pos, pos + len - 1))
      occ = occ + 1
      after_char = true
      pos = pos + len
    else
      -- Not (the start of) a valid character: a zero byte right after a kept
      -- character terminates the string; everything else is skipped. Only one
      -- byte is consumed so the bytes after a bogus lead are re-examined.
      if lead == 0 and after_char then
        out:write("\n")
      end
      after_char = false
      pos = pos + 1
    end
  end

  out:write("\n")
  out:close()
  print("Found " .. occ .. " valid UTF8 chars, except ASCII.\nWritten to " .. outname .. ".\nDone.")
  return occ
end

-- Command-line entry point: only run when both file names were supplied.
if arg and arg[1] and arg[2] then
  get_utf8(arg[1], arg[2])
else
  io.stderr:write("usage: lua " .. ((arg and arg[0]) or "<script>") .. " <infile> <outfile>\n")
end