#!/usr/bin/php
<?php

/*
 * Terranigma (German ROM) text dumper.
 *
 * For every ROM matching roms/*.sfc this script locates the in-ROM word
 * lists (dictionaries), decodes the whole ROM with the game's character
 * table and writes the result to out/<rom>.txt; the word lists themselves
 * are dumped to out/<rom>_wordlist.txt.
 *
 * NOTE(review): the beginning of this header comment was lost when the file
 * was extracted. The surviving notes are kept below verbatim:
 *
 *   'ê' (d'être in wordlist?)
 *   FRA Wordlist:
 *   ...Je [Ô]Agar [Ô]Allez, [Ô]Alors [Ô]alors [Ô]animaux [Ô]Appuyer [Ô]AprÜs [Ô]Ar
 */

// TODO: bytes 0x80 and above are "unicode" characters
// TODO: use the current transcription table
// TODO: the ESP and FRA word lists are not found!

echo "Please wait approx. 11 minutes...\n";

// Character class body accepted inside a word-list entry. Only GER.
// Read by terranigma_ger_find_word_lists() through `global`.
$preg_valid_chars = 'A-Za-z0-9 !\\?ßäöüÄÖÜ\\[\\]\\(\\):\\.,\\-\'';

$roms = rom_names(__DIR__ . '/../../roms/*.sfc');
foreach ($roms as $rom) {
    $x = file_get_contents(__DIR__ . '/../../roms/'.$rom.'.sfc');
    if ($x === false) {
        // Skip this ROM instead of feeding `false` into the decoder.
        trigger_error("Cannot read ROM '$rom'", E_USER_WARNING);
        continue;
    }

    $wl = terranigma_ger_find_word_lists($x, $rom); // needs to be called with the complete ROM
    $x = terranigma_ger_decode_string($x, $wl);     // can be called with a single string sequence or with the complete ROM

    if (file_put_contents(__DIR__ . '/out/'.$rom.'.txt', $x) === false) {
        trigger_error("Cannot write decoded text for ROM '$rom'", E_USER_WARNING);
    }
}

/**
 * Builds the single-byte translation table (ROM byte value => output string).
 *
 * The output characters are the single bytes the original if/else chain
 * emitted (e.g. chr(0xf6) for "ö").
 *
 * NOTE(review): the original chain was generated by ger_helper_enc.phps and
 * its first entries were lost during extraction (everything between the loop
 * header and the 0x3E entry). Only the entries that survived are listed here;
 * restore the missing ones from the generator output. Until then those bytes
 * are reported as unknown characters by the decoder.
 *
 * @return array Map of int (0-255) to string.
 */
function _terranigma_ger_char_table() {
    $table = array(
        // NOTE(review): the condition of this entry was cut off; 0x3E (">") is
        // inferred from the surviving "> */" fragment and the following 0x3F entry.
        0x3e => chr(0xf6), // ö
        0x3f => chr(0xd6), // Ö
        0x5b => chr(0xdc), // Ü
        0x5f => chr(0xdf), // ß
        0x60 => chr(0x3f), // ?
        0x6d => chr(0x21), // !
        0x6e => chr(0x2c), // ,
        0x6f => chr(0x3a), // :
        0x76 => chr(0x27), // '
        0x79 => chr(0x25), // %
        0x7c => chr(0x2d), // -
        0x7b => chr(0x2b), // +
        0x7f => chr(0x2e), // .
        0xcf => chr(0x0a), // [NEWLINE]
        // Manually added (not part of the generated table)
        0x20 => ' ',
    );

    // 0x41..0x5A ("A".."Z" in ASCII) encode the lower-case letters a..z.
    for ($b = 0x41; $b <= 0x5a; $b++) {
        $table[$b] = chr($b + 0x20);
    }

    // 0x63..0x6C encode the digits 0..9.
    for ($b = 0x63; $b <= 0x6c; $b++) {
        $table[$b] = chr($b - 0x33);
    }

    return $table;
}

/**
 * Decodes Terranigma (GER) text bytes into readable text.
 *
 * Can be called with a single string sequence or with the complete ROM.
 *
 * It is a very slow technique to parse byte by byte, but it is probably the
 * only technique which allows bug-free parsing of the dictionary expansions.
 * Known problem with ROM parsing: when E5/E6 is in front of a string.
 *
 * Two-byte sequences:
 *   0xC9 0x44        => tab (used in Ma-Jo's letter)
 *   0xD2 0x00        => "[SPIELER]"
 *   0xE5+n <index>   => entry <index> of dictionary n (see $wl)
 * A two-byte sequence whose second byte is unknown, missing (end of input) or
 * points outside the dictionary is emitted as "[0xNN <byte>]" instead of
 * raising a warning.
 *
 * Open questions carried over from the original notes:
 *  - The bird says "Trällerö" ... "Vogel:Ü" -- "Ü" seems to have no meaning
 *    in the game; apparently it only removes the text colour.
 *  - TODO: "Ö" somehow marks the end of a chapter (see Beruga's PC).
 *
 * @param string $x  Raw bytes to decode.
 * @param array  $wl Dictionaries as returned by terranigma_ger_find_word_lists():
 *                   list of lists of words. Dictionary n is selected by byte 0xE5+n.
 * @return string Decoded text; unknown bytes appear as "[<byte>]".
 */
function terranigma_ger_decode_string($x, $wl = array()) {
    static $table = null;
    if ($table === null) {
        $table = _terranigma_ger_char_table();
    }

    $out = '';
    $len = strlen($x); // hoisted: the input does not change while decoding
    $num_of_dictionaries = count($wl);

    // NOTE(review): the loop header and the fetch of $c were lost during
    // extraction and are reconstructed from how $i and $c are used below.
    for ($i = 0; $i < $len; $i++) {
        $c = $x[$i];
        $byte = ord($c);

        if (isset($table[$byte])) {
            $out .= $table[$byte];
            continue;
        }

        if ($byte == 0xC9 /* É */) {
            // Go to the next char (empty string if the input ends here)
            $i++;
            $c = ($i < $len) ? $x[$i] : '';
            if ($c === chr(0x44)) { /* ÉD */
                $out .= "\t"; // Ma-Jo's letter
            } else {
                $out .= "[0xC9 $c]"; // Unknown
            }
        } else if ($byte == 0xD2 /* Ò */) {
            // Go to the next char (empty string if the input ends here)
            $i++;
            $c = ($i < $len) ? $x[$i] : '';
            if ($c === chr(0x00)) { /* Ò[NUL] */
                $out .= "[SPIELER]";
            } else {
                $out .= "[0xD2 $c]"; // Unknown
            }
        } else if ($byte == 0xd3 /* Ó */) {
            $out .= "[ENDE]\n";
        } else if ($byte == 0xd5 /* Õ */) {
            $out .= "[WEITER]\n";
        } else if (($byte >= 0xE5) && ($byte < 0xE5 + $num_of_dictionaries)) {
            // To make the ROM smaller, they added dictionaries with mostly used words.
            // The dictionary $wl needs to be extracted from the ROM.
            $dict_id = $byte - 0xE5;

            // Go to the next char: it is the index into the selected dictionary
            $i++;
            $c = ($i < $len) ? $x[$i] : '';

            if ($c !== '' && isset($wl[$dict_id][ord($c)])) {
                $out .= $wl[$dict_id][ord($c)];
            } else {
                // Index byte missing or not present in the dictionary
                $out .= sprintf('[0x%02X %s]', $byte, $c);
            }
        } else {
            $out .= "[$c]"; // Unknown char
        }
    }

    return $out;
}

/**
 * Encodes readable text back into Terranigma (GER) text bytes.
 *
 * Not implemented yet; currently returns nothing.
 *
 * @param string $x Text to encode.
 */
function terranigma_ger_encode_string($x) {
    // TODO
}

# -------------------------------

/**
 * Finds the word lists (dictionaries) inside a ROM image.
 *
 * The ROM bytes are first translated with the $s/$r tables defined by
 * ger_table.inc.phps so that the regular expression can work on readable
 * characters. There is no word list expansion inside the word lists
 * themselves. A word list is a run of exactly 256 entries, each at least two
 * valid characters long and terminated by "Ô".
 *
 * As a side effect every entry is written to out/<rom>_wordlist.txt in the
 * form `<selector byte hex> <index hex> = "<word>"`. If that file cannot be
 * opened a warning is raised and the lists are still returned.
 *
 * @param string $x   Complete ROM contents.
 * @param string $rom ROM name without extension; used for the debug file name.
 * @return array List of word lists; each word list is a list of strings.
 */
function terranigma_ger_find_word_lists($x, $rom) {
    // Quick replace stuff to make the RegEx work.
    // The include is expected to define $s and $r for strtr().
    include 'ger_table.inc.phps';
    $x = strtr($x, $s, $r);

    // Try to find word lists
    global $preg_valid_chars;
    $m = array();
    $found = preg_match_all('@((['.$preg_valid_chars.']{2,}Ô){256})@sm', $x, $m);
    $raw_lists = ($found && isset($m[1])) ? $m[1] : array();

    $h = fopen(__DIR__ . '/out/'.$rom.'_wordlist.txt', 'w');
    if ($h === false) {
        trigger_error("Cannot write word list dump for ROM '$rom'", E_USER_WARNING);
    }

    // Use the separator's real byte length so the code is correct no matter
    // which encoding this source file is saved in (the original removed a
    // fixed 2 bytes, which is only right when "Ô" is stored as UTF-8).
    $separator = 'Ô';

    $wortlisten = array();
    foreach ($raw_lists as $list_no => $raw) {
        // Drop the terminator of the last entry, then split into words
        $words = explode($separator, substr($raw, 0, -strlen($separator)));

        if ($h !== false) {
            foreach ($words as $n => $word) {
                fwrite($h, dechex(0xE5 + $list_no).' '.zerofill(dechex($n))." = \"$word\"\n");
            }
        }

        $wortlisten[$list_no] = $words;
    }

    if ($h !== false) {
        fclose($h);
    }

    return $wortlisten;
}

# ---

/**
 * Returns the names (without directory and extension) of all files matching
 * a glob pattern.
 *
 * Only the LAST extension is removed, so "Game.v1.1.sfc" yields "Game.v1.1";
 * the caller re-appends ".sfc" to open the file again.
 *
 * @param string $wildcard Pattern passed to glob().
 * @return array List of names; empty if nothing matches or glob() fails.
 */
function rom_names($wildcard) {
    $paths = glob($wildcard);
    if ($paths === false) {
        return array();
    }

    $out = array();
    foreach ($paths as $path) {
        $out[] = pathinfo($path, PATHINFO_FILENAME);
    }
    return $out;
}

# ---

/**
 * Left-pads a value with zeros up to a minimum length.
 *
 * @see http://php.net/manual/en/function.str-pad.php#74259
 *
 * @param mixed $mStretch Value to pad (formatted as a string).
 * @param int   $iLength  Minimum length of the result (default 2).
 * @return string
 */
function zerofill($mStretch, $iLength = 2) {
    $sPrintfString = '%0' . (int)$iLength . 's';
    return sprintf($sPrintfString, $mStretch);
}