diff --git a/Makefile b/Makefile
index d74aade63..98eb8507f 100644
--- a/Makefile
+++ b/Makefile
@@ -146,7 +146,7 @@ hyper$(OBJ_EXTENSION): *.cpp language-data.cpp autohdr.h
 hyper.res: hyper.rc hr-icon.ico
 	windres hyper.rc -O coff -o hyper.res
 
-langen$(EXE_EXTENSION): langen.cpp language-??.cpp language-ptbr.cpp
+langen$(EXE_EXTENSION): langen.cpp language-??.cpp language-ptbr.cpp all-string-literals.ipp
 	$(CXX) -O0 $(CXXFLAGS) $(langen_CXXFLAGS) langen.cpp $(LDFLAGS) -o $@
 
 makeh$(EXE_EXTENSION): makeh.cpp
@@ -155,6 +155,9 @@ makeh$(EXE_EXTENSION): makeh.cpp
 autohdr.h: makeh$(EXE_EXTENSION) language-data.cpp *.cpp
 	./makeh classes.cpp locations.cpp colors.cpp hyperpoint.cpp geometry.cpp goldberg.cpp init.cpp floorshapes.cpp cell.cpp multi.cpp shmup.cpp pattern2.cpp mapeditor.cpp graph.cpp textures.cpp hprint.cpp language.cpp util.cpp complex.cpp *.cpp > autohdr.h
 
+all-string-literals.ipp: lanlint.py *.cpp
+	python lanlint.py > all-string-literals.ipp
+
 language-data.cpp: langen$(EXE_EXTENSION)
 	./langen > language-data.cpp
 
@@ -179,3 +182,4 @@ clean:
 	rm -rf mymake$(EXE_EXTENSION) mymake_files/
 	rm -f hyperrogue$(EXE_EXTENSION) hyper$(OBJ_EXTENSION) $(hyper_RES) savepng$(OBJ_EXTENSION)
 	rm -f hyper.html hyper.js hyper.wasm
+	rm -f all-string-literals.ipp
diff --git a/langen.cpp b/langen.cpp
index 3f7466e96..e453120af 100644
--- a/langen.cpp
+++ b/langen.cpp
@@ -21,6 +21,8 @@
 #endif
 
 template<class T> int isize(const T& x) { return x.size(); }
+// Overload for built-in arrays: the element count is the deduced bound N.
+template<class T, size_t N> int isize(const T (&x)[N]) { return N; }
 
 #define NUMLAN 7
 
@@ -30,9 +32,83 @@ std::string current_language;
 
 const char *escape(std::string s, const std::string& dft);
 
+#include "all-string-literals.ipp"
+
+// Case-insensitive Levenshtein distance between s1 and s2, computed with a
+// single rolling row of costs. `bail` is accepted but never read in this body
+// (presumably reserved for an early exit -- TODO confirm with the author).
+int edit_distance(const std::string &s1, const std::string &s2, int bail)
+{
+    const int m = s1.size();
+    const int n = s2.size();
+    if( m==0 ) return n;
+    if( n==0 ) return m;
+    std::vector<int> costs(n+1);
+    for (int k=0; k<=n; k++) costs[k] = k;
+    int i = 0;
+    for (auto it1 = s1.begin(); it1 != s1.end(); ++it1, ++i ) {
+        costs[0] = i+1;
+        int corner = i;
+        int j = 0;
+        for (auto it2 = s2.begin(); it2 != s2.end(); ++it2, ++j ) {
+            int upper = costs[j+1];
+            // Cast first: passing a negative char to toupper() is undefined behaviour.
+            if (toupper(static_cast<unsigned char>(*it1)) == toupper(static_cast<unsigned char>(*it2))) {
+                costs[j+1] = corner;
+            } else {
+                costs[j+1] = std::min(costs[j], std::min(upper, corner)) + 1;
+            }
+            corner = upper;
+        }
+    }
+    return costs[n];
+}
+
+// Set built from the program_strings array defined in all-string-literals.ipp.
+static auto program_string_set = std::set<std::string>(
+    program_strings, program_strings + isize(program_strings)
+);
+// Maps a translation key that is not a program string to the languages that define it.
+static std::map<std::string, std::vector<std::string>> useless_translations;
+
+// Records s under current_language unless s is one of the program strings.
+static void check_program_strings_for(const std::string& s)
+{
+    if (program_string_set.find(s) != program_string_set.end()) {
+        return;
+    }
+    useless_translations[s].push_back(current_language);
+}
+
+// Prints every recorded key to stderr with its languages and the closest program string.
+static void print_useless_translations()
+{
+    for (auto&& kv : useless_translations) {
+        const std::string& s = kv.first;
+
+        int min_d = INT_MAX;
+        std::string min_p;
+        for (const std::string& p : program_string_set) {
+            int d = edit_distance(p, s, min_d);
+            if (d < min_d) {
+                min_d = d;
+                min_p = p;
+            }
+        }
+        std::string langs = kv.second[0];
+        for (size_t i=1; i < kv.second.size(); ++i) {
+            langs += ',';
+            langs += kv.second[i];
+        }
+        fprintf(stderr, "Unused translation in %s: %s\n", langs.c_str(), escape(s, s));
+        fprintf(stderr, "Closest match in program: %s\n", escape(min_p, min_p));
+    }
+}
+
 template<class T> struct dictionary {
   std::map<std::string, T> m;
   void add(const std::string& s, T val) {
+    check_program_strings_for(s);
     auto it = m.find(s);
     if (it == m.end()) {
       m.emplace(s, std::move(val));
@@ -443,4 +519,6 @@ int main() {
 
   printf(" };\n");
 
+  print_useless_translations();
+
   }
diff --git a/lanlint.py b/lanlint.py
new file mode 100644
index 000000000..dfbe87514
--- /dev/null
+++ b/lanlint.py
@@ -0,0 +1,64 @@
+#!/usr/bin/env python
+
+import glob
+
+def get_file_strings(fname):
+    """Return the string literals found in fname, quotes included.
+
+    Literals separated only by whitespace or // comments are joined into one
+    entry. Raw strings (a quote preceded by R) are skipped up to the next quote.
+    """
+    result = []
+    with open(fname, "r") as f:
+        text = f.read()
+    i = 0
+    start = None
+    current_string = ''
+    while i < len(text):
+        if (start is None) and (text[i:i+2] == '//'):
+            # Also stop at end of text, so a final comment without a newline is safe.
+            while i < len(text) and text[i] != '\n':
+                i += 1
+        elif (start is None) and (text[i] == '"'):
+            if (i > 0) and (text[i-1] == 'R'):
+                i += 1
+                while i < len(text) and text[i] != '"': i += 1
+            else:
+                start = i
+        elif (start is None) and (text[i] in ' \t\n'):
+            pass
+        elif (start is None) and (text[i:i+3] == "'\"'"):
+            i += 2 # skip over character literals that might otherwise confuse us
+        elif (start is None):
+            if current_string:
+                result += [current_string]
+                # print(current_string)
+                # print("---------------------")
+            current_string = ''
+        elif (start is not None) and (text[i] == '\\'):
+            i += 1
+        elif (start is not None) and (text[i] == '"'):
+            current_string += text[start:i+1]
+            start = None
+        else:
+            pass # just a plain old string character
+        i += 1
+    if current_string:
+        result += [current_string]
+    return result
+
+def get_program_strings():
+    """Collect the literals of every *.cpp file whose name does not start with "language-"."""
+    result = []
+    for fname in glob.glob("*.cpp"):
+        if not fname.startswith("language-"):
+            result += get_file_strings(fname)
+    return result
+
+if __name__ == '__main__':
+    program_strings = set(get_program_strings())
+    print('const char *program_strings[] = {')
+    # Sorted so the generated file is identical from run to run.
+    for s in sorted(program_strings):
+        print(' %s,' % s)
+    print('};')