diff --git a/langid/__init__.py b/langid/__init__.py
index 2c0a7a5c..0a09e897 100644
--- a/langid/__init__.py
+++ b/langid/__init__.py
@@ -1 +1 @@
-from .langid import classify, rank, set_languages
+from .langid import classify, rank, set_languages, classify_with_language
diff --git a/langid/langid.py b/langid/langid.py
index 5c44aab7..6eb4b5b5 100755
--- a/langid/langid.py
+++ b/langid/langid.py
@@ -106,6 +106,24 @@ def classify(instance):
 
   return identifier.classify(instance)
 
+
+def classify_with_language(instance, langs):
+  """
+  Convenience method using a global identifier instance with the default
+  model included in langid.py. Identifies the language that a string is
+  written in, after restricting the identifier to the languages in langs.
+
+  @param instance a text string. Unicode strings will automatically be utf8-encoded
+  @param langs the candidate languages, passed on to the identifier's set_languages
+  @returns a tuple of the most likely language and the confidence score
+  """
+  global identifier
+  if identifier is None:
+    load_model()
+
+  return identifier.classify_with_language(instance, langs)
+
+
 def rank(instance):
   """
   Convenience method using a global identifier instance with the default
@@ -288,6 +306,26 @@ def nb_classprobs(self, fv):
     pd = pdc + self.nb_pc
     return pd
 
+  def classify_with_language(self, text, langs):
+    """
+    Classify an instance, considering only the languages in langs.
+
+    The language set configured on this identifier is put back afterwards,
+    so the restriction does not leak into later classify/rank calls.
+
+    @param text the text to classify
+    @param langs the candidate languages, passed on to set_languages
+    @returns the result of classify(text) under that restriction
+    """
+    # NOTE(review): assumes set_languages only rebinds nb_classes, nb_ptc
+    # and nb_pc -- confirm against its definition before merging.
+    saved = (self.nb_classes, self.nb_ptc, self.nb_pc)
+    try:
+      self.set_languages(langs=langs)
+      return self.classify(text)
+    finally:
+      self.nb_classes, self.nb_ptc, self.nb_pc = saved
+
   def classify(self, text):
     """
     Classify an instance.