Skip to main content

identify the language type from a given String

3 replies [Last post]
vaskarbasak
Offline
Joined: 2007-07-06

Hi all,

Do you have a source code sample, or any idea how I can identify the language type of a given String?

e.g-

Reply viewing options

Select your preferred way to display the comments and click "Save settings" to activate your changes.
yaroslav173
Offline
Joined: 2008-08-29

/**
 * Reports, for every character of a word, which of the known alphabets
 * (English, Russian, Ukrainian; upper or lower case) the character belongs to.
 *
 * <p>Each alphabet is stored as a table of lowercase hexadecimal code point
 * strings, in the form produced by {@link Integer#toHexString(int)}. A character
 * shared by several alphabets (most Cyrillic letters are common to Russian and
 * Ukrainian) is reported once per matching table.
 *
 * <p>Usage: {@code java IdentifyLanguage aWord}
 */
class IdentifyLanguage {
    /** Latin capital letters A-Z (U+0041..U+005A). */
    private static final String[] engLargeLetters = {
        "41", "42", "43", "44", "45", "46", "47", "48", "49", "4a", "4b", "4c", "4d",
        "4e", "4f", "50", "51", "52", "53", "54", "55", "56", "57", "58", "59", "5a" };

    /** Latin small letters a-z (U+0061..U+007A). */
    private static final String[] engSmallLetters = {
        "61", "62", "63", "64", "65", "66", "67", "68", "69", "6a", "6b", "6c", "6d",
        "6e", "6f", "70", "71", "72", "73", "74", "75", "76", "77", "78", "79", "7a" };

    /**
     * Russian capital letters: U+0410..U+042F plus Io (U+0401).
     * Includes short I (U+0419) and the soft sign (U+042C), which the
     * original table omitted.
     */
    private static final String[] rusLargeLetters = {
        "410", "411", "412", "413", "414", "415", "401", "416", "417", "418", "419",
        "41a", "41b", "41c", "41d", "41e", "41f", "420", "421", "422", "423", "424",
        "425", "426", "427", "428", "429", "42a", "42b", "42c", "42d", "42e", "42f" };

    /**
     * Russian small letters: U+0430..U+044F plus io (U+0451).
     * Small tse is U+0446; the original table listed U+0426 (capital Tse) here
     * by mistake. Also includes short i (U+0439) and the soft sign (U+044C).
     */
    private static final String[] rusSmallLetters = {
        "430", "431", "432", "433", "434", "435", "451", "436", "437", "438", "439",
        "43a", "43b", "43c", "43d", "43e", "43f", "440", "441", "442", "443", "444",
        "445", "446", "447", "448", "449", "44a", "44b", "44c", "44d", "44e", "44f" };

    /**
     * Ukrainian capital letters, including Ghe with upturn (U+0490),
     * Ukrainian Ie (U+0404), I (U+0406) and Yi (U+0407).
     */
    private static final String[] ukrLargeLetters = {
        "410", "411", "412", "413", "490", "414", "415", "404", "416", "417", "418",
        "406", "407", "419", "41a", "41b", "41c", "41d", "41e", "41f", "420", "421",
        "422", "423", "424", "425", "426", "427", "428", "429", "42c", "42e", "42f" };

    /**
     * Ukrainian small letters, including ghe with upturn (U+0491),
     * Ukrainian ie (U+0454), i (U+0456) and yi (U+0457).
     */
    private static final String[] ukrSmallLetters = {
        "430", "431", "432", "433", "491", "434", "435", "454", "436", "437", "438",
        "456", "457", "439", "43a", "43b", "43c", "43d", "43e", "43f", "440", "441",
        "442", "443", "444", "445", "446", "447", "448", "449", "44c", "44e", "44f" };

    /** All tables, in the order in which matches are reported. */
    private static final String[][] letters = {
        engLargeLetters, engSmallLetters,
        rusLargeLetters, rusSmallLetters,
        ukrLargeLetters, ukrSmallLetters };

    /**
     * Prints (without a trailing newline) the human-readable name of the given
     * alphabet table to standard output.
     *
     * @param stringArray one of this class's alphabet tables; compared by
     *     reference on purpose, because the Russian and Ukrainian tables share
     *     many entries and cannot be told apart by content alone. Any other
     *     array falls through to the last branch.
     */
    static protected void foundLanguage(String[] stringArray) {
        if (stringArray == engLargeLetters) {
            System.out.print("English large letters.");
        } else if (stringArray == engSmallLetters) {
            System.out.print("English small letters.");
        } else if (stringArray == rusLargeLetters) {
            System.out.print("Russian large letters.");
        } else if (stringArray == rusSmallLetters) {
            System.out.print("Russian small letters.");
        } else if (stringArray == ukrLargeLetters) {
            System.out.print("Ukrainian large letters.");
        } else {
            System.out.print("Ukrainian small letters.");
        }
    }

    /**
     * Echoes the word, then prints one line per (character, alphabet) match.
     * Characters that are in none of the tables produce no output.
     *
     * @param string the word to analyse
     */
    static protected void identify(String string) {
        System.out.println(string);
        char[] charArray = string.toCharArray();
        for (int i = 0; i < charArray.length; i++) {
            // The tables hold lowercase hex without padding, which is exactly
            // what Integer.toHexString produces.
            String hex = Integer.toHexString(Character.codePointAt(charArray, i));
            for (String[] lettersArray : letters) {
                for (String foundLetter : lettersArray) {
                    if (hex.equals(foundLetter)) {
                        System.out.print("Letter " + charArray[i] + " belongs to the ");
                        foundLanguage(lettersArray);
                        System.out.println();
                    }
                }
            }
        }
    }

    /**
     * Command-line entry point.
     *
     * @param args the first element is the word to analyse; when it is missing
     *     a usage message is written to standard error
     */
    public static void main(String[] args) {
        // Check the argument count explicitly instead of catching
        // ArrayIndexOutOfBoundsException as control flow.
        if (args.length == 0) {
            System.err.println("Usage: java IdentifyLanguage aWord");
            return;
        }
        identify(args[0]);
    }
}

Message was edited by: yaroslav173

Message was edited by: yaroslav173

claudineifonseca
Offline
Joined: 2011-08-21

Linguagem "C"

sfshaza
Offline
Joined: 2004-06-03

This is a very advanced problem. I'm not sure we have anyone reading this forum that would have this knowledge.

I did see that you have cross posted this to several forums. The most useful results are being posted on the advanced java forum (which is what I would expect):
http://www.java-forums.org/advanced-java/9453-dentify-language-type-give...

Good luck!

Sharon