locked
How to develop a Translator? RRS feed

  • Question

  • User1135209489 posted

    Hi Everyone,

    I intend to develop a translator for my non-English language, Tamil.
    It should work both ways: from English to Tamil and from Tamil to English.
    I have some experience developing a blog and a shopping cart.
    However, I have no idea how to build a translator: where to start, how it works, and what resources I need. Is it possible to develop one in ASP.NET?
    It would also help if you could explain how the Chinese translators work.

    I expect more comments from Experts for this question.

    Thanks in Advance.  

     

    Monday, June 14, 2010 1:46 AM

Answers

  • User-1802908944 posted

    /// <summary>
    /// Language codes used as the two halves of the "langpair" request parameter,
    /// plus a helper that requests a translation from the Google AJAX Language API.
    /// </summary>
    public static class Language
        {
            public const string AFRIKAANS = "af";
            public const string ALBANIAN = "sq";
            public const string AMHARIC = "am";
            public const string ARABIC = "ar";
            public const string ARMENIAN = "hy";
            public const string AZERBAIJANI = "az";
            public const string BASQUE = "eu";
            public const string BELARUSIAN = "be";
            public const string BENGALI = "bn";
            public const string BIHARI = "bh";
            public const string BULGARIAN = "bg";
            public const string BURMESE = "my";
            public const string CATALAN = "ca";
            public const string CHEROKEE = "chr";
            public const string CHINESE = "zh";
            public const string CHINESE_SIMPLIFIED = "zh-CN";
            public const string CHINESE_TRADITIONAL = "zh-TW";
            public const string CROATIAN = "hr";
            public const string CZECH = "cs";
            public const string DANISH = "da";
            public const string DHIVEHI = "dv";
            public const string DUTCH = "nl";
            public const string ENGLISH = "en";
            public const string ESPERANTO = "eo";
            public const string ESTONIAN = "et";
            public const string FILIPINO = "tl";
            public const string FINNISH = "fi";
            public const string FRENCH = "fr";
            public const string GALICIAN = "gl";
            public const string GEORGIAN = "ka";
            public const string GERMAN = "de";
            public const string GREEK = "el";
            public const string GUARANI = "gn";
            public const string GUJARATI = "gu";
            public const string HEBREW = "iw";
            public const string HINDI = "hi";
            public const string HUNGARIAN = "hu";
            public const string ICELANDIC = "is";
            public const string INDONESIAN = "id";
            public const string INUKTITUT = "iu";
            public const string ITALIAN = "it";
            public const string JAPANESE = "ja";
            public const string KANNADA = "kn";
            public const string KAZAKH = "kk";
            public const string KHMER = "km";
            public const string KOREAN = "ko";
            public const string KURDISH = "ku";
            public const string KYRGYZ = "ky";
            public const string LAOTHIAN = "lo";
            public const string LATVIAN = "lv";
            public const string LITHUANIAN = "lt";
            public const string MACEDONIAN = "mk";
            public const string MALAY = "ms";
            public const string MALAYALAM = "ml";
            public const string MALTESE = "mt";
            public const string MARATHI = "mr";
            public const string MONGOLIAN = "mn";
            public const string NEPALI = "ne";
            public const string NORWEGIAN = "no";
            public const string ORIYA = "or";
            public const string PASHTO = "ps";
            public const string PERSIAN = "fa";
            public const string POLISH = "pl";
            public const string PORTUGUESE = "pt-PT";
            public const string PUNJABI = "pa";
            public const string ROMANIAN = "ro";
            public const string RUSSIAN = "ru";
            public const string SANSKRIT = "sa";
            public const string SERBIAN = "sr";
            public const string SINDHI = "sd";
            public const string SINHALESE = "si";
            public const string SLOVAK = "sk";
            public const string SLOVENIAN = "sl";
            public const string SPANISH = "es";
            public const string SWAHILI = "sw";
            public const string SWEDISH = "sv";
            public const string TAJIK = "tg";
            public const string TAMIL = "ta";
            // Same code as FILIPINO; both names are kept for callers.
            public const string TAGALOG = "tl";
            public const string TELUGU = "te";
            public const string THAI = "th";
            public const string TIBETAN = "bo";
            public const string TURKISH = "tr";
            public const string UKRAINIAN = "uk";
            public const string URDU = "ur";
            public const string UZBEK = "uz";
            public const string UIGHUR = "ug";
            public const string VIETNAMESE = "vi";
            public const string UNKNOWN = "";

            /// <summary>
            /// Translates <paramref name="stringToTranslate"/> from one language to another
            /// by issuing a blocking HTTP GET to the translate endpoint.
            /// </summary>
            /// <param name="stringToTranslate">Text to translate; at most 5000 characters.</param>
            /// <param name="fromLanguage">Source language code, e.g. <see cref="ENGLISH"/>.</param>
            /// <param name="toLanguage">Target language code, e.g. <see cref="TAMIL"/>.</param>
            /// <returns>
            /// The translated text on success. On failure this method does not throw; it
            /// returns one of three fixed English messages (empty input, input too long,
            /// or translation unavailable).
            /// </returns>
            /// <remarks>
            /// NOTE(review): the endpoint is the legacy Google AJAX Language API (v1.0);
            /// confirm it is still reachable before relying on this code.
            /// </remarks>
            public static string Translate(string stringToTranslate, string fromLanguage, string toLanguage)
            {
                const string translationFailed = "Cannot get the translation.  Please try again later.";

                // Reject null/empty input before touching the network.
                if (String.IsNullOrEmpty(stringToTranslate))
                {
                    return "String to translate is empty.";
                }

                // The original author's note: the service's terms of use cap a request
                // at 5000 characters.
                if (stringToTranslate.Length > 5000)
                {
                    return "String to translate must be less than 5000 characters long.";
                }

                try
                {
                    // Every user-supplied piece is percent-encoded; otherwise characters such
                    // as '&', '#', '+', spaces or non-ASCII text would corrupt the query string.
                    // The response format defaults to text, so no "format" key is sent.
                    string requestUri =
                        "http://ajax.googleapis.com/ajax/services/language/translate?v=1.0&q="
                        + Uri.EscapeDataString(stringToTranslate)
                        + "&langpair="
                        + Uri.EscapeDataString(fromLanguage ?? String.Empty)
                        + "%7C"
                        + Uri.EscapeDataString(toLanguage ?? String.Empty);

                    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(requestUri);

                    string json;

                    // Dispose the response, stream and reader even when reading fails.
                    // StreamReader decodes the stream as a whole, so a multi-byte UTF-8
                    // character that straddles a read boundary is not garbled.
                    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                    using (Stream responseStream = response.GetResponseStream())
                    using (StreamReader reader = new StreamReader(responseStream, Encoding.UTF8))
                    {
                        json = reader.ReadToEnd();
                    }

                    // Expected shape of the response:
                    // {"responseData": {"translatedText":"..."}, "responseDetails": null, "responseStatus": 200}
                    string translatedText = ExtractTranslatedText(json);

                    return translatedText ?? translationFailed;
                }
                catch (Exception)
                {
                    // Deliberate best effort: callers receive a message string, never an exception.
                    return translationFailed;
                }
            }

            /// <summary>
            /// Pulls the value of the "translatedText" JSON string property out of
            /// <paramref name="json"/> and decodes its JSON escape sequences.
            /// </summary>
            /// <returns>The decoded text, or null when the property is missing or malformed.</returns>
            private static string ExtractTranslatedText(string json)
            {
                const string key = "\"translatedText\"";

                if (json == null)
                {
                    return null;
                }

                int pos = json.IndexOf(key, StringComparison.Ordinal);
                if (pos < 0)
                {
                    return null;
                }

                pos += key.Length;

                // Skip the colon and any whitespace between the key and its value.
                while (pos < json.Length && (json[pos] == ':' || Char.IsWhiteSpace(json[pos])))
                {
                    pos++;
                }

                if (pos >= json.Length || json[pos] != '"')
                {
                    return null;
                }

                pos++; // step past the opening quote

                StringBuilder text = new StringBuilder();

                while (pos < json.Length)
                {
                    char c = json[pos++];

                    if (c == '"')
                    {
                        // Unescaped quote: end of the string value.
                        return text.ToString();
                    }

                    if (c != '\\')
                    {
                        text.Append(c);
                        continue;
                    }

                    if (pos >= json.Length)
                    {
                        return null;
                    }

                    char escape = json[pos++];

                    switch (escape)
                    {
                        case 'b': text.Append('\b'); break;
                        case 'f': text.Append('\f'); break;
                        case 'n': text.Append('\n'); break;
                        case 'r': text.Append('\r'); break;
                        case 't': text.Append('\t'); break;
                        case 'u':
                            // \uXXXX: exactly four hex digits forming one UTF-16 code unit.
                            if (pos + 4 > json.Length)
                            {
                                return null;
                            }

                            int codeUnit = 0;
                            for (int i = 0; i < 4; i++)
                            {
                                char hex = json[pos + i];
                                if (!Uri.IsHexDigit(hex))
                                {
                                    return null;
                                }

                                codeUnit = (codeUnit << 4) | Uri.FromHex(hex);
                            }

                            text.Append((char)codeUnit);
                            pos += 4;
                            break;
                        default:
                            // Covers \" \\ and \/ : the escaped character stands for itself.
                            text.Append(escape);
                            break;
                    }
                }

                // Ran off the end without a closing quote.
                return null;
            }
        }


    Some useful links:


    http://www.vishalon.net/IndicResources/SpellingBee.aspx

    http://blogs.interakting.co.uk/danmatthews/archive/2008/05/20/Google-Translate-and-.NET.aspx

    http://blogs.msdn.com/shahpiyush/archive/2007/06/09/3188246.aspx


    happy coding

    • Marked as answer by Anonymous Thursday, October 7, 2021 12:00 AM
    Wednesday, June 16, 2010 5:03 AM

All replies

  • User-1900356179 posted

    Check this

    http://www.eggheadcafe.com/articles/20050725.asp 

    Monday, June 14, 2010 5:59 AM
  • User-925286913 posted

    The application will break when google slightly changes UI arrangement or rendering technique.

    Monday, June 14, 2010 8:28 AM
  • User1508394307 posted

    The application will break when google slightly changes UI arrangement or rendering technique.

    ...and doesn't support Tamil 

    Monday, June 14, 2010 9:40 AM
  • User1508394307 posted

    You should be more specific on what kind of tool it will be. A full text translator could be a challenge. A vocabulary is something very basic. 

    Monday, June 14, 2010 9:46 AM
  • User-925286913 posted

    ...and doesn't support Tamil 

    I just searched in Google and found http://www.google.com/transliterate/

    There's also API for this: http://code.google.com/apis/ajaxlanguage/documentation/#Transliteration

    I don't know whether you can use it or not but have a look at them.

    Monday, June 14, 2010 9:50 AM
  • User1135209489 posted

    Hi,

    Mine is a full-text translator. There are already plenty of tools on the market that convert vocabulary. However, I'm ready to take on the challenge.

     

    Monday, June 14, 2010 10:28 AM
  • User1508394307 posted

    Well, I'm not an expert in that field, but here is where you can probably find out the information.

    Cheers!

    Monday, June 14, 2010 1:56 PM
  • User-925286913 posted

    Whatever solution you use, please post it here, as I am curious about the solution.


    Wednesday, June 16, 2010 4:48 AM
  • User-1802908944 posted

    /// <summary>
    /// Language codes used as the two halves of the "langpair" request parameter,
    /// plus a helper that requests a translation from the Google AJAX Language API.
    /// </summary>
    public static class Language
        {
            public const string AFRIKAANS = "af";
            public const string ALBANIAN = "sq";
            public const string AMHARIC = "am";
            public const string ARABIC = "ar";
            public const string ARMENIAN = "hy";
            public const string AZERBAIJANI = "az";
            public const string BASQUE = "eu";
            public const string BELARUSIAN = "be";
            public const string BENGALI = "bn";
            public const string BIHARI = "bh";
            public const string BULGARIAN = "bg";
            public const string BURMESE = "my";
            public const string CATALAN = "ca";
            public const string CHEROKEE = "chr";
            public const string CHINESE = "zh";
            public const string CHINESE_SIMPLIFIED = "zh-CN";
            public const string CHINESE_TRADITIONAL = "zh-TW";
            public const string CROATIAN = "hr";
            public const string CZECH = "cs";
            public const string DANISH = "da";
            public const string DHIVEHI = "dv";
            public const string DUTCH = "nl";
            public const string ENGLISH = "en";
            public const string ESPERANTO = "eo";
            public const string ESTONIAN = "et";
            public const string FILIPINO = "tl";
            public const string FINNISH = "fi";
            public const string FRENCH = "fr";
            public const string GALICIAN = "gl";
            public const string GEORGIAN = "ka";
            public const string GERMAN = "de";
            public const string GREEK = "el";
            public const string GUARANI = "gn";
            public const string GUJARATI = "gu";
            public const string HEBREW = "iw";
            public const string HINDI = "hi";
            public const string HUNGARIAN = "hu";
            public const string ICELANDIC = "is";
            public const string INDONESIAN = "id";
            public const string INUKTITUT = "iu";
            public const string ITALIAN = "it";
            public const string JAPANESE = "ja";
            public const string KANNADA = "kn";
            public const string KAZAKH = "kk";
            public const string KHMER = "km";
            public const string KOREAN = "ko";
            public const string KURDISH = "ku";
            public const string KYRGYZ = "ky";
            public const string LAOTHIAN = "lo";
            public const string LATVIAN = "lv";
            public const string LITHUANIAN = "lt";
            public const string MACEDONIAN = "mk";
            public const string MALAY = "ms";
            public const string MALAYALAM = "ml";
            public const string MALTESE = "mt";
            public const string MARATHI = "mr";
            public const string MONGOLIAN = "mn";
            public const string NEPALI = "ne";
            public const string NORWEGIAN = "no";
            public const string ORIYA = "or";
            public const string PASHTO = "ps";
            public const string PERSIAN = "fa";
            public const string POLISH = "pl";
            public const string PORTUGUESE = "pt-PT";
            public const string PUNJABI = "pa";
            public const string ROMANIAN = "ro";
            public const string RUSSIAN = "ru";
            public const string SANSKRIT = "sa";
            public const string SERBIAN = "sr";
            public const string SINDHI = "sd";
            public const string SINHALESE = "si";
            public const string SLOVAK = "sk";
            public const string SLOVENIAN = "sl";
            public const string SPANISH = "es";
            public const string SWAHILI = "sw";
            public const string SWEDISH = "sv";
            public const string TAJIK = "tg";
            public const string TAMIL = "ta";
            // Same code as FILIPINO; both names are kept for callers.
            public const string TAGALOG = "tl";
            public const string TELUGU = "te";
            public const string THAI = "th";
            public const string TIBETAN = "bo";
            public const string TURKISH = "tr";
            public const string UKRAINIAN = "uk";
            public const string URDU = "ur";
            public const string UZBEK = "uz";
            public const string UIGHUR = "ug";
            public const string VIETNAMESE = "vi";
            public const string UNKNOWN = "";

            /// <summary>
            /// Translates <paramref name="stringToTranslate"/> from one language to another
            /// by issuing a blocking HTTP GET to the translate endpoint.
            /// </summary>
            /// <param name="stringToTranslate">Text to translate; at most 5000 characters.</param>
            /// <param name="fromLanguage">Source language code, e.g. <see cref="ENGLISH"/>.</param>
            /// <param name="toLanguage">Target language code, e.g. <see cref="TAMIL"/>.</param>
            /// <returns>
            /// The translated text on success. On failure this method does not throw; it
            /// returns one of three fixed English messages (empty input, input too long,
            /// or translation unavailable).
            /// </returns>
            /// <remarks>
            /// NOTE(review): the endpoint is the legacy Google AJAX Language API (v1.0);
            /// confirm it is still reachable before relying on this code.
            /// </remarks>
            public static string Translate(string stringToTranslate, string fromLanguage, string toLanguage)
            {
                const string translationFailed = "Cannot get the translation.  Please try again later.";

                // Reject null/empty input before touching the network.
                if (String.IsNullOrEmpty(stringToTranslate))
                {
                    return "String to translate is empty.";
                }

                // The original author's note: the service's terms of use cap a request
                // at 5000 characters.
                if (stringToTranslate.Length > 5000)
                {
                    return "String to translate must be less than 5000 characters long.";
                }

                try
                {
                    // Every user-supplied piece is percent-encoded; otherwise characters such
                    // as '&', '#', '+', spaces or non-ASCII text would corrupt the query string.
                    // The response format defaults to text, so no "format" key is sent.
                    string requestUri =
                        "http://ajax.googleapis.com/ajax/services/language/translate?v=1.0&q="
                        + Uri.EscapeDataString(stringToTranslate)
                        + "&langpair="
                        + Uri.EscapeDataString(fromLanguage ?? String.Empty)
                        + "%7C"
                        + Uri.EscapeDataString(toLanguage ?? String.Empty);

                    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(requestUri);

                    string json;

                    // Dispose the response, stream and reader even when reading fails.
                    // StreamReader decodes the stream as a whole, so a multi-byte UTF-8
                    // character that straddles a read boundary is not garbled.
                    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                    using (Stream responseStream = response.GetResponseStream())
                    using (StreamReader reader = new StreamReader(responseStream, Encoding.UTF8))
                    {
                        json = reader.ReadToEnd();
                    }

                    // Expected shape of the response:
                    // {"responseData": {"translatedText":"..."}, "responseDetails": null, "responseStatus": 200}
                    string translatedText = ExtractTranslatedText(json);

                    return translatedText ?? translationFailed;
                }
                catch (Exception)
                {
                    // Deliberate best effort: callers receive a message string, never an exception.
                    return translationFailed;
                }
            }

            /// <summary>
            /// Pulls the value of the "translatedText" JSON string property out of
            /// <paramref name="json"/> and decodes its JSON escape sequences.
            /// </summary>
            /// <returns>The decoded text, or null when the property is missing or malformed.</returns>
            private static string ExtractTranslatedText(string json)
            {
                const string key = "\"translatedText\"";

                if (json == null)
                {
                    return null;
                }

                int pos = json.IndexOf(key, StringComparison.Ordinal);
                if (pos < 0)
                {
                    return null;
                }

                pos += key.Length;

                // Skip the colon and any whitespace between the key and its value.
                while (pos < json.Length && (json[pos] == ':' || Char.IsWhiteSpace(json[pos])))
                {
                    pos++;
                }

                if (pos >= json.Length || json[pos] != '"')
                {
                    return null;
                }

                pos++; // step past the opening quote

                StringBuilder text = new StringBuilder();

                while (pos < json.Length)
                {
                    char c = json[pos++];

                    if (c == '"')
                    {
                        // Unescaped quote: end of the string value.
                        return text.ToString();
                    }

                    if (c != '\\')
                    {
                        text.Append(c);
                        continue;
                    }

                    if (pos >= json.Length)
                    {
                        return null;
                    }

                    char escape = json[pos++];

                    switch (escape)
                    {
                        case 'b': text.Append('\b'); break;
                        case 'f': text.Append('\f'); break;
                        case 'n': text.Append('\n'); break;
                        case 'r': text.Append('\r'); break;
                        case 't': text.Append('\t'); break;
                        case 'u':
                            // \uXXXX: exactly four hex digits forming one UTF-16 code unit.
                            if (pos + 4 > json.Length)
                            {
                                return null;
                            }

                            int codeUnit = 0;
                            for (int i = 0; i < 4; i++)
                            {
                                char hex = json[pos + i];
                                if (!Uri.IsHexDigit(hex))
                                {
                                    return null;
                                }

                                codeUnit = (codeUnit << 4) | Uri.FromHex(hex);
                            }

                            text.Append((char)codeUnit);
                            pos += 4;
                            break;
                        default:
                            // Covers \" \\ and \/ : the escaped character stands for itself.
                            text.Append(escape);
                            break;
                    }
                }

                // Ran off the end without a closing quote.
                return null;
            }
        }


    Some useful links:


    http://www.vishalon.net/IndicResources/SpellingBee.aspx

    http://blogs.interakting.co.uk/danmatthews/archive/2008/05/20/Google-Translate-and-.NET.aspx

    http://blogs.msdn.com/shahpiyush/archive/2007/06/09/3188246.aspx


    happy coding

    • Marked as answer by Anonymous Thursday, October 7, 2021 12:00 AM
    Wednesday, June 16, 2010 5:03 AM