Create a real time voice translator using Python
In this article, we are going to create a real-time voice translator in Python.
Module needed
- playsound: This module is used to play sound in Python
pip install playsound
- Speech Recognition Module: It is a library with the help of which Python can recognize the command given. We have to use pip for Speech Recognition.
pip install SpeechRecognition
- googletrans: Googletrans is a free and unlimited python library that implemented Google Translate API
pip install googletrans
- gTTs: The gTTS API supports several languages including English, Hindi, Tamil, French, German and many more.
pip install gTTs pip install gTTS-token
A real-time voice translator that can translate voice input and give translated voice output generated from it. It is created using google’s googleTrans API and speech_recognition library of python. It converts text from one language to another language and saves its mp3 recorded file. The playsound module is then used to play the generated mp3 file, After that, the generated mp3 file is deleted using the os module.
Stepwise implementation
Step 1: Importing Necessary Modules
Python3
# Importing necessary modules required from playsound import playsound import speech_recognition as sr from googletrans import Translator from gtts import gTTS import os |
Step 2: A tuple of all the languages mapped with their code
Python3
dic = ( 'afrikaans' , 'af' , 'albanian' , 'sq' , 'amharic' , 'am' , 'arabic' , 'ar' , 'armenian' , 'hy' , 'azerbaijani' , 'az' , 'basque' , 'eu' , 'belarusian' , 'be' , 'bengali' , 'bn' , 'bosnian' , 'bs' , 'bulgarian' , 'bg' , 'catalan' , 'ca' , 'cebuano' , 'ceb' , 'chichewa' , 'ny' , 'chinese (simplified)' , 'zh-cn' , 'chinese (traditional)' , 'zh-tw' , 'corsican' , 'co' , 'croatian' , 'hr' , 'czech' , 'cs' , 'danish' , 'da' , 'dutch' , 'nl' , 'english' , 'en' , 'esperanto' , 'eo' , 'estonian' , 'et' , 'filipino' , 'tl' , 'finnish' , 'fi' , 'french' , 'fr' , 'frisian' , 'fy' , 'galician' , 'gl' , 'georgian' , 'ka' , 'german' , 'de' , 'greek' , 'el' , 'gujarati' , 'gu' , 'haitian creole' , 'ht' , 'hausa' , 'ha' , 'hawaiian' , 'haw' , 'hebrew' , 'he' , 'hindi' , 'hi' , 'hmong' , 'hmn' , 'hungarian' , 'hu' , 'icelandic' , 'is' , 'igbo' , 'ig' , 'indonesian' , 'id' , 'irish' , 'ga' , 'italian' , 'it' , 'japanese' , 'ja' , 'javanese' , 'jw' , 'kannada' , 'kn' , 'kazakh' , 'kk' , 'khmer' , 'km' , 'korean' , 'ko' , 'kurdish (kurmanji)' , 'ku' , 'kyrgyz' , 'ky' , 'lao' , 'lo' , 'latin' , 'la' , 'latvian' , 'lv' , 'lithuanian' , 'lt' , 'luxembourgish' , 'lb' , 'macedonian' , 'mk' , 'malagasy' , 'mg' , 'malay' , 'ms' , 'malayalam' , 'ml' , 'maltese' , 'mt' , 'maori' , 'mi' , 'marathi' , 'mr' , 'mongolian' , 'mn' , 'myanmar (burmese)' , 'my' , 'nepali' , 'ne' , 'norwegian' , 'no' , 'odia' , 'or' , 'pashto' , 'ps' , 'persian' , 'fa' , 'polish' , 'pl' , 'portuguese' , 'pt' , 'punjabi' , 'pa' , 'romanian' , 'ro' , 'russian' , 'ru' , 'samoan' , 'sm' , 'scots gaelic' , 'gd' , 'serbian' , 'sr' , 'sesotho' , 'st' , 'shona' , 'sn' , 'sindhi' , 'sd' , 'sinhala' , 'si' , 'slovak' , 'sk' , 'slovenian' , 'sl' , 'somali' , 'so' , 'spanish' , 'es' , 'sundanese' , 'su' , 'swahili' , 'sw' , 'swedish' , 'sv' , 'tajik' , 'tg' , 'tamil' , 'ta' , 'telugu' , 'te' , 'thai' , 'th' , 'turkish' , 'tr' , 'ukrainian' , 'uk' , 'urdu' , 'ur' , 'uyghur' , 'ug' , 'uzbek' , 'uz' , 'vietnamese' , 'vi' , 'welsh' , 'cy' , 'xhosa' , 'xh' , 'yiddish' , 'yi' , 'yoruba' , 'yo' , 'zulu' , 'zu' ) |
Step 3: Taking voice commands from the user
Python3
# Capture Voice # takes command through microphone def takecommand(): r = sr.Recognizer() with sr.Microphone() as source: print ( "listening....." ) r.pause_threshold = 1 audio = r.listen(source) try : print ( "Recognizing....." ) query = r.recognize_google(audio, language = 'en-in' ) print (f "user said {query}\n" ) except Exception as e: print ( "say that again please....." ) return "None" return query |
Step 4: Taking voice input from the user
Python3
# Taking voice input from the user query = takecommand() while (query = = "None" ): query = takecommand() |
Step 5: Input destination language from the user, Mapping user input with the language code
Python3
def destination_language(): print ("Enter the language in which you want to convert \ : Ex. Hindi , English , etc.") print () # Input destination language in which the user # wants to translate to_lang = takecommand() while (to_lang = = "None" ): to_lang = takecommand() to_lang = to_lang.lower() return to_lang to_lang = destination_language() # Mapping it with the code while (to_lang not in dic): print ("Language in which you are trying to convert\ is currently not available ,please input some other language") print () to_lang = destination_language() to_lang = dic[dic.index(to_lang) + 1 ] |
Step 6: Invoking Translator
Python3
# invoking Translator translator = Translator() |
Step 7: Translating from src to dest
Python3
# Translating from src to dest text_to_translate = translator.translate(query, dest = to_lang) text = text_to_translate.text |
Step 8: Saving Translated files and deleting them after playing
Python3
# Using Google-Text-to-Speech ie, gTTS() method # to speak the translated text into the # destination language which is stored in to_lang. # Also, we have given 3rd argument as False because # by default it speaks very slowly speak = gTTS(text = text, lang = to_lang, slow = False ) # Using save() method to save the translated # speech in capture_voice.mp3 speak.save( "captured_voice.mp3" ) # Using OS module to run the translated voice. playsound( 'captured_voice.mp3' ) os.remove( 'captured_voice.mp3' ) print (text) |
Below is the full implementation:
Python3
# Importing necessary modules required from playsound import playsound import speech_recognition as sr from googletrans import Translator from gtts import gTTS import os flag = 0 # A tuple containing all the language and # codes of the language will be detcted dic = ( 'afrikaans' , 'af' , 'albanian' , 'sq' , 'amharic' , 'am' , 'arabic' , 'ar' , 'armenian' , 'hy' , 'azerbaijani' , 'az' , 'basque' , 'eu' , 'belarusian' , 'be' , 'bengali' , 'bn' , 'bosnian' , 'bs' , 'bulgarian' , 'bg' , 'catalan' , 'ca' , 'cebuano' , 'ceb' , 'chichewa' , 'ny' , 'chinese (simplified)' , 'zh-cn' , 'chinese (traditional)' , 'zh-tw' , 'corsican' , 'co' , 'croatian' , 'hr' , 'czech' , 'cs' , 'danish' , 'da' , 'dutch' , 'nl' , 'english' , 'en' , 'esperanto' , 'eo' , 'estonian' , 'et' , 'filipino' , 'tl' , 'finnish' , 'fi' , 'french' , 'fr' , 'frisian' , 'fy' , 'galician' , 'gl' , 'georgian' , 'ka' , 'german' , 'de' , 'greek' , 'el' , 'gujarati' , 'gu' , 'haitian creole' , 'ht' , 'hausa' , 'ha' , 'hawaiian' , 'haw' , 'hebrew' , 'he' , 'hindi' , 'hi' , 'hmong' , 'hmn' , 'hungarian' , 'hu' , 'icelandic' , 'is' , 'igbo' , 'ig' , 'indonesian' , 'id' , 'irish' , 'ga' , 'italian' , 'it' , 'japanese' , 'ja' , 'javanese' , 'jw' , 'kannada' , 'kn' , 'kazakh' , 'kk' , 'khmer' , 'km' , 'korean' , 'ko' , 'kurdish (kurmanji)' , 'ku' , 'kyrgyz' , 'ky' , 'lao' , 'lo' , 'latin' , 'la' , 'latvian' , 'lv' , 'lithuanian' , 'lt' , 'luxembourgish' , 'lb' , 'macedonian' , 'mk' , 'malagasy' , 'mg' , 'malay' , 'ms' , 'malayalam' , 'ml' , 'maltese' , 'mt' , 'maori' , 'mi' , 'marathi' , 'mr' , 'mongolian' , 'mn' , 'myanmar (burmese)' , 'my' , 'nepali' , 'ne' , 'norwegian' , 'no' , 'odia' , 'or' , 'pashto' , 'ps' , 'persian' , 'fa' , 'polish' , 'pl' , 'portuguese' , 'pt' , 'punjabi' , 'pa' , 'romanian' , 'ro' , 'russian' , 'ru' , 'samoan' , 'sm' , 'scots gaelic' , 'gd' , 'serbian' , 'sr' , 'sesotho' , 'st' , 'shona' , 'sn' , 'sindhi' , 'sd' , 'sinhala' , 'si' , 'slovak' , 'sk' , 'slovenian' , 'sl' , 'somali' , 'so' , 'spanish' , 'es' , 'sundanese' , 'su' , 'swahili' , 'sw' , 'swedish' , 'sv' , 'tajik' , 'tg' , 'tamil' , 'ta' , 'telugu' , 'te' , 'thai' , 'th' , 'turkish' , 'tr' , 'ukrainian' , 'uk' , 'urdu' , 'ur' , 'uyghur' , 'ug' , 'uzbek' , 'uz' , 'vietnamese' , 'vi' , 'welsh' , 'cy' , 'xhosa' , 'xh' , 'yiddish' , 'yi' , 'yoruba' , 'yo' , 'zulu' , 'zu' ) # Capture Voice # takes command through microphone def takecommand(): r = sr.Recognizer() with sr.Microphone() as source: print ( "listening....." ) r.pause_threshold = 1 audio = r.listen(source) try : print ( "Recognizing....." ) query = r.recognize_google(audio, language = 'en-in' ) print (f "The User said {query}\n" ) except Exception as e: print ( "say that again please....." ) return "None" return query # Input from user # Make input to lowercase query = takecommand() while (query = = "None" ): query = takecommand() def destination_language(): print ("Enter the language in which you\ want to convert : Ex. Hindi , English , etc.") print () # Input destination language in # which the user wants to translate to_lang = takecommand() while (to_lang = = "None" ): to_lang = takecommand() to_lang = to_lang.lower() return to_lang to_lang = destination_language() # Mapping it with the code while (to_lang not in dic): print ("Language in which you are trying\ to convert is currently not available ,\ please input some other language") print () to_lang = destination_language() to_lang = dic[dic.index(to_lang) + 1 ] # invoking Translator translator = Translator() # Translating from src to dest text_to_translate = translator.translate(query, dest = to_lang) text = text_to_translate.text # Using Google-Text-to-Speech ie, gTTS() method # to speak the translated text into the # destination language which is stored in to_lang. # Also, we have given 3rd argument as False because # by default it speaks very slowly speak = gTTS(text = text, lang = to_lang, slow = False ) # Using save() method to save the translated # speech in capture_voice.mp3 speak.save( "captured_voice.mp3" ) # Using OS module to run the translated voice. playsound( 'captured_voice.mp3' ) os.remove( 'captured_voice.mp3' ) # Printing Output print (text) |
Output:
Please Login to comment...