Text to Voice conversion using Web Speech API of Google Chrome

Last Updated : 06 Jun, 2022

Creating a web app that converts text to speech incorporated to it sounds pretty cool, and if all these facilities are available without the interference of any third party library then it is even more easy to implement. The web speech API provides with basic tools that can be used to create interactive web apps with voice data enabled. We have created a basic interface that has a simple box that contains our text input section where we will write the text, and two sliders which manipulate the rate of the voice and also its pitch. Then we have a drop-down menu that contains all the supported languages with regions mentioned with it. index.html This HTML file contains the layout of the web page.

html

<!DOCTYPE html>
<html lang="en">
 
<head>
    <meta charset="UTF-8">
 
    <title>Text to speech!</title>
 
    <!-- CSS Links -->
    <link rel="stylesheet" href=
"https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/css/bootstrap.min.css"
    crossorigin="anonymous">
     
    <!-- Giving links to jquery and bootstraps js libraries -->
    <script src="https://code.jquery.com/jquery-3.3.1.slim.min.js"
    crossorigin="anonymous"></script>
     
    <script src=
"https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/js/bootstrap.min.js"
    crossorigin="anonymous"></script>
     
    <!-- Custom JS that contains all the main functions -->
    <script src="main.js"></script>
     
    <link rel="stylesheet" href="style.css">
</head>
 
<body>
    <form class="container text-center">
        <div class="row">
            <div class="col-sm-6 mx-auto">
                <div class="form-group">
                    <div id="front-text" class="text-success">
                        GeeksforGeeks Text-to-Speech Conversion
                    </div>
                     
                    <!-- Input box text area -->
                    <textarea id="maintext" class="form-control form-control-lg"
                        style="max-lines: 2" placeholder="Enter the text...">
                    </textarea>
                </div>
            </div>
        </div>
         
        <!-- Rate of voice which we will be updated by user -->
        <div class="row">
            <div class="col-sm-6 mx-auto">
                <div class="form-group">
                    <label for="rate">Rate</label>
                    <div id="rate-value" class="badge badge-primary" >5</div>
                    <input class="custom-range" type="range" id="rate" max="1"
                        min="0.2" value="0.5" step="0.1">
                </div>
            </div>
        </div>
         
        <!-- Pitch of voice which we will be updated by user -->
        <div class="row">
            <div class="col-sm-6 mx-auto">
                <div class="form-group">
                    <label for="pitch">Pitch</label>
                    <div id="pitch-value" class="badge badge-primary" >5</div>
                    <input class="custom-range" type="range" id="pitch" max="1"
                        min="0.2" value="0.5" step="0.1">
                </div>
            </div>
        </div>
         
        <!-- The different types of voice along with country and language -->
        <div class="row">
            <div class="col-sm-6 mx-auto">
                <div class="form-group">
                     
                    <!-- This section will be dynamically loaded from
                        the API so we left it blank for now-->
                    <select class="form-control form-control-lg" 
                            id="voice-select" ></select>                
                </div>
                 
                <!-- Button to enable the speech from the 
                    text given in the input box -->
                <button id="submit" class="btn btn-success btn-lg">
                    Speak it
                </button>
            </div>
        </div>
    </form>
</body>
 
</html>

style.css This file is used to add some CSS style to the HTML file.

html

body {
    background: url('images/background.jpg');
    background-size: cover;
    background-repeat: no-repeat;
    height: 100vh;
    background-attachment: fixed;
}
 
#front-text {
    font-size: 35px;
    color: white;
    font-weight: bolder;
    text-shadow: 1px 1px 1px black;
    display: block;
    position: relative;
    margin-bottom: 5%;
    margin-top: 15%;
}
 
#rate-value {
    float: right;
}
 
#pitch-value {
    float: right;
}
 
#foot {
    font-size: 20px;
    color: white;
    font-weight: bolder;
    display: block;
    position: relative;
    margin-top: 1%;
}

main.js The JavaScript file is used to convert the text file into voice.

javascript

// Initialising the speech API
const synth = window.speechSynthesis;
  
// Element initialization section
const form = document.querySelector('form');
const textarea = document.getElementById('maintext');
const voice_select = document.getElementById('voice-select');
const rate = document.getElementById('rate');
const pitch = document.getElementById('pitch');
const rateval = document.getElementById('rate-value');
const pitchval = document.getElementById('pitch-value');
  
// Retrieving the different voices and putting them as 
// options in our speech selection section
let voices = [];
const getVoice = () => {
     
    // This method retrieves voices and is asynchronously loaded
    voices = synth.getVoices();
    var option_string = "";
    voices.forEach(value => {
        var option = value.name + ' (' + value.lang + ') ';
        var newOption = "<option data-name='" + value.name +
                "' data-lang='" + value.lang + "'>" + option
                + "</option>\n";
        option_string += newOption;
    });
     
    voice_select.innerHTML = option_string;
}
  
// Since synth.getVoices() is loaded asynchronously, this
// event gets fired when the return object of that 
// function has changed
synth.onvoiceschanged = function() {
    getVoice();
};
  
const speak = () => {
     
    // If the speech mode is on we dont want to load 
    // another speech
    if(synth.speaking) {
        alert('Already speaking....');
        return;
    }
      
    // If the text area is not empty that is if the input
    // is not empty
    if(textarea.value !== '') {
         
        // Creating an object of SpeechSynthesisUtterance with
        // the input value that represents a speech request
        const speakText = new SpeechSynthesisUtterance(textarea.value);
  
        // When the speaking is ended this method is fired
        speakText.onend = e => {
            console.log('Speaking is done!');
        };
         
        // When any error occurs this method is fired
        speakText.error = e=> {
            console.error('Error occurred...');
        };
  
        // Selecting the voice for the speech from the selection DOM
        const id = voice_select.selectedIndex;
        const selectedVoice = 
            voice_select.selectedOptions[0].getAttribute('data-name');
   
        // Checking which voices has been chosen from the selection
        // and setting the voice to the chosen voice
        voices.forEach(voice => {
            if(voice.name === selectedVoice) {
                speakText.voice = voice;
            }
        });
  
        // Setting the rate and pitch of the voice
        speakText.rate = rate.value;
        speakText.pitch = pitch.value;
  
        // Finally calling the speech function that enables speech
        synth.speak(speakText);
    }
};
  
// This function updates the rate and pitch value to the
// value to display
rate.addEventListener('change', evt => rateval.innerHTML 
        = (Number.parseFloat(rate.value) * 10) + "");
 
pitch.addEventListener('change', evt => pitchval.innerHTML 
        = (Number.parseFloat(pitch.value) * 10) + "");
  
// This is the section when we assign the speak button, the
// speech function
form.addEventListener('submit', evt => {
    evt.preventDefault();
    speak();
    textarea.blur();
});