Using a Node Middle Layer to Implement TTS (Automatic Read-Aloud) for H5 Page Articles

  koa2, node.js

Article readers often offer a feature that reads the content of an article aloud automatically. So how can this feature be implemented on an H5 page? Let's take a basic requirement from our company as the starting point and see how it is implemented step by step.

Requirements

After some thought, our product manager came up with the following requirement:
1. Automatically read current h5 page articles

Competitor research
竞品功能.png

Our research found that competitors implement this feature natively in their apps, whereas our articles are read on H5 pages, so we started looking for a solution that works on H5.

Integrating iFlytek (Xunfei) Online Speech Synthesis

We found that iFlytek's (Xunfei's) online speech synthesis service provides essentially the functionality we need, so we decided to build a demo to test the result.

1. Enable the service in the console

clipboard.png

2. Read the document

clipboard.png

The specific code is as follows

import axios from 'axios'
 import * as md5 from './md5'
 
 // Send cookies/credentials with every axios request.
 axios.defaults.withCredentials = true

 // Credentials for the speech-synthesis API (placeholders here).
 // NOTE(review): an apiKey embedded in browser code is visible to every visitor;
 // consider signing requests on a server instead.
 const Appid = 'xxxxx'
 const apiKey = 'xxxxxx'

 // Unix time in whole seconds, captured once when this module is evaluated.
 const CurTime = Math.floor(Date.now() / 1000)

 // Synthesis options; serialized to JSON and base64-encoded into the X-Param header below.
 const param = {
   // The stray blanks after ';' were removed so the value is a plain "type;rate=N" token.
   // NOTE(review): confirm the exact accepted spelling against the TTS API reference.
   auf: 'audio/L16;rate=16000',
   aue: 'lame',
   voice_name: 'xiaoyan',
   speed: '50',
   volume: '50',
   pitch: '50',
   engine_type: 'intp65',
   text_type: 'text'
 }

 // Unicode-safe base64 helpers built on the browser's btoa/atob.
 const Base64 = {
   /**
    * Base64-encode the UTF-8 bytes of `str`.
    * encodeURIComponent yields %XX escapes for the UTF-8 bytes; each escape is turned
    * back into a single one-byte character so btoa receives a byte string.
    */
   encode: (str) => {
     const byteString = encodeURIComponent(str).replace(
       /%([0-9A-F]{2})/g,
       (match, hex) => String.fromCharCode(parseInt(hex, 16))
     )
     return btoa(byteString)
   },
   /**
    * Reverse of `encode`: byte string -> percent-encoding -> original string.
    */
   decode: (str) => {
     const percentEncoded = atob(str)
       .split('')
       .map((c) => '%' + ('00' + c.charCodeAt(0).toString(16)).slice(-2))
       .join('')
     return decodeURIComponent(percentEncoded)
   }
 }

 // X-Param header value and the checksum derived from key + timestamp + X-Param.
 const xp = Base64.encode(JSON.stringify(param))
 const CheckSum = md5.hex_md5(apiKey + CurTime + xp)

 // Header set sent with every synthesis request.
 const headers = {
   'X-Appid': Appid,
   'X-CurTime': CurTime,
   'X-Param': xp,
   'X-CheckSum': CheckSum,
   'Content-Type': 'application/x-www-form-urlencoded;  charset=utf-8'
 }
 
 /**
  * Ask the TTS endpoint to synthesize speech for `text`.
  *
  * @param {string} text - Plain text to be read aloud.
  * @returns {Promise} The promise returned by axios for the POST request.
  */
 export function getAloud (text) {
   // The checksum is derived from the timestamp, so both are rebuilt for every call
   // instead of reusing the values that were frozen when the module was first loaded.
   const curTime = Math.floor(Date.now() / 1000)
   const checkSum = md5.hex_md5(apiKey + curTime + xp)

   // URLSearchParams is serialized as application/x-www-form-urlencoded, which is what
   // the Content-Type header declares; a FormData body would be sent as multipart/form-data.
   const body = new URLSearchParams()
   body.append('text', text)

   // NOTE(review): the response is binary audio; without a binary `responseType`
   // axios hands it back as text. Confirm what callers expect before changing it.
   return axios({
     // Pages whose URL contains "demo" call the API host directly; others go through the /tts path.
     baseURL: window.location.href.includes('demo') ? 'https://api.xfyun.cn' : '/tts',
     method: 'POST',
     url: '/v1/service/v1/tts',
     headers: {
       ...headers,
       'X-CurTime': curTime,
       'X-CheckSum': checkSum
     },
     data: body
   })
 }

In testing, the API returned a binary file stream, but despite trying various approaches the front end could not play that stream back.

Node intermediate layer

The Node middle layer was introduced so that the audio can be stored as a file and served through a CDN cache, which reduces repeated requests and traffic for the same article. We therefore decided to add a Node middle layer.

PS: Koala Reading already runs a Node server for some middle-layer processing. The main technology stack is node + koa2 + pm2.

const md5 = require('../lib/md5.js')
 const fs = require('fs')
 const path = require('path')
 const marked = require('marked')
 const request = require('request')
 
 
 // Credentials for the speech-synthesis API (left blank here).
 const Appid = ''
 const apiKey = ''
 // Request timestamp slot; no value at module load.
 let CurTime
 // Synthesis options; serialized to JSON and base64-encoded into the X-Param header below.
 const param = {
   // The stray blanks after ';' were removed so the value is a plain "type;rate=N" token.
   // NOTE(review): confirm the exact accepted spelling against the TTS API reference.
   auf: 'audio/L16;rate=16000',
   aue: 'lame',
   voice_name: 'x_yiping',
   speed: '40',
   volume: '50',
   pitch: '50',
   engine_type: 'intp65',
   text_type: 'text'
 }

 // X-Param header value. Buffer.from replaces the deprecated `new Buffer(string)` constructor.
 const xp = Buffer.from(JSON.stringify(param), 'utf8').toString('base64')
 // Request checksum slot; no value at module load.
 let CheckSum

 // Request header slot; no value at module load.
 let headers
 
 exports.getAloud = async ctx => {
 CurTime = Date.parse(new Date()) / 1000
 CheckSum = md5.hex_md5(apiKey + CurTime + xp)
 headers = {
 'X-Appid': Appid,
 'X-CurTime': CurTime,
 'X-Param': xp,
 'X-CheckSum': CheckSum,
 'Content-Type': 'application/x-www-form-urlencoded;  charset=utf-8'
 }
 let id = ctx.request.body.id
 let text = ctx.request.body.text
 console.log(ctx.query)
 var postData = {
 text: text
 }
 let r = request({
 URL:' http://api.xfyun.cn/v1/service/v1/tts',//requested url
 Method:' post','//request method
 headers: headers,
 formData: postData
 }, function (error, response, body) {
 // console.log('error:', error);  // Print the error if one occurred
 // console.log('statusCode:', response && response.statusCode);  // Print the response status code if a response was received
 // console.log('body:', body);  // Print the HTML for the Google homepage.
 })
 await new Promise((resolve, reject) => {
 let filePath = path.join(__dirname, 'public/') + `/${id}.mp3`
 const upStream = fs.createWriteStream(filePath)
 r.pipe(upStream)
 upStream.on('close', () => {
 console.log('download finished');
 resolve()
 });
 })
 .then((res) => {
 ctx.body = {
 code: 200,
 Message:' speech synthesis succeeded',
 data: {
 url: 'https://fe.koalareading.com/file/' + id + '.mp3'
 }
 }
 })
 }

It mainly relies on the stream piping provided by request.
The binary data returned by the speech synthesis service is piped into a write stream and saved to a file.
Finally, a URL is returned to the front end for playback.

Now, the test

clipboard.png

// Returned URL. Each article is identified by its unique id, so the generated file can be cached and reused.
 https://fe.koalareading.com/file/1112.mp3

Demo completed