import React, { useState, useRef, useEffect } from 'react';

import './App.css';

import Header from './components/Header';
import Footer from './components/Footer';
import MobileWarning from './components/MobileWarning';
import MediaDevices from './components/MediaDevices';
import TextView from './components/TextView';
import CallView from './components/CallView';
import Button from './components/Common/Button';
import { Characters, createCharacterGroups } from './components/Characters';
import { sendTokenToServer, signInWithGoogle } from './components/Auth/SignIn';
import Models from './components/Models';

import useWebsocket from './hooks/useWebsocket';
import useMediaRecorder from './hooks/useMediaRecorder';
import useSpeechRecognition from './hooks/useSpeechRecognition';

import auth from './utils/firebase';
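// Top-level component: wires together the websocket, media recorder, and
// speech recognition hooks, handles Firebase auth, and switches between the
// character selection, call, and text views.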
const App = () => {
  const isMobile = window.innerWidth <= 768;

  // UI and conversation state.
  const [headerText, setHeaderText] = useState("");
  const [selectedDevice, setSelectedDevice] = useState("");
  const [characterConfirmed, setCharacterConfirmed] = useState(false);
  const [isCallView, setIsCallView] = useState(false);
  const [isPlaying, setIsPlaying] = useState(false);
  const [selectedCharacter, setSelectedCharacter] = useState(null);
  const [characterGroups, setCharacterGroups] = useState([]);
  const [textAreaValue, setTextAreaValue] = useState('');
  const [messageInput, setMessageInput] = useState('');
  const [selectedModel, setSelectedModel] = useState("gpt-3.5-turbo-16k");
  const [user, setUser] = useState(null);
  const [token, setToken] = useState("");

  // Mutable values read by long-lived websocket/recorder/recognition callbacks;
  // kept in refs so updating them does not trigger re-renders.
  const onresultTimeout = useRef(null);
  const onspeechTimeout = useRef(null);
  const audioPlayer = useRef(null);
  const callActive = useRef(false);
  const audioSent = useRef(false);
  const shouldPlayAudio = useRef(false);
  const finalTranscripts = useRef([]);
  const audioQueue = useRef([]);
  const chunks = useRef([]);
  const confidence = useRef(0);
  const isConnected = useRef(false);
  const isLoggedIn = useRef(false);
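  // Keep local auth state in sync with Firebase and cache the ID token for
  // requests that require a signed-in user.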
  useEffect(() => {
    auth.onAuthStateChanged(async (user) => {
      setUser(user);
      if (user) {
        isLoggedIn.current = true;
        // getIdToken() returns a promise; await it so the token state holds the string.
        const curToken = await auth.currentUser.getIdToken();
        setToken(curToken);
      } else {
        isLoggedIn.current = false;
      }
    });
  }, []);
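  // Websocket opened: prepare the microphone and speech recognition, identify
  // this client to the server, and prompt the user to pick a character.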
  const handleSocketOnOpen = (event) => {
    console.log("successfully connected");
    isConnected.current = true;
    connectMicrophone(selectedDevice);
    initializeSpeechRecognition();
    send("web");
    setHeaderText("Select a character");
  };
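  // Text frames are routed by prefix: '[end]' and '[=]' close the current block,
  // '[+]' is rendered as the user's own line, and 'Select...' carries the
  // character list. Binary frames are audio and are queued for playback.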
  const handleSocketOnMessage = (event) => {
    if (typeof event.data === 'string') {
      const message = event.data;
      if (message === '[end]\n') {
        setTextAreaValue(prevState => prevState + "\n\n");
      } else if (message.startsWith('[+]')) {
        setTextAreaValue(prevState => prevState + `\nYou> ${message}\n`);
        stopAudioPlayback();
      } else if (message.startsWith('[=]')) {
        setTextAreaValue(prevState => prevState + "\n\n");
      } else if (message.startsWith('Select')) {
        setCharacterGroups(createCharacterGroups(message));
      } else {
        setTextAreaValue(prevState => prevState + `${event.data}`);
        shouldPlayAudio.current = true;
      }
    } else {
      if (!shouldPlayAudio.current) {
        console.log("should not play audio");
        return;
      }
      audioQueue.current.push(event.data);
      if (audioQueue.current.length === 1) {
        setIsPlaying(true);
      }
    }
  };
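  // MediaRecorder callbacks: buffer audio chunks while recording, then bundle
  // them into a single webm blob when recording stops.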
  const handleRecorderOnDataAvailable = (event) => {
    chunks.current.push(event.data);
  };
  const handleRecorderOnStop = () => {
    const blob = new Blob(chunks.current, { type: 'audio/webm' });
    chunks.current = [];

    if (isConnected.current) {
      if (!audioSent.current && callActive.current) {
        send(blob);
      }
      audioSent.current = false;
      if (callActive.current) {
        startRecording();
      }
    }
  };
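  // Speech recognition results: final transcripts are forwarded immediately,
  // interim ones after a 500 ms debounce; two seconds without further results
  // stops listening.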
  const handleRecognitionOnResult = (event) => {
    clearTimeout(onresultTimeout.current);
    clearTimeout(onspeechTimeout.current);
    stopAudioPlayback();

    const result = event.results[event.results.length - 1];
    const transcriptObj = result[0];
    const transcript = transcriptObj.transcript;
    const isFinal = result.isFinal;
    if (isFinal) {
      console.log(`final transcript: {${transcript}}`);
      finalTranscripts.current.push(transcript);
      confidence.current = transcriptObj.confidence;
      send(`[&]${transcript}`);
    } else {
      console.log(`interim transcript: {${transcript}}`);
    }

    onresultTimeout.current = setTimeout(() => {
      if (isFinal) {
        return;
      }
      console.log(`TIMEOUT: interim transcript: {${transcript}}`);
      send(`[&]${transcript}`);
    }, 500);

    onspeechTimeout.current = setTimeout(() => {
      stopListening();
    }, 2000);
  };
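  // When speech ends, stop recording and only send the accumulated transcript
  // if recognition confidence was high enough.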
  const handleRecognitionOnSpeechEnd = () => {
    if (isConnected.current) {
      audioSent.current = true;
      stopRecording();
      if (confidence.current > 0.8 && finalTranscripts.current.length > 0) {
        const message = finalTranscripts.current.join(' ');
        send(message);
        setTextAreaValue(prevState => prevState + `\nYou> ${message}\n`);
        shouldPlayAudio.current = true;
      }
    }
    finalTranscripts.current = [];
  };
  const stopAudioPlayback = () => {
    if (audioPlayer.current) {
      audioPlayer.current.pause();
      shouldPlayAudio.current = false;
    }
    audioQueue.current = [];
    setIsPlaying(false);
  };
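  // Wire the websocket, media recorder, and speech recognition hooks to the
  // handlers defined above.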
  const { socketRef, send, connectSocket, closeSocket } = useWebsocket(token, handleSocketOnOpen, handleSocketOnMessage, selectedModel);
  const { isRecording, connectMicrophone, startRecording, stopRecording, closeMediaRecorder } = useMediaRecorder(handleRecorderOnDataAvailable, handleRecorderOnStop);
  const { startListening, stopListening, closeRecognition, initializeSpeechRecognition } = useSpeechRecognition(handleRecognitionOnResult, handleRecognitionOnSpeechEnd, callActive);
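  // Models other than the default gpt-3.5-turbo-16k require a signed-in user,
  // so sign in with Google (or reuse the cached token) before connecting.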
  const handleConnectButtonClick = async () => {
    try {
      if (selectedModel !== 'gpt-3.5-turbo-16k') {
        if (isLoggedIn.current) {
          await sendTokenToServer(token);
          connectSocket();
        } else {
          signInWithGoogle(isLoggedIn, setToken).then(() => {
            if (isLoggedIn.current) {
              connectSocket();
            }
          });
        }
      } else {
        connectSocket();
      }
    } catch (error) {
      console.error('Error during sign in or connect:', error);
    }
  };
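  // Confirm the selected character and enter the voice call view, recording
  // and listening right away.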
  const handleTalkClick = () => {
    if (isConnected.current && selectedCharacter) {
      send(selectedCharacter);
      setCharacterConfirmed(true);
      setIsCallView(true);
      setHeaderText("Hi, my friend, what brings you here today?");

      startRecording();
      startListening();
      shouldPlayAudio.current = true;
      callActive.current = true;
    }
  };
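  // Confirm the selected character and enter the text chat view instead.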
  const handleTextClick = () => {
    if (isConnected.current && selectedCharacter) {
      send(selectedCharacter);
      setCharacterConfirmed(true);
      setIsCallView(false);
      setHeaderText("");
      shouldPlayAudio.current = true;
    }
  };
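  // Pause and resume the active call without tearing down the connection.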
  const handleStopCall = () => {
    stopRecording();
    stopListening();
    stopAudioPlayback();
    callActive.current = false;
  };

  const handleContinueCall = () => {
    startRecording();
    startListening();
    callActive.current = true;
  };
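  // Tear everything down and reset the UI to its pre-connect state.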
  const handleDisconnect = () => {
    if (socketRef && socketRef.current) {
      stopAudioPlayback();
      closeMediaRecorder();
      closeRecognition();
      callActive.current = false;
      shouldPlayAudio.current = false;
      audioSent.current = false;
      confidence.current = 0;
      chunks.current = [];

      setSelectedCharacter(null);
      setCharacterConfirmed(false);
      setIsCallView(false);
      setHeaderText("");
      setTextAreaValue("");
      setSelectedModel("gpt-3.5-turbo-16k");

      closeSocket();
      isConnected.current = false;
    }
  };
  return (
    <div className="app">
      <Header user={user} isLoggedIn={isLoggedIn} setToken={setToken} handleDisconnect={handleDisconnect} />

      { isMobile ? (
        <MobileWarning />
      ) : (
        <div id="desktop-content">
          <p className="alert text-white">
            Please wear headphones 🎧
            { isConnected.current && characterConfirmed && isRecording ?
              (<span className="recording">Recording</span>) : null
            }
          </p>

          { !isConnected.current ?
            <MediaDevices selectedDevice={selectedDevice} setSelectedDevice={setSelectedDevice} /> : null
          }

          { !isConnected.current ?
            <Models selectedModel={selectedModel} setSelectedModel={setSelectedModel} /> : null
          }

          <p className="header">{headerText}</p>

          { !isConnected.current ?
            <Button onClick={handleConnectButtonClick} name="Connect" /> : null
          }

          { isConnected.current &&
            <Characters
              characterGroups={characterGroups}
              selectedCharacter={selectedCharacter}
              setSelectedCharacter={setSelectedCharacter}
              isPlaying={isPlaying}
              characterConfirmed={characterConfirmed}
            />
          }

          { isConnected.current && !characterConfirmed ?
            ( <div className="actions">
                <Button onClick={handleTalkClick} name="Talk" disabled={!selectedCharacter} />
                <Button onClick={handleTextClick} name="Text" disabled={!selectedCharacter} />
              </div> ) : null
          }

          {/* Both views stay mounted; only one is displayed at a time. */}
          <div style={{ display: isConnected.current && characterConfirmed ? "flex" : "none" }}>
            <div className="main-screen" style={{ display: isCallView ? "flex" : "none" }}>
              <CallView
                isRecording={isRecording}
                isPlaying={isPlaying}
                audioPlayer={audioPlayer}
                handleStopCall={handleStopCall}
                handleContinueCall={handleContinueCall}
                audioQueue={audioQueue}
                setIsPlaying={setIsPlaying}
                handleDisconnect={handleDisconnect}
                setIsCallView={setIsCallView}
              />
            </div>

            <div className="main-screen" style={{ display: isCallView ? "none" : "flex" }}>
              <TextView
                send={send}
                isPlaying={isPlaying}
                stopAudioPlayback={stopAudioPlayback}
                textAreaValue={textAreaValue}
                setTextAreaValue={setTextAreaValue}
                messageInput={messageInput}
                setMessageInput={setMessageInput}
                handleDisconnect={handleDisconnect}
                setIsCallView={setIsCallView}
              />
            </div>
          </div>
        </div>
      )}

      <Footer />
    </div>
  );
};

export default App;