@@ -11,8 +11,8 @@ import { selectTemperature } from '../store/parameters';
 import { openSystemPromptPanel, openTemperaturePanel } from '../store/settings-ui';
 import { speechRecognition } from '../speech-recognition-types.d'
 import MicRecorder from 'mic-recorder-to-mp3';
-import { selectUseOpenAIWhisper } from '../store/api-keys';
-
+import { selectUseOpenAIWhisper, selectOpenAIApiKey } from '../store/api-keys';
+import { Mp3Encoder } from 'lamejs';
 
 const Container = styled.div`
     background: #292933;
@@ -38,13 +38,53 @@ export interface MessageInputProps {
     disabled?: boolean;
 }
 
+
+
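+// Split a recording into MP3 chunks so each file stays under the 25 MB
+// upload limit of the OpenAI Whisper transcription endpoint.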
+async function chunkAndEncodeMP3File(file: Blob): Promise<Array<File>> {
+    const MAX_CHUNK_SIZE = 25 * 1024 * 1024; // 25 MB
+    const audioContext = new AudioContext();
+    const audioBuffer = await audioContext.decodeAudioData(await file.arrayBuffer());
+    const duration = audioBuffer.duration;
+    const sampleRate = audioBuffer.sampleRate;
+    const numChannels = audioBuffer.numberOfChannels;
+    const bytesPerSample = 2; // 16-bit audio
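+    // chunk size is computed against raw 16-bit PCM, so the encoded
+    // 128 kbps MP3 chunks come out well under MAX_CHUNK_SIZE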
+    const samplesPerChunk = Math.floor((MAX_CHUNK_SIZE / bytesPerSample) / numChannels);
+    const totalSamples = Math.floor(duration * sampleRate);
+    const numChunks = Math.ceil(totalSamples / samplesPerChunk);
+
+    const chunks: Array<File> = [];
+    for (let i = 0; i < numChunks; i++) {
+        const startSample = i * samplesPerChunk;
+        const endSample = Math.min(startSample + samplesPerChunk, totalSamples);
+        const chunkDuration = (endSample - startSample) / sampleRate;
+        const chunkBuffer = audioContext.createBuffer(numChannels, endSample - startSample, sampleRate);
+        for (let c = 0; c < numChannels; c++) {
+            const channelData = audioBuffer.getChannelData(c).subarray(startSample, endSample);
+            chunkBuffer.copyToChannel(channelData, c);
+        }
+        const chunkBlob = await new Promise<Blob>((resolve) => {
+            const encoder = new Mp3Encoder(numChannels, sampleRate, 128);
+            // lamejs expects 16-bit PCM samples, so scale the Float32 data to Int16
+            const toInt16 = (samples: Float32Array) =>
+                Int16Array.from(samples, (s) => Math.max(-1, Math.min(1, s)) * 0x7fff);
+            const leftData = toInt16(chunkBuffer.getChannelData(0));
+            const rightData = numChannels === 1 ? leftData : toInt16(chunkBuffer.getChannelData(1));
+            const mp3Data = encoder.encodeBuffer(leftData, rightData);
+            // flush the encoder so the final partial MP3 frame is not dropped
+            const blob = new Blob([mp3Data, encoder.flush()], { type: 'audio/mp3' });
+            resolve(blob);
+        });
+        chunks.push(new File([chunkBlob], `text-${i}.mp3`, { type: 'audio/mp3' }));
+    }
+
+    return chunks;
+}
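+// NOTE: not yet called from onSpeechStop below, which still uploads the
+// recording as a single file (see the TODO there).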
+
+
 export default function MessageInput(props: MessageInputProps) {
     const temperature = useAppSelector(selectTemperature);
     const message = useAppSelector(selectMessage);
     const [recording, setRecording] = useState(false);
     const hasVerticalSpace = useMediaQuery('(min-height: 1000px)');
-    const recorder = new MicRecorder({ bitRate: 128 })
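+    // memoize the recorder so a new instance isn't created on every render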
+    const recorder = useMemo(() => new MicRecorder({ bitRate: 128 }), []);
     const useOpenAIWhisper = useAppSelector(selectUseOpenAIWhisper);
+    const openAIApiKey = useAppSelector(selectOpenAIApiKey);
 
     const context = useAppContext();
     const dispatch = useAppDispatch();
@@ -65,14 +105,14 @@ export default function MessageInput(props: MessageInputProps) {
     }, [context, message, dispatch]);
 
     const onSpeechStart = () => {
+
         if (!recording) {
             setRecording(true);
 
             // if we are using whisper, then we will just record with the browser and send it to the API when done
             if (useOpenAIWhisper) {
-
+                recorder.start().catch((e: any) => console.error(e));
             } else {
-
                 speechRecognition.continuous = true;
                 speechRecognition.interimResults = true;
 
@@ -86,7 +126,36 @@ export default function MessageInput(props: MessageInputProps) {
         } else {
             setRecording(false);
             if (useOpenAIWhisper) {
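+                // mic-recorder-to-mp3 resolves with a [buffer, blob] pair for the recording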
+                const mp3 = recorder.stop().getMp3();
+
+                mp3.then(async ([buffer, blob]) => {
+
+                    const file = new File(buffer, 'chat.mp3', {
+                        type: blob.type,
+                        lastModified: Date.now()
+                    });
+
+                    // TODO: cut in chunks
+
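+                    // the transcription endpoint takes multipart form data: the audio file plus a model name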
+                    const data = new FormData();
+                    data.append('file', file);
+                    data.append('model', 'whisper-1');
+
+                    const response = await fetch("https://api.openai.com/v1/audio/transcriptions", {
+                        method: "POST",
+                        headers: {
+                            'Authorization': `Bearer ${openAIApiKey}`,
+                        },
+                        body: data,
+                    });
+
+                    const json = await response.json();
+
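+                    // a successful response carries the transcript as { text: "..." }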
+                    if (json.text) {
+                        dispatch(setMessage(json.text));
+                    }
 
+                }).catch((e: any) => console.error(e));
             } else {
                 speechRecognition.stop();
 