Skip to content

Commit a8d6d1d

Browse files
committed
whisper added
1 parent 0584d29 commit a8d6d1d

File tree

5 files changed

+90
-10
lines changed

5 files changed

+90
-10
lines changed

app/src/components/input.tsx

Lines changed: 74 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@ import { selectTemperature } from '../store/parameters';
1111
import { openSystemPromptPanel, openTemperaturePanel } from '../store/settings-ui';
1212
import { speechRecognition } from '../speech-recognition-types.d'
1313
import MicRecorder from 'mic-recorder-to-mp3';
14-
import { selectUseOpenAIWhisper } from '../store/api-keys';
15-
14+
import { selectUseOpenAIWhisper, selectOpenAIApiKey } from '../store/api-keys';
15+
import { Mp3Encoder } from 'lamejs';
1616

1717
const Container = styled.div`
1818
background: #292933;
@@ -38,13 +38,53 @@ export interface MessageInputProps {
3838
disabled?: boolean;
3939
}
4040

41+
42+
43+
async function chunkAndEncodeMP3File(file: Blob): Promise<Array<File>> {
44+
const MAX_CHUNK_SIZE = 25 * 1024 * 1024; // 25 MB
45+
const audioContext = new AudioContext();
46+
const audioBuffer = await audioContext.decodeAudioData(await file.arrayBuffer());
47+
const duration = audioBuffer.duration;
48+
const sampleRate = audioBuffer.sampleRate;
49+
const numChannels = audioBuffer.numberOfChannels;
50+
const bytesPerSample = 2; // 16-bit audio
51+
const samplesPerChunk = Math.floor((MAX_CHUNK_SIZE / bytesPerSample) / numChannels);
52+
const totalSamples = Math.floor(duration * sampleRate);
53+
const numChunks = Math.ceil(totalSamples / samplesPerChunk);
54+
55+
const chunks: Array<File> = [];
56+
for (let i = 0; i < numChunks; i++) {
57+
const startSample = i * samplesPerChunk;
58+
const endSample = Math.min(startSample + samplesPerChunk, totalSamples);
59+
const chunkDuration = (endSample - startSample) / sampleRate;
60+
const chunkBuffer = audioContext.createBuffer(numChannels, endSample - startSample, sampleRate);
61+
for (let c = 0; c < numChannels; c++) {
62+
const channelData = audioBuffer.getChannelData(c).subarray(startSample, endSample);
63+
chunkBuffer.copyToChannel(channelData, c);
64+
}
65+
const chunkBlob = await new Promise<Blob>((resolve) => {
66+
const encoder = new Mp3Encoder(numChannels, sampleRate, 128);
67+
const leftData = chunkBuffer.getChannelData(0);
68+
const rightData = numChannels === 1 ? leftData : chunkBuffer.getChannelData(1);
69+
const mp3Data = encoder.encodeBuffer(leftData, rightData);
70+
const blob = new Blob([mp3Data], { type: 'audio/mp3' });
71+
resolve(blob);
72+
});
73+
chunks.push(new File([chunkBlob], `text-${i}.mp3`, { type: 'audio/mp3' }));
74+
}
75+
76+
return chunks;
77+
}
78+
79+
4180
export default function MessageInput(props: MessageInputProps) {
4281
const temperature = useAppSelector(selectTemperature);
4382
const message = useAppSelector(selectMessage);
4483
const [recording, setRecording] = useState(false);
4584
const hasVerticalSpace = useMediaQuery('(min-height: 1000px)');
46-
const recorder = new MicRecorder({ bitRate: 128 })
85+
const recorder = useMemo(() => new MicRecorder({ bitRate: 128 }), []);
4786
const useOpenAIWhisper = useAppSelector(selectUseOpenAIWhisper);
87+
const openAIApiKey = useAppSelector(selectOpenAIApiKey);
4888

4989
const context = useAppContext();
5090
const dispatch = useAppDispatch();
@@ -65,14 +105,14 @@ export default function MessageInput(props: MessageInputProps) {
65105
}, [context, message, dispatch]);
66106

67107
const onSpeechStart = () => {
108+
68109
if (!recording) {
69110
setRecording(true);
70111

71112
            // if we are using whisper, then we will just record with the browser and send to the API when done
72113
if (useOpenAIWhisper) {
73-
114+
recorder.start().catch((e: any) => console.error(e));
74115
} else {
75-
76116
speechRecognition.continuous = true;
77117
speechRecognition.interimResults = true;
78118

@@ -86,7 +126,36 @@ export default function MessageInput(props: MessageInputProps) {
86126
} else {
87127
setRecording(false);
88128
if (useOpenAIWhisper) {
129+
const mp3 = recorder.stop().getMp3();
130+
131+
mp3.then(async ([buffer, blob]) => {
132+
133+
const file = new File(buffer, 'chat.mp3', {
134+
type: blob.type,
135+
lastModified: Date.now()
136+
});
137+
138+
// TODO: cut in chunks
139+
140+
var data = new FormData()
141+
data.append('file', file);
142+
data.append('model', 'whisper-1')
143+
144+
const response = await fetch("https://api.openai.com/v1/audio/transcriptions", {
145+
method: "POST",
146+
headers: {
147+
'Authorization': `Bearer ${openAIApiKey}`,
148+
},
149+
body: data,
150+
});
151+
152+
const json = await response.json()
153+
154+
if (json.text) {
155+
dispatch(setMessage(json.text));
156+
}
89157

158+
}).catch((e: any) => console.error(e));
90159
} else {
91160
speechRecognition.stop();
92161

app/src/components/message.tsx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,7 @@ export default function MessageComponent(props: { message: Message, last: boolea
244244
<Button variant="subtle" size="sm" compact onClick={copy} style={{ marginLeft: '1rem' }}>
245245
<i className="fa fa-clipboard" />
246246
{copied ? <FormattedMessage defaultMessage="Copied" description="Label for copy-to-clipboard button after a successful copy" />
247-
: <FormattedMessage defaultMessage="Copy" description="Label for copy-to-clipboard button" />}
247+
: <FormattedMessage defaultMessage="Copy" description="Label for copy-to-clipboard button" />}
248248
</Button>
249249
)}
250250
</CopyButton>
@@ -263,7 +263,7 @@ export default function MessageComponent(props: { message: Message, last: boolea
263263
}}>
264264
<i className="fa fa-edit" />
265265
<span>
266-
{editing ? <FormattedMessage defaultMessage="Cancel" description="Label for a button that appears when the user is editing the text of one of their messages, to cancel without saving changes" />
266+
{editing ? <FormattedMessage defaultMessage="Cancel" description="Label for a button that appears when the user is editing the text of one of their messages, to cancel without saving changes" />
267267
: <FormattedMessage defaultMessage="Edit" description="Label for the button the user can click to edit the text of one of their messages" />}
268268
</span>
269269
</Button>

app/src/openai.ts

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ export interface OpenAIResponseChunk {
2626

2727
function parseResponseChunk(buffer: any): OpenAIResponseChunk {
2828
const chunk = buffer.toString().replace('data: ', '').trim();
29-
29+
3030
if (chunk === '[DONE]') {
3131
return {
3232
done: true,
@@ -51,7 +51,7 @@ export async function createChatCompletion(messages: OpenAIMessage[], parameters
5151
const configuration = new Configuration({
5252
apiKey: parameters.apiKey,
5353
});
54-
54+
5555
const openai = new OpenAIApi(configuration);
5656

5757
const response = await openai.createChatCompletion({
@@ -129,6 +129,7 @@ export async function createStreamingChatCompletion(messages: OpenAIMessage[], p
129129
});
130130

131131
eventSource.addEventListener('message', async (event: any) => {
132+
132133
if (event.data === '[DONE]') {
133134
emitter.emit('done');
134135
return;
@@ -147,7 +148,7 @@ export async function createStreamingChatCompletion(messages: OpenAIMessage[], p
147148

148149
eventSource.stream();
149150

150-
return {
151+
return {
151152
emitter,
152153
cancel: () => eventSource.close(),
153154
};

server/src/endpoints/whisper.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
import express from 'express';
2+
import RequestHandler from "./base";
3+
4+
export default class WhisperRequestHandler extends RequestHandler {
5+
handler(req: express.Request, res: express.Response): any {
6+
res.json({ status: 'ok' });
7+
}
8+
}

server/src/index.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import BasicCompletionRequestHandler from './endpoints/completion/basic';
1818
import StreamingCompletionRequestHandler from './endpoints/completion/streaming';
1919
import SessionRequestHandler from './endpoints/session';
2020
import GetShareRequestHandler from './endpoints/get-share';
21+
import WhisperRequestHandler from './endpoints/whisper';
2122
import { configurePassport } from './passport';
2223
import { configureAuth0 } from './auth0';
2324
import DeleteChatRequestHandler from './endpoints/delete-chat';
@@ -82,6 +83,7 @@ export default class ChatServer {
8283
this.app.post('/chatapi/sync', (req, res) => new SyncRequestHandler(this, req, res));
8384
this.app.get('/chatapi/share/:id', (req, res) => new GetShareRequestHandler(this, req, res));
8485
this.app.post('/chatapi/share', (req, res) => new ShareRequestHandler(this, req, res));
86+
this.app.post('/chatapi/whisper', (req, res) => new WhisperRequestHandler(this, req, res));
8587

8688
if (process.env.ENABLE_SERVER_COMPLETION) {
8789
this.app.post('/chatapi/completion', (req, res) => new BasicCompletionRequestHandler(this, req, res));

0 commit comments

Comments
 (0)