Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion examples/sound-effects/video-to-sfx/.env.example
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
ELEVENLABS_API_KEY=
OPENAI_API_KEY=
GEMINI_API_KEY=
4 changes: 1 addition & 3 deletions examples/sound-effects/video-to-sfx/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ We built this demo to show the power of the ElevenLabs Texts to Sounds Effects A
How it works:

- Extracts 4 frames from the video at 1 second intervals (all client side)
- Sends the frames and a prompt to GPT-4o to create the custom Text to sound effects prompt
- Sends the frames and a prompt to Gemini 2.0 Flash to create the custom Text to sound effects prompt
- Uses the prompt to create a sound effect with the [ElevenLabs Text to Sound Effects API](https://elevenlabs.io/docs/api-reference/how-to-use-text-to-sound-effects)
- Combines the video and audio on the client side with ffmpeg.wasm for a single file to download
- Hosted on Vercel at [videotosoundeffects.com](https://www.videotosoundeffects.com/)
Expand All @@ -14,8 +14,6 @@ How it works:

![Screenshot elevenlabs-video-to-sfx vercel app (Arc) 2024-06-16 at 23 33@2x](https://github.com/elevenlabs/elevenlabs-examples/assets/22766134/20fba872-e8d1-4f30-92af-fcb52bab45da)



## Getting Started

First, run the development server:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ export const dynamic = "force-dynamic";
import {
VideoToSFXRequestBody,
VideoToSFXResponseBody,
} from "@/app/api/interface";
} from "@/app/api/sound/interface";
import OpenAI from "openai";
import { ChatCompletionMessageParam } from "openai/resources/index.mjs";
import { Ratelimit } from "@upstash/ratelimit";
Expand Down Expand Up @@ -53,16 +53,17 @@ const generateSoundEffect = async (
};

const openai = new OpenAI({
apiKey: process.env.OPENAI_API_KEY,
apiKey: process.env.GEMINI_API_KEY,
baseURL: process.env.GEMINI_API_BASE_URL,
});

const isCaptionSafeForWork = async (caption: string): Promise<boolean> => {
if (!process.env.OPENAI_API_KEY) {
if (!process.env.GEMINI_API_KEY) {
throw new Error("No API key");
}

const response = await openai.chat.completions.create({
model: "gpt-4o",
model: "gemini-2.0-flash",
messages: [
{
role: "user",
Expand Down Expand Up @@ -95,11 +96,11 @@ const isCaptionSafeForWork = async (caption: string): Promise<boolean> => {
const generateCaptionForImage = async (
imagesBase64: string[]
): Promise<string> => {
if (!process.env.OPENAI_API_KEY) {
if (!process.env.GEMINI_API_KEY) {
throw new Error("No API key");
}
const response = await openai.chat.completions.create({
model: "gpt-4o",
model: "gemini-2.0-flash",
messages: [
{
role: "user",
Expand Down
52 changes: 52 additions & 0 deletions examples/sound-effects/video-to-sfx/app/api/veo/route.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
export const maxDuration = 120; // This function can run for a maximum of 120 seconds
export const dynamic = "force-dynamic";

import { NextResponse } from "next/server";
import { GoogleGenAI } from "@google/genai";

// Gemini API client used for Veo video generation; reads GEMINI_API_KEY at module load.
const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY });

// Simple liveness probe for this route: always responds 200 with a "Live" body.
export async function GET(_request: Request) {
  return new Response("Live");
}

/**
 * Generates a video with Google Veo 2 from a text prompt.
 *
 * Expects a JSON body of `{ prompt: string }`. Starts a long-running
 * generation operation, polls it to completion (bounded so we respond
 * before the route's maxDuration kills the function), then forwards the
 * downloaded video response to the client.
 *
 * Responses: 400 when the prompt is missing, 504 when generation does not
 * finish in time, 500 on any other failure, otherwise the upstream video.
 */
export async function POST(request: Request) {
  try {
    const { prompt } = await request.json();

    if (!prompt) {
      return NextResponse.json(
        { error: "Prompt is required" },
        { status: 400 }
      );
    }

    let operation = await ai.models.generateVideos({
      model: "veo-2.0-generate-001",
      prompt,
      config: {
        personGeneration: "dont_allow",
        aspectRatio: "16:9",
      },
    });

    // Poll every 10s, but give up before the serverless function itself is
    // killed at maxDuration (120s): 10 polls ≈ 100s of waiting.
    const POLL_INTERVAL_MS = 10_000;
    const MAX_POLLS = 10;
    for (let polls = 0; !operation.done; polls++) {
      if (polls >= MAX_POLLS) {
        return NextResponse.json(
          { error: "Video generation timed out" },
          { status: 504 }
        );
      }
      await new Promise(resolve => setTimeout(resolve, POLL_INTERVAL_MS));
      operation = await ai.operations.getVideosOperation({
        operation: operation,
      });
    }

    const videoUrl = operation.response?.generatedVideos?.[0]?.video?.uri;
    if (!videoUrl) throw new Error("No video URL found");

    // The returned file URI requires the API key as a query parameter. This
    // runs server-side only, so the key is never exposed to the client.
    const resp = await fetch(`${videoUrl}&key=${process.env.GEMINI_API_KEY}`);
    if (!resp.ok) {
      // Don't forward an upstream error body as if it were the video.
      throw new Error(`Failed to download generated video (HTTP ${resp.status})`);
    }
    return resp;
  } catch (error) {
    console.error("Error processing request:", error);
    return NextResponse.json(
      { error: "Internal server error" },
      { status: 500 }
    );
  }
}
130 changes: 95 additions & 35 deletions examples/sound-effects/video-to-sfx/app/page.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,11 @@ import { AudioPlayer } from "./state/player";
import { observer } from "mobx-react";
import { cn } from "@/lib/utils";
import { reaction } from "mobx";
import { QueryClient, QueryClientProvider, useMutation } from "@tanstack/react-query";
import {
QueryClient,
QueryClientProvider,
useMutation,
} from "@tanstack/react-query";
import { convertVideoToSFX } from "@/lib/videoToSFX";
import { ArrowRight, DownloadIcon, Github, LoaderCircle } from "lucide-react";
import { Button } from "@/components/ui/button";
Expand All @@ -23,7 +27,7 @@ const HoverOverlay = ({ className }: { className?: string }) => {
<div
className={cn(
"absolute inset-[4px] bg-gradient-to-tr from-[#08B0D5] to-[#AD20D0] rounded-[inherit] opacity-0 -z-10 group-hover:inset-0 group-hover:opacity-[17.5%] transition-all duration-300",
className,
className
)}
></div>
);
Expand Down Expand Up @@ -138,7 +142,7 @@ if (typeof window !== "undefined") {

const HomeDetails = observer(() => {
const videoRef = useRef<HTMLVideoElement | null>(null);
const [file, setFile] = useState<File | null>(null);
const [file, setFile] = useState<File | Blob | null>(null);
const [orchestrator, setOrchestrator] = useState<Orchestrator | null>(null);
const canceledRef = useRef(false);
const [isDownloading, setIsDownloading] = useState([
Expand All @@ -147,16 +151,11 @@ const HomeDetails = observer(() => {
false,
false,
]);
const [progress, setProgress] = useState([
0,
0,
0,
0,
]);
const [progress, setProgress] = useState([0, 0, 0, 0]);

const previewUrl = useMemo(
() => (file ? URL.createObjectURL(file) : null),
[file],
[file]
);

useEffect(() => {
Expand Down Expand Up @@ -187,7 +186,7 @@ const HomeDetails = observer(() => {
videoRef.current.pause();
}
}
},
}
);
}, [orchestrator]);

Expand Down Expand Up @@ -242,7 +241,7 @@ const HomeDetails = observer(() => {
<motion.div
className={cn(
"flex flex-col md:hidden text-black p-4 gap-4",
previewUrl && "hidden",
previewUrl && "hidden"
)}
>
<a
Expand Down Expand Up @@ -270,6 +269,63 @@ const HomeDetails = observer(() => {
variants={variants.card}
className="w-full aspect-video rounded-3xl bg-white/80 backdrop-blur-[16px] text-transparent md:text-black"
>
<div className="flex flex-col items-center justify-center gap-4">
<input
type="text"
placeholder="Enter video prompt..."
className="w-3/4 px-4 py-2 rounded-lg border border-gray-300 focus:outline-none focus:border-gray-500"
/>
<button
className="px-6 py-2 bg-black text-white rounded-lg hover:bg-gray-800 transition-colors font-mono text-sm"
onClick={async () => {
try {
// Get the prompt value from the input field
const promptInput = document.querySelector(
'input[type="text"]'
) as HTMLInputElement;
const prompt = promptInput?.value;

if (!prompt) {
throw new Error("Please enter a prompt");
}

const response = await fetch("/api/veo", {
method: "POST",
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify({ prompt }),
}).then(res => res.blob());

setFile(response);
canceledRef.current = false;
// const sfx = await convertVideoToSFX(
// URL.createObjectURL(files[0])
// );
const sfx = await mutations.videoToSfx.mutateAsync(response, {
onError: e => {
setFile(null);
window.alert(`Error: ${e}`);
},
});
if (!canceledRef.current) {
setOrchestrator(
new Orchestrator({
soundEffects: sfx.soundEffects,
caption: sfx.caption,
})
);
}
// Handle the generated video URL/data here
} catch (error) {
console.error("Error:", error);
window.alert("Failed to generate video");
}
}}
>
Generate with Google Veo 2
</button>
</div>
{!previewUrl && (
<FileInput
className="h-full w-full"
Expand All @@ -290,7 +346,7 @@ const HomeDetails = observer(() => {
new Orchestrator({
soundEffects: sfx.soundEffects,
caption: sfx.caption,
}),
})
);
}
}}
Expand Down Expand Up @@ -362,13 +418,17 @@ const HomeDetails = observer(() => {
newState[index] = true;
return newState;
});
await mergeAndDownload(file, url, (newProgress: number) => {
setProgress(prev => {
const newState = [...prev];
newState[index] = newProgress;
return newState;
});
});
await mergeAndDownload(
file,
url,
(newProgress: number) => {
setProgress(prev => {
const newState = [...prev];
newState[index] = newProgress;
return newState;
});
}
);
setIsDownloading(prev => {
const newState = [...prev];
newState[index] = false;
Expand Down Expand Up @@ -398,9 +458,9 @@ const Home = () => {

const Waveform = observer(
({
player,
barBgColor = "bg-gray-800/30",
}: {
player,
barBgColor = "bg-gray-800/30",
}: {
player: AudioPlayer;
barBgColor: string;
}) => {
Expand All @@ -425,28 +485,28 @@ const Waveform = observer(
))}
</div>
);
},
}
);

const SoundEffect = observer(
({
index,
player,
onPlay,
onPause,
active,
onDownload,
isDownloading,
progress,
}: {
index,
player,
onPlay,
onPause,
active,
onDownload,
isDownloading,
progress,
}: {
index: number;
player: AudioPlayer;
onPlay: () => void;
onPause: () => void;
active: boolean;
onDownload: () => void;
isDownloading: boolean;
progress: number
progress: number;
}) => {
return (
<motion.div
Expand Down Expand Up @@ -503,7 +563,7 @@ const SoundEffect = observer(
className="self-center mr-3 rounded-full bg-transparent hover:bg-white/25 active:bg-white/40 border-gray-800/20"
>
{isDownloading ? (
<span className={'text-[10px] text-gray-900/50'}>{progress}%</span>
<span className={"text-[10px] text-gray-900/50"}>{progress}%</span>
) : (
<DownloadIcon size={16} className="text-gray-800/50" />
)}
Expand Down
6 changes: 3 additions & 3 deletions examples/sound-effects/video-to-sfx/lib/videoToSFX.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
import { maxDuration } from "./../app/api/route";
import { maxDuration } from "../app/api/sound/route";
import { posthog } from "posthog-js";
import {
VideoToSFXRequestBody,
VideoToSFXResponseBody,
} from "@/app/api/interface";
} from "@/app/api/sound/interface";

const apiVideoToSFX = async (frames: string[], maxDuration: number) => {
posthog?.capture("video_to_sfx_started");
const response = await fetch("/api", {
const response = await fetch("/api/sound", {
method: "POST",
body: JSON.stringify({ frames, maxDuration } as VideoToSFXRequestBody),
});
Expand Down
Loading