Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion examples/sound-effects/video-to-sfx/.env.example
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
ELEVENLABS_API_KEY=
OPENAI_API_KEY=
GEMINI_API_KEY=
4 changes: 1 addition & 3 deletions examples/sound-effects/video-to-sfx/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ We built this demo to show the power of the ElevenLabs Texts to Sounds Effects A
How it works:

- Extracts 4 frames from the video at 1 second intervals (all client side)
- Sends the frames and a prompt to GPT-4o to create the custom Text to sound effects prompt
- Sends the frames and a prompt to Gemini 2.0 Flash to create the custom Text to sound effects prompt
- Uses the prompt to create a sound effect with the [ElevenLabs Text to Sound Effects API](https://elevenlabs.io/docs/api-reference/how-to-use-text-to-sound-effects)
- Combines the video and audio on the client side with ffmpeg.wasm for a single file to download
- Hosted on Vercel at [videotosoundeffects.com](https://www.videotosoundeffects.com/)
Expand All @@ -14,8 +14,6 @@ How it works:

![Screenshot elevenlabs-video-to-sfx vercel app (Arc) 2024-06-16 at 23 33@2x](https://github.com/elevenlabs/elevenlabs-examples/assets/22766134/20fba872-e8d1-4f30-92af-fcb52bab45da)



## Getting Started

First, run the development server:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ export const dynamic = "force-dynamic";
import {
VideoToSFXRequestBody,
VideoToSFXResponseBody,
} from "@/app/api/interface";
} from "@/app/api/sound/interface";
import OpenAI from "openai";
import { ChatCompletionMessageParam } from "openai/resources/index.mjs";
import { Ratelimit } from "@upstash/ratelimit";
Expand Down Expand Up @@ -53,16 +53,17 @@ const generateSoundEffect = async (
};

const openai = new OpenAI({
apiKey: process.env.OPENAI_API_KEY,
apiKey: process.env.GEMINI_API_KEY,
baseURL: process.env.GEMINI_API_BASE_URL,
});

const isCaptionSafeForWork = async (caption: string): Promise<boolean> => {
if (!process.env.OPENAI_API_KEY) {
if (!process.env.GEMINI_API_KEY) {
throw new Error("No API key");
}

const response = await openai.chat.completions.create({
model: "gpt-4o",
model: "gemini-2.0-flash",
messages: [
{
role: "user",
Expand Down Expand Up @@ -95,11 +96,11 @@ const isCaptionSafeForWork = async (caption: string): Promise<boolean> => {
const generateCaptionForImage = async (
imagesBase64: string[]
): Promise<string> => {
if (!process.env.OPENAI_API_KEY) {
if (!process.env.GEMINI_API_KEY) {
throw new Error("No API key");
}
const response = await openai.chat.completions.create({
model: "gpt-4o",
model: "gemini-2.0-flash",
messages: [
{
role: "user",
Expand Down
52 changes: 52 additions & 0 deletions examples/sound-effects/video-to-sfx/app/api/veo/route.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
export const maxDuration = 120; // This function can run for a maximum of 120 seconds
export const dynamic = "force-dynamic";

import { NextResponse } from "next/server";
import { GoogleGenAI } from "@google/genai";

// Gemini API client used for Veo video generation; reads GEMINI_API_KEY at module load.
const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY });

// Simple liveness probe for this route: always responds 200 with a "Live" body.
export async function GET(_request: Request) {
  return new Response("Live");
}

/**
 * Generates a video with Google Veo 2 from a text prompt.
 *
 * Expects a JSON body of `{ prompt: string }`. Starts a long-running
 * generation operation, polls it to completion (bounded so we respond
 * before the route's maxDuration kills the function), then forwards the
 * downloaded video response to the client.
 *
 * Responses: 400 when the prompt is missing, 504 when generation does not
 * finish in time, 500 on any other failure, otherwise the upstream video.
 */
export async function POST(request: Request) {
  try {
    const { prompt } = await request.json();

    if (!prompt) {
      return NextResponse.json(
        { error: "Prompt is required" },
        { status: 400 }
      );
    }

    let operation = await ai.models.generateVideos({
      model: "veo-2.0-generate-001",
      prompt,
      config: {
        personGeneration: "dont_allow",
        aspectRatio: "16:9",
      },
    });

    // Poll every 10s, but give up before the serverless function itself is
    // killed at maxDuration (120s): 10 polls ≈ 100s of waiting.
    const POLL_INTERVAL_MS = 10_000;
    const MAX_POLLS = 10;
    for (let polls = 0; !operation.done; polls++) {
      if (polls >= MAX_POLLS) {
        return NextResponse.json(
          { error: "Video generation timed out" },
          { status: 504 }
        );
      }
      await new Promise(resolve => setTimeout(resolve, POLL_INTERVAL_MS));
      operation = await ai.operations.getVideosOperation({
        operation: operation,
      });
    }

    const videoUrl = operation.response?.generatedVideos?.[0]?.video?.uri;
    if (!videoUrl) throw new Error("No video URL found");

    // The returned file URI requires the API key as a query parameter. This
    // runs server-side only, so the key is never exposed to the client.
    const resp = await fetch(`${videoUrl}&key=${process.env.GEMINI_API_KEY}`);
    if (!resp.ok) {
      // Don't forward an upstream error body as if it were the video.
      throw new Error(`Failed to download generated video (HTTP ${resp.status})`);
    }
    return resp;
  } catch (error) {
    console.error("Error processing request:", error);
    return NextResponse.json(
      { error: "Internal server error" },
      { status: 500 }
    );
  }
}
130 changes: 95 additions & 35 deletions examples/sound-effects/video-to-sfx/app/page.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,11 @@ import { AudioPlayer } from "./state/player";
import { observer } from "mobx-react";
import { cn } from "@/lib/utils";
import { reaction } from "mobx";
import { QueryClient, QueryClientProvider, useMutation } from "@tanstack/react-query";
import {
QueryClient,
QueryClientProvider,
useMutation,
} from "@tanstack/react-query";
import { convertVideoToSFX } from "@/lib/videoToSFX";
import { ArrowRight, DownloadIcon, Github, LoaderCircle } from "lucide-react";
import { Button } from "@/components/ui/button";
Expand All @@ -23,7 +27,7 @@ const HoverOverlay = ({ className }: { className?: string }) => {
<div
className={cn(
"absolute inset-[4px] bg-gradient-to-tr from-[#08B0D5] to-[#AD20D0] rounded-[inherit] opacity-0 -z-10 group-hover:inset-0 group-hover:opacity-[17.5%] transition-all duration-300",
className,
className
)}
></div>
);
Expand Down Expand Up @@ -138,7 +142,7 @@ if (typeof window !== "undefined") {

const HomeDetails = observer(() => {
const videoRef = useRef<HTMLVideoElement | null>(null);
const [file, setFile] = useState<File | null>(null);
const [file, setFile] = useState<File | Blob | null>(null);
const [orchestrator, setOrchestrator] = useState<Orchestrator | null>(null);
const canceledRef = useRef(false);
const [isDownloading, setIsDownloading] = useState([
Expand All @@ -147,16 +151,11 @@ const HomeDetails = observer(() => {
false,
false,
]);
const [progress, setProgress] = useState([
0,
0,
0,
0,
]);
const [progress, setProgress] = useState([0, 0, 0, 0]);

const previewUrl = useMemo(
() => (file ? URL.createObjectURL(file) : null),
[file],
[file]
);

useEffect(() => {
Expand Down Expand Up @@ -187,7 +186,7 @@ const HomeDetails = observer(() => {
videoRef.current.pause();
}
}
},
}
);
}, [orchestrator]);

Expand Down Expand Up @@ -242,7 +241,7 @@ const HomeDetails = observer(() => {
<motion.div
className={cn(
"flex flex-col md:hidden text-black p-4 gap-4",
previewUrl && "hidden",
previewUrl && "hidden"
)}
>
<a
Expand Down Expand Up @@ -270,6 +269,63 @@ const HomeDetails = observer(() => {
variants={variants.card}
className="w-full aspect-video rounded-3xl bg-white/80 backdrop-blur-[16px] text-transparent md:text-black"
>
<div className="flex flex-col items-center justify-center gap-4">
<input
type="text"
placeholder="Enter video prompt..."
className="w-3/4 px-4 py-2 rounded-lg border border-gray-300 focus:outline-none focus:border-gray-500"
/>
<button
className="px-6 py-2 bg-black text-white rounded-lg hover:bg-gray-800 transition-colors font-mono text-sm"
onClick={async () => {
try {
// Get the prompt value from the input field
const promptInput = document.querySelector(
'input[type="text"]'
) as HTMLInputElement;
const prompt = promptInput?.value;

if (!prompt) {
throw new Error("Please enter a prompt");
}

const response = await fetch("/api/veo", {
method: "POST",
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify({ prompt }),
}).then(res => res.blob());

setFile(response);
canceledRef.current = false;
// const sfx = await convertVideoToSFX(
// URL.createObjectURL(files[0])
// );
const sfx = await mutations.videoToSfx.mutateAsync(response, {
onError: e => {
setFile(null);
window.alert(`Error: ${e}`);
},
});
if (!canceledRef.current) {
setOrchestrator(
new Orchestrator({
soundEffects: sfx.soundEffects,
caption: sfx.caption,
})
);
}
// Handle the generated video URL/data here
} catch (error) {
console.error("Error:", error);
window.alert("Failed to generate video");
}
}}
>
Generate with Google Veo 2
</button>
</div>
{!previewUrl && (
<FileInput
className="h-full w-full"
Expand All @@ -290,7 +346,7 @@ const HomeDetails = observer(() => {
new Orchestrator({
soundEffects: sfx.soundEffects,
caption: sfx.caption,
}),
})
);
}
}}
Expand Down Expand Up @@ -362,13 +418,17 @@ const HomeDetails = observer(() => {
newState[index] = true;
return newState;
});
await mergeAndDownload(file, url, (newProgress: number) => {
setProgress(prev => {
const newState = [...prev];
newState[index] = newProgress;
return newState;
});
});
await mergeAndDownload(
file,
url,
(newProgress: number) => {
setProgress(prev => {
const newState = [...prev];
newState[index] = newProgress;
return newState;
});
}
);
setIsDownloading(prev => {
const newState = [...prev];
newState[index] = false;
Expand Down Expand Up @@ -398,9 +458,9 @@ const Home = () => {

const Waveform = observer(
({
player,
barBgColor = "bg-gray-800/30",
}: {
player,
barBgColor = "bg-gray-800/30",
}: {
player: AudioPlayer;
barBgColor: string;
}) => {
Expand All @@ -425,28 +485,28 @@ const Waveform = observer(
))}
</div>
);
},
}
);

const SoundEffect = observer(
({
index,
player,
onPlay,
onPause,
active,
onDownload,
isDownloading,
progress,
}: {
index,
player,
onPlay,
onPause,
active,
onDownload,
isDownloading,
progress,
}: {
index: number;
player: AudioPlayer;
onPlay: () => void;
onPause: () => void;
active: boolean;
onDownload: () => void;
isDownloading: boolean;
progress: number
progress: number;
}) => {
return (
<motion.div
Expand Down Expand Up @@ -503,7 +563,7 @@ const SoundEffect = observer(
className="self-center mr-3 rounded-full bg-transparent hover:bg-white/25 active:bg-white/40 border-gray-800/20"
>
{isDownloading ? (
<span className={'text-[10px] text-gray-900/50'}>{progress}%</span>
<span className={"text-[10px] text-gray-900/50"}>{progress}%</span>
) : (
<DownloadIcon size={16} className="text-gray-800/50" />
)}
Expand Down
6 changes: 3 additions & 3 deletions examples/sound-effects/video-to-sfx/lib/videoToSFX.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
import { maxDuration } from "./../app/api/route";
import { maxDuration } from "../app/api/sound/route";
import { posthog } from "posthog-js";
import {
VideoToSFXRequestBody,
VideoToSFXResponseBody,
} from "@/app/api/interface";
} from "@/app/api/sound/interface";

const apiVideoToSFX = async (frames: string[], maxDuration: number) => {
posthog?.capture("video_to_sfx_started");
const response = await fetch("/api", {
const response = await fetch("/api/sound", {
method: "POST",
body: JSON.stringify({ frames, maxDuration } as VideoToSFXRequestBody),
});
Expand Down
Loading