|
| 1 | +const { z } = require('zod'); |
| 2 | +const { tool } = require('@langchain/core/tools'); |
| 3 | +const { youtube } = require('@googleapis/youtube'); |
| 4 | +const { YoutubeTranscript } = require('youtube-transcript'); |
| 5 | +const { getApiKey } = require('./credentials'); |
| 6 | +const { logger } = require('~/config'); |
| 7 | + |
/**
 * Extracts the 11-character YouTube video ID from a URL or a bare ID.
 *
 * Accepted inputs:
 * - a bare 11-character ID (`[a-zA-Z0-9_-]{11}`)
 * - `youtu.be/<id>` short links
 * - `youtube.com/` URLs using `watch?v=`, `embed/`, `shorts/`, `live/`, `v/`
 * - `watch` URLs where `v` is not the first query parameter
 *   (e.g. `watch?feature=share&v=<id>`)
 *
 * Leading/trailing whitespace is ignored.
 *
 * @param {string} url - YouTube URL or raw video ID.
 * @returns {string|null} The video ID, or `null` when none can be extracted
 *   (including when `url` is not a string).
 */
function extractVideoId(url) {
  if (typeof url !== 'string') {
    return null;
  }

  // Trim so pasted values with stray spaces/newlines still satisfy the
  // anchored patterns below.
  const input = url.trim();

  const rawIdRegex = /^[a-zA-Z0-9_-]{11}$/;
  if (rawIdRegex.test(input)) {
    return input;
  }

  // Primary pattern: the ID directly follows a known path prefix.
  const pathRegex =
    /(?:youtu\.be\/|youtube(?:\.com)?\/(?:(?:watch\?v=)|(?:embed\/)|(?:shorts\/)|(?:live\/)|(?:v\/)|(?:\/))?)([a-zA-Z0-9_-]{11})(?:\S+)?$/;
  const pathMatch = input.match(pathRegex);
  if (pathMatch) {
    return pathMatch[1];
  }

  // Fallback: `v` appears later in the query string; it must directly follow
  // `?` or `&` so parameters merely ending in "v" are not picked up.
  const queryRegex = /youtube(?:\.com)?\/watch\?(?:\S*?&)?v=([a-zA-Z0-9_-]{11})/;
  const queryMatch = input.match(queryRegex);
  return queryMatch ? queryMatch[1] : null;
}
| 22 | + |
/**
 * Flattens a transcript response into a single plain-text string.
 *
 * Each entry's `text` is trimmed, empty segments are dropped, and the rest are
 * joined with single spaces. HTML-escaped apostrophes left in the caption text
 * are converted back to a literal `'`.
 *
 * @param {Array<{ text?: string }>} transcriptResponse - Transcript entries;
 *   anything that is not an array yields an empty string.
 * @returns {string} The concatenated transcript text.
 */
function parseTranscript(transcriptResponse) {
  if (!Array.isArray(transcriptResponse)) {
    return '';
  }

  return (
    transcriptResponse
      // Entries lacking a string `text` are treated as empty instead of throwing.
      .map((entry) => (typeof entry?.text === 'string' ? entry.text.trim() : ''))
      .filter(Boolean)
      .join(' ')
      // Matches the numeric apostrophe entity (code 39) both singly escaped and
      // doubly escaped (with a leading "amp;").
      // NOTE(review): presumably the captions arrive double-escaped — confirm
      // against real responses.
      .replace(/&(?:amp;)?#39;/g, '\'')
  );
}
| 34 | + |
/**
 * Builds the set of YouTube tools (search, info, comments, transcript).
 *
 * The API key is taken from `fields.apiKey`, then `fields.YOUTUBE_API_KEY`,
 * and otherwise from `getApiKey('YOUTUBE_API_KEY', override)`.
 *
 * @param {object} [fields={}] - Tool configuration.
 * @param {string} [fields.apiKey] - Explicit YouTube Data API key.
 * @param {string} [fields.YOUTUBE_API_KEY] - Alternative key field.
 * @param {boolean} [fields.override=false] - Forwarded to `getApiKey`
 *   (NOTE(review): exact semantics live in `./credentials` — confirm there).
 * @returns {Array} `[searchTool, infoTool, commentsTool, transcriptTool]`.
 */
function createYouTubeTools(fields = {}) {
  const envVar = 'YOUTUBE_API_KEY';
  const override = fields.override ?? false;
  const apiKey = fields.apiKey ?? fields[envVar] ?? getApiKey(envVar, override);

  const youtubeClient = youtube({
    version: 'v3',
    auth: apiKey,
  });

  /** Resolves a URL/ID to a video ID, throwing when it cannot be extracted. */
  const resolveVideoId = (url) => {
    const videoId = extractVideoId(url);
    if (!videoId) {
      throw new Error('Invalid YouTube URL or video ID');
    }
    return videoId;
  };

  const searchTool = tool(
    async ({ query, maxResults }) => {
      const response = await youtubeClient.search.list({
        part: 'snippet',
        q: query,
        type: 'video',
        maxResults: maxResults || 5,
      });
      // Treat a missing `items` as "no results" rather than throwing.
      const result = (response.data.items ?? []).map((item) => ({
        title: item.snippet.title,
        description: item.snippet.description,
        url: `https://www.youtube.com/watch?v=${item.id.videoId}`,
      }));
      return JSON.stringify(result, null, 2);
    },
    {
      name: 'youtube_search',
      description: `Search for YouTube videos by keyword or phrase.
- Required: query (search terms to find videos)
- Optional: maxResults (number of videos to return, 1-50, default: 5)
- Returns: List of videos with titles, descriptions, and URLs
- Use for: Finding specific videos, exploring content, research
Example: query="cooking pasta tutorials" maxResults=3`,
      schema: z.object({
        query: z.string().describe('Search query terms'),
        maxResults: z.number().int().min(1).max(50).optional().describe('Number of results (1-50)'),
      }),
    },
  );

  const infoTool = tool(
    async ({ url }) => {
      const videoId = resolveVideoId(url);

      const response = await youtubeClient.videos.list({
        part: 'snippet,statistics',
        id: videoId,
      });

      if (!response.data.items?.length) {
        throw new Error('Video not found');
      }
      const video = response.data.items[0];

      const result = {
        title: video.snippet.title,
        description: video.snippet.description,
        views: video.statistics.viewCount,
        likes: video.statistics.likeCount,
        comments: video.statistics.commentCount,
      };
      return JSON.stringify(result, null, 2);
    },
    {
      name: 'youtube_info',
      description: `Get detailed metadata and statistics for a specific YouTube video.
- Required: url (full YouTube URL or video ID)
- Returns: Video title, description, view count, like count, comment count
- Use for: Getting video metrics and basic metadata
- DO NOT USE FOR VIDEO SUMMARIES, USE TRANSCRIPTS FOR COMPREHENSIVE ANALYSIS
- Accepts both full URLs and video IDs
Example: url="https://youtube.com/watch?v=abc123" or url="abc123"`,
      schema: z.object({
        url: z.string().describe('YouTube video URL or ID'),
      }),
    },
  );

  const commentsTool = tool(
    async ({ url, maxResults }) => {
      const videoId = resolveVideoId(url);

      const response = await youtubeClient.commentThreads.list({
        part: 'snippet',
        videoId,
        maxResults: maxResults || 10,
      });

      // Treat a missing `items` as "no comments" rather than throwing.
      const result = (response.data.items ?? []).map((item) => {
        const comment = item.snippet.topLevelComment.snippet;
        return {
          author: comment.authorDisplayName,
          text: comment.textDisplay,
          likes: comment.likeCount,
        };
      });
      return JSON.stringify(result, null, 2);
    },
    {
      name: 'youtube_comments',
      description: `Retrieve top-level comments from a YouTube video.
- Required: url (full YouTube URL or video ID)
- Optional: maxResults (number of comments, 1-50, default: 10)
- Returns: Comment text, author names, like counts
- Use for: Sentiment analysis, audience feedback, engagement review
Example: url="abc123" maxResults=20`,
      schema: z.object({
        url: z.string().describe('YouTube video URL or ID'),
        maxResults: z
          .number()
          .int()
          .min(1)
          .max(50)
          .optional()
          .describe('Number of comments to retrieve'),
      }),
    },
  );

  const transcriptTool = tool(
    async ({ url }) => {
      const videoId = resolveVideoId(url);

      // Languages tried in order before falling back to the library default.
      const preferredLanguages = ['en', 'de'];

      try {
        for (const lang of preferredLanguages) {
          try {
            const transcript = await YoutubeTranscript.fetchTranscript(videoId, { lang });
            return parseTranscript(transcript);
          } catch (e) {
            // A failure for one language is expected; log it and try the next.
            logger.error(e);
          }
        }

        // Last resort: no language preference.
        const transcript = await YoutubeTranscript.fetchTranscript(videoId);
        return parseTranscript(transcript);
      } catch (error) {
        // Keep the original error attached so its stack is not lost.
        throw new Error(`Failed to fetch transcript: ${error.message}`, { cause: error });
      }
    },
    {
      name: 'youtube_transcript',
      description: `Fetch and parse the transcript/captions of a YouTube video.
- Required: url (full YouTube URL or video ID)
- Returns: Full video transcript as plain text
- Use for: Content analysis, summarization, translation reference
- This is the "Go-to" tool for analyzing actual video content
- Attempts to fetch English first, then German, then any available language
Example: url="https://youtube.com/watch?v=abc123"`,
      schema: z.object({
        url: z.string().describe('YouTube video URL or ID'),
      }),
    },
  );

  return [searchTool, infoTool, commentsTool, transcriptTool];
}
| 202 | + |
| 203 | +module.exports = createYouTubeTools; |
0 commit comments