Skip to content

Commit 3d77635

Browse files
committed
Add API endpoint for fetching transcripts from YouTube (#4788)
2 parents d0433c8 + b2f5b1e commit 3d77635

File tree

3 files changed

+122
-0
lines changed

3 files changed

+122
-0
lines changed

src/invidious/routes/api/v1/videos.cr

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -429,4 +429,90 @@ module Invidious::Routes::API::V1::Videos
429429
end
430430
end
431431
end
432+
433+
# Fetches transcripts from YouTube
434+
#
435+
# Use the `lang` and `autogen` query parameters, or `label`, to select which transcript to fetch
436+
# Send a request without `lang` or `label` to list all the available transcripts.
437+
def self.transcripts(env)
  env.response.content_type = "application/json"

  id = env.params.url["id"]
  lang = env.params.query["lang"]?
  label = env.params.query["label"]?
  # Only the presence of `autogen` matters; its value (if any) is ignored.
  auto_generated = !env.params.query["autogen"]?.nil?

  # Neither selector given: answer with the list of transcripts that can be requested.
  if label.nil? && lang.nil?
    begin
      video = get_video(id)
    rescue ex : NotFoundException
      return error_json(404, ex)
    rescue ex
      return error_json(500, ex)
    end

    listing = JSON.build do |builder|
      builder.object do
        builder.field "transcripts" do
          builder.array do
            # One entry is emitted per caption track of the video.
            video.captions.each do |caption|
              # Link back to this endpoint; auto-generated tracks carry the bare `autogen` flag.
              url = "/api/v1/transcripts/#{id}?lang=#{URI.encode_www_form(caption.language_code)}"
              url += "&autogen" if caption.auto_generated

              builder.object do
                builder.field "label", caption.name
                builder.field "languageCode", caption.language_code
                builder.field "autoGenerated", caption.auto_generated
                builder.field "url", url
              end
            end
          end
        end
      end
    end

    return listing
  end

  # Only `label` was given: resolve the language code and auto-generated flag
  # from the first caption track whose name equals the label.
  if lang.nil?
    begin
      video = get_video(id)
    rescue ex : NotFoundException
      return error_json(404, ex)
    rescue ex
      return error_json(500, ex)
    end

    matching_caption = video.captions.find { |caption| caption.name == label }
    if matching_caption
      lang = matching_caption.language_code
      auto_generated = matching_caption.auto_generated
    else
      return error_json(404, NotFoundException.new("Requested transcript does not exist"))
    end
  end

  params = Invidious::Videos::Transcript.generate_param(id, lang, auto_generated)

  begin
    raw_transcript = YoutubeAPI.get_transcript(params)
    transcript = Invidious::Videos::Transcript.from_raw(raw_transcript, lang, auto_generated)
  rescue ex : NotFoundException
    return error_json(404, ex)
  rescue ex
    return error_json(500, ex)
  end

  return transcript.to_json
end
432518
end

src/invidious/routing.cr

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,7 @@ module Invidious::Routing
236236
get "/api/v1/annotations/:id", {{namespace}}::Videos, :annotations
237237
get "/api/v1/comments/:id", {{namespace}}::Videos, :comments
238238
get "/api/v1/clips/:id", {{namespace}}::Videos, :clips
239+
get "/api/v1/transcripts/:id", {{namespace}}::Videos, :transcripts
239240

240241
# Feeds
241242
get "/api/v1/trending", {{namespace}}::Feeds, :trending

src/invidious/videos/transcript.cr

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,5 +122,40 @@ module Invidious::Videos
122122

123123
return vtt
124124
end
125+
126+
# Writes this transcript's fields into an object that the caller has
# already opened on the given JSON builder.
def to_json(json : JSON::Builder)
  json.field "languageCode", @language_code
  json.field "autoGenerated", @auto_generated
  json.field "label", @label

  json.field "body" do
    json.array do
      @lines.each do |entry|
        # Heading lines are tagged "heading"; every other line is "regular".
        kind = entry.is_a?(HeadingLine) ? "heading" : "regular"

        json.object do
          json.field "type", kind
          json.field "startMs", entry.start_ms.total_milliseconds
          json.field "endMs", entry.end_ms.total_milliseconds
          json.field "line", entry.line
        end
      end
    end
  end
end
148+
149+
# Builds a standalone JSON document with this transcript nested under
# the top-level "transcript" key.
def to_json
  JSON.build do |builder|
    builder.object do
      builder.field "transcript" do
        # The builder overload only writes fields, so the object is opened here.
        builder.object { to_json(builder) }
      end
    end
  end
end
125160
end
126161
end

0 commit comments

Comments
 (0)