Spaces:
Sleeping
Sleeping
## Julia py_init file
module Transcriber | |
using PyCall | |
"""
    __init__()

Module initializer. Evaluates a block of Python code through PyCall that imports
`youtube_transcript_api` and defines the Python helper `get_transcript(video_id)`,
which the Julia wrappers in this module call via `py"get_transcript"`.

Doing this in `__init__` means the Python code is evaluated each time the module
is loaded rather than being captured during precompilation.
"""
function __init__()
    py"""
    import os
    from youtube_transcript_api import YouTubeTranscriptApi
    from youtube_transcript_api.proxies import WebshareProxyConfig

    # Optional Webshare proxy support (currently disabled). To enable it, build
    # the client like this and call `ytt_api.fetch(...)` in get_transcript:
    #
    # webshareuser = os.environ.get("WEBSHAREUSER")
    # websharepass = os.environ.get("WEBSHAREPASS")
    # ytt_api = YouTubeTranscriptApi(
    #     proxy_config=WebshareProxyConfig(
    #         proxy_username=webshareuser,
    #         proxy_password=websharepass,
    #     )
    # )

    def get_transcript(video_id):
        # The static YouTubeTranscriptApi.get_transcript() was deprecated in
        # youtube-transcript-api 1.0 (the release that added the `proxies`
        # module imported above) and removed in later releases. The instance
        # method `fetch` replaces it; `to_raw_data()` converts the result back
        # to the plain list of dicts the Julia wrappers index with "text".
        return YouTubeTranscriptApi().fetch(video_id).to_raw_data()
    """
end
# Example video id for manual testing: "SW14tOda_kI"
# get_transcript("SW14tOda_kI")
"""
    get_transcript(video_id::AbstractString; textonly::Bool=true)

Fetch the transcript of the video identified by `video_id` by calling the Python
helper `get_transcript` that this module's `__init__` defines through PyCall.

# Keywords
- `textonly`: when `true` (the default), return only the `"text"` entry of each
  transcript line; when `false`, return the transcript lines exactly as PyCall
  converts them from the Python helper's result.
"""
function get_transcript(video_id::AbstractString; textonly::Bool=true)
    entries = py"get_transcript"(video_id)
    # Return early instead of rebinding one variable to two different types.
    textonly || return entries
    return [entry["text"] for entry in entries]
end
"""
    get_transcript_text(video_id::AbstractString)

Fetch the transcript of the video identified by `video_id` (via the Python helper
`get_transcript` defined in this module's `__init__`) and return it as a single
`String`.

The `"text"` entry of every transcript line is written in order, each followed by
one space, so a non-empty result ends with a trailing space. An empty transcript
yields `""`.
"""
function get_transcript_text(video_id::AbstractString)
    entries = py"get_transcript"(video_id)
    # Accumulate into a buffer: repeated `*=` on a String copies the whole
    # text on every iteration, which is quadratic for long transcripts.
    buffer = IOBuffer()
    for entry in entries
        print(buffer, entry["text"], " ")
    end
    return String(take!(buffer))
end
"""
    chunk_text(text::AbstractString, chunk_size::Integer=280)

Split `text` into consecutive pieces of at most `chunk_size` characters and return
them as a `Vector{String}`. Every piece except possibly the last has exactly
`chunk_size` characters; concatenating the pieces reproduces `text`.

Chunks are measured in characters, not bytes, so multi-byte (non-ASCII) text is
split on valid character boundaries. An empty `text` yields an empty vector.

# Throws
- `ArgumentError` if `chunk_size` is not positive.
"""
function chunk_text(text::AbstractString, chunk_size::Integer=280)
    chunk_size > 0 ||
        throw(ArgumentError("chunk_size must be positive, got $chunk_size"))
    # Number of characters to advance from the first to the last character of a chunk.
    span = Int(chunk_size) - 1
    chunks = String[]
    start = firstindex(text)
    stop = lastindex(text)
    while start <= stop
        # `nextind` moves by characters and may overshoot the end of the string;
        # clamp to the index of the last character.
        finish = min(nextind(text, start, span), stop)
        push!(chunks, String(text[start:finish]))
        start = nextind(text, finish)
    end
    return chunks
end
end |