| |
| |
| |
| |
|
|
| |
| import httpcore |
| |
| import pathlib |
| import sys, os |
| from gtts import gTTS |
| import gradio as gr |
| import os |
| import speech_recognition as sr |
| from googletrans import Translator, constants |
| from pprint import pprint |
| from moviepy.editor import * |
| from pytube import YouTube |
| from youtube_transcript_api import YouTubeTranscriptApi |
| from utils import * |
| import json |
| import re |
| from pytube import YouTube |
| from yt_dlp import YoutubeDL |
| from yt_dlp import YoutubeDL |
| import os |
|
|
| import yt_dlp |
|
|
def download_video(url):
    """
    Download a YouTube video with yt-dlp and return the path of the local file.

    The best video and audio streams are fetched and merged into an MP4 named
    after the video title in the current working directory.  If a
    ``youtube_cookies.txt`` file is present it is passed to yt-dlp so that
    requests are made with those cookies.

    :param url: str - YouTube video URL
    :return: str | None - path of the downloaded file, or None if the download failed
    """
    print("Starting download...")

    ydl_opts = {
        'format': 'bestvideo+bestaudio/best',
        'merge_output_format': 'mp4',
        'outtmpl': '%(title)s.%(ext)s',
        'quiet': False,
        # The YoutubeDL API takes request headers through 'http_headers';
        # a 'user-agent' key is a CLI flag name and is ignored here.
        'http_headers': {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
        },
    }

    # The API key for a cookie jar is 'cookiefile' ('cookies' is ignored).
    # Only pass it when the file exists so a missing file changes nothing.
    cookie_file = 'youtube_cookies.txt'
    if os.path.isfile(cookie_file):
        ydl_opts['cookiefile'] = cookie_file

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=True)

            # After a merge the final path is reported per download; the name
            # from prepare_filename() may still carry the pre-merge extension.
            downloads = info.get('requested_downloads') or []
            local_file = downloads[0].get('filepath') if downloads else None
            if not local_file:
                local_file = ydl.prepare_filename(info)
                if not os.path.exists(local_file):
                    merged = os.path.splitext(local_file)[0] + '.mp4'
                    if os.path.exists(merged):
                        local_file = merged

            print(f"✅ Downloaded: {local_file}")
            return local_file
    except Exception as e:
        # Best-effort boundary: callers treat None as "download failed".
        print(f"❌ Download failed: {str(e)}")
        return None
|
|
| |
# Executed at module load: downloads this hard-coded sample video before
# anything else in the file runs.
# NOTE(review): this looks like a leftover smoke test rather than something
# the app needs at start-up — confirm and consider removing it.
url = "https://www.youtube.com/watch?v=uLVRZE8OAI4"
download_video(url)
|
|
|
|
|
|
|
|
|
|
def validate_youtube(url):
    """
    Decide whether a YouTube URL must be rejected.

    The video metadata is queried with yt-dlp (nothing is downloaded) and the
    reported duration is compared against the 10 minute limit.

    :param url: str - YouTube video URL
    :return: bool - True when the URL cannot be used (lookup failed, unknown
        duration, or longer than 10 minutes); False when the video is short enough
    """
    probe_options = {'quiet': True, 'no_warnings': True}
    max_seconds = 600

    try:
        with YoutubeDL(probe_options) as probe:
            metadata = probe.extract_info(url, download=False)
            duration = metadata.get('duration')

            # No duration reported: treat the video as unusable.
            if duration is None:
                print("Could not determine video length.")
                return True

            if not duration > max_seconds:
                print("Your video is 10 minutes or shorter.")
                return False

            print("Your video is longer than 10 minutes.")
            return True
    except Exception as e:
        # Any failure while probing counts as an invalid URL.
        print(f"Error: The provided URL is invalid or not accessible. ({e})")
        return True
| |
def validate_url(url):
    """
    Check a URL with the ``validators`` package.

    :param url: str - URL to check
    :return: bool - True when the URL is NOT valid, False when it is valid
    """
    import validators

    # validators.url() yields a falsy result for malformed URLs.
    looks_valid = validators.url(url)
    if looks_valid:
        return False

    print("Hi there URL seems invalid ")
    return True
def cleanup():
    """
    Delete every ``*.mp4`` and ``*.wav`` file in the current working directory.

    Deletion is best-effort and per file: an entry that cannot be removed is
    reported and skipped, and the remaining files are still deleted.
    """
    import glob
    import pathlib

    patterns = ('*.mp4', '*.wav')

    for pattern in patterns:
        for junk in glob.glob(pattern):
            print("Deleting", junk)
            try:
                pathlib.Path(junk).unlink()
            except OSError as error:
                # Typically the file is still open in another process, but
                # report the real reason instead of guessing.
                print(f"I cannot delete the file {junk}: {error}")
|
|
def getSize(filename):
    """Return the size of *filename* in bytes."""
    return os.path.getsize(filename)
|
|
|
|
def clean_transcript(transcript_list):
    """
    Flatten transcript entries into a single string.

    Entries whose text is exactly one of the known "[music]" markers are
    dropped; every remaining text is followed by one space.

    :param transcript_list: iterable of mappings, each with a "text" key
    :return: str - the concatenated transcript text
    """
    music_markers = frozenset((
        '[music]', '[Music]',
        '[музыка]', '[Музыка]',
        '[musik]', '[Musik]',
        '[musica]', '[Musica]',
        '[música]', '[Música]',
        '[音楽]', '[音乐]',
    ))

    spoken = [entry["text"] for entry in transcript_list]
    return "".join(
        fragment + " " for fragment in spoken if fragment not in music_markers
    )
| |
| |
def _extract_video_id(url):
    """
    Return the YouTube video id contained in *url*.

    Understands ``watch?v=<id>`` links (with or without further query
    parameters), ``youtu.be/<id>`` short links and ``/embed/``, ``/shorts/``,
    ``/live/`` and ``/v/`` paths.  For any other shape it falls back to the
    text after the first "=" or, failing that, to the URL itself.
    """
    from urllib.parse import parse_qs, urlparse

    parsed = urlparse(url.strip())
    host = (parsed.hostname or "").lower()
    segments = [part for part in parsed.path.split("/") if part]

    if (host == "youtu.be" or host.endswith(".youtu.be")) and segments:
        return segments[0]

    ids = parse_qs(parsed.query).get("v")
    if ids:
        return ids[0]

    if len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
        return segments[1]

    _, found, tail = url.partition("=")
    return tail if found else url


def _translation_code(option):
    """Return the language code of one ``translation_languages`` entry."""
    # Entries are read as dicts or as objects with a ``language_code``
    # attribute, so either representation works.
    if isinstance(option, dict):
        return option.get('language_code')
    return getattr(option, 'language_code', None)


def _fetch_entries(transcript):
    """Fetch *transcript* and return its entries in the dict form used by clean_transcript."""
    fetched = transcript.fetch()
    # NOTE(review): newer youtube-transcript-api releases return an object
    # exposing to_raw_data(); older ones return the list of dicts directly.
    to_raw = getattr(fetched, "to_raw_data", None)
    return to_raw() if callable(to_raw) else fetched


def get_transcript(url, desired_language):
    """
    Fetch the transcript of a YouTube video and, if offered, its translation.

    A translatable manually created transcript is preferred, then a
    translatable auto-generated one, then whatever transcript is listed first.
    When the chosen transcript offers ``desired_language`` as a translation
    target, that translation is fetched as well.

    :param url: str - YouTube video URL
    :param desired_language: str - language code of the wanted translation
    :return: tuple (script, script_translated, is_translated); when no
        transcript can be listed the result is (" ", " ", False)
    """
    video_id = _extract_video_id(url)
    try:
        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
    except Exception:
        print('TranscriptsDisabled:')
        return " ", " ", False

    manual = generated = first_listed = None
    available = 0
    for candidate in transcript_list:
        available += 1
        if first_listed is None:
            first_listed = candidate
        if not candidate.is_translatable:
            continue
        if candidate.is_generated:
            if generated is None:
                print("Script found automatic generated")
                generated = candidate
        elif manual is None:
            print("Script found manually generated")
            manual = candidate
    print("There are {} avialable scripts".format(available))

    # Use the transcript object that actually matched instead of looking one
    # up again by whichever language code happened to be iterated last.
    if manual is not None:
        print('We extract manually created transcripts')
        transcript = manual
    elif generated is not None:
        print('We extract generated transcript')
        transcript = generated
    elif first_listed is not None:
        print('We try find the transcript')
        transcript = first_listed
    else:
        return " ", " ", False

    is_translated = False
    script_translated = ""
    if desired_language and transcript.is_translatable:
        offered = {
            _translation_code(option)
            for option in transcript.translation_languages
        }
        if desired_language in offered:
            print("It was found the translation for lang:", desired_language)
            print('We translate directly the transcript')
            try:
                script_translated = clean_transcript(
                    _fetch_entries(transcript.translate(desired_language))
                )
            except Exception as error:
                print("The translated transcript could not be fetched:", error)
                script_translated = ""
            # An empty translation is useless downstream, so report it as
            # "not translated" and let the caller fall back.
            is_translated = bool(script_translated.strip())

    try:
        script = clean_transcript(_fetch_entries(transcript))
    except Exception as error:
        print("The transcript could not be fetched:", error)
        script = ""

    return script, script_translated, is_translated
|
|
| |
# Remember the start-up directory and make sure a "temp" folder exists in it;
# both paths are also published through the process environment.
home_dir = os.getcwd()
temp_dir = os.path.join(home_dir, "temp")

os.makedirs(temp_dir, exist_ok=True)
os.environ.update({'home_dir': home_dir, 'temp_dir': temp_dir})
|
|
# Locale passed to the speech recogniser for each supported source language.
_RECOGNITION_LOCALES = {
    "English": "en-US",
    "Italian": "it-IT",
    "Chinese": "zh-CN",
    "Spanish": "es-MX",
    "Russian": "ru-RU",
    "German": "de-DE",
    "Japanese": "ja-JP",
}

# Language code used for transcript translation, text translation and speech
# synthesis for each supported target language.
_TRANSLATION_CODES = {
    "English": "en",
    "Italian": "it",
    "Spanish": "es",
    "Russian": "ru",
    "German": "de",
    "Vietnamese": "vi",
    "Japanese": "ja",
}

# Clip shown to the user when processing fails after the URL check.
_RETRY_CLIP = "./demo/tryagain.mp4"


def _speech_to_text(wav_path, lang_in):
    """Transcribe *wav_path* with Google speech recognition; return None on failure."""
    recognizer = sr.Recognizer()
    with sr.AudioFile(wav_path) as source:
        whole_audio = recognizer.record(source)
    print("Recognize from ", lang_in)

    if getSize(wav_path) <= 50000000:
        try:
            return recognizer.recognize_google(whole_audio, language=lang_in)
        except Exception:
            return None

    print("The wav is too large")
    pieces = []
    for chunk in split_audio_wav(wav_path):
        print("Converting audio to text", chunk)
        try:
            # NOTE(review): split_audio_wav is defined outside this file.
            # When a chunk is a path to an existing file, that file is what
            # gets recognised; otherwise the whole recording is used, as before.
            if isinstance(chunk, (str, os.PathLike)) and os.path.isfile(chunk):
                with sr.AudioFile(os.fspath(chunk)) as chunk_source:
                    chunk_audio = recognizer.record(chunk_source)
            else:
                chunk_audio = whole_audio
            pieces.append(
                recognizer.recognize_google(chunk_audio, language=lang_in)
            )
        except Exception:
            return None
    return "".join(piece + " " for piece in pieces)


def video_to_translate(url, initial_language, final_language):
    """
    Produce a copy of a YouTube video dubbed into another language.

    Steps: validate the URL, download the video, obtain the text in the target
    language (from a translated YouTube transcript when one exists, otherwise
    by speech recognition followed by machine translation), synthesise speech
    from it and write a new video carrying that audio track.

    :param url: str - YouTube video URL
    :param initial_language: str - spoken language of the video, e.g. "English"
    :param final_language: str - language to dub into, e.g. "Spanish"
    :return: str - path of the generated video, or of a "try again" demo clip
        when the URL is rejected or any processing step fails
    """
    print('Checking the url')
    if validate_youtube(url) is True:
        return "./demo/tryagain2.mp4"

    # A missing or unknown selection used to leave these names unbound.
    lang_in = _RECOGNITION_LOCALES.get(initial_language)
    lang = _TRANSLATION_CODES.get(final_language)
    if lang_in is None or lang is None:
        print("Unsupported language selection:", initial_language, "->", final_language)
        return _RETRY_CLIP

    home_dir = os.getenv('home_dir')
    print('Initial directory:', home_dir)

    cleanup()
    file_obj = download_video(url)
    print(file_obj)
    if file_obj is None:
        print("The video could not be downloaded")
        return _RETRY_CLIP

    videoclip = VideoFileClip(file_obj)

    def give_up(message):
        # Release the clip first so cleanup() can delete the downloaded file.
        print(message)
        videoclip.close()
        cleanup()
        return _RETRY_CLIP

    text, trans, is_traduc = get_transcript(url, desired_language=lang)

    if is_traduc:
        print("Transcript Found")
    else:
        print("No Transcript Found")
        if videoclip.audio is None:
            return give_up("This video has no audio track")

        videoclip.audio.write_audiofile("audio.wav", codec='pcm_s16le')
        text = _speech_to_text("audio.wav", lang_in)
        if text is None:
            return give_up("This video cannot be recognized")

        print("Destination language ", lang)
        translator = Translator()
        try:
            translation = translator.translate(text, dest=lang)
        except Exception:
            return give_up("This text cannot be translated")
        trans = translation.text

    try:
        # The synthesised speech replaces audio.wav on disk.
        gTTS(text=trans, lang=lang, slow=False).save("audio.wav")
    except Exception:
        return give_up("The translated speech could not be generated")

    audioclip = AudioFileClip("audio.wav")
    videoclip.audio = CompositeAudioClip([audioclip])
    new_video = "video_translated_" + lang + ".mp4"

    if home_dir:
        os.chdir(home_dir)
    print('Final directory', os.getcwd())

    try:
        videoclip.write_videofile(new_video)
    finally:
        audioclip.close()
        videoclip.close()

    return new_video
|
|
# Input widgets of the web form.
initial_language = gr.Dropdown(
    label="Initial Language",
    choices=["English", "Italian", "Japanese", "Russian", "Spanish", "German"],
)
final_language = gr.Dropdown(
    label="Final Language",
    choices=["Russian", "Italian", "Spanish", "German", "English", "Japanese"],
)
url = gr.Textbox(label="Enter the YouTube URL below:")

# Static texts shown around the form.
_APP_DESCRIPTION = "A simple application that translates YouTube small videos from English, Italian, Japanese, Russian, Spanish, and German to Italian, Spanish, Russian, English, and Japanese. Wait one minute to process."
_APP_ARTICLE = """<div>
<p style="text-align: center"> All you need to do is to paste the YouTube link and hit submit, then wait for compiling. After that, click on Play/Pause to listen to the video. The video is saved in an MP4 format.
The length video limit is 10 minutes. For more information visit <a href="https://ruslanmv.com/">ruslanmv.com</a>.
</p>
</div>"""
_APP_EXAMPLES = [
    ["https://youtu.be/uLVRZE8OAI4?si=LA08t9hUJHLYg8K_", "English", "Spanish"],
]

# Build the interface around video_to_translate and start serving it.
gr.Interface(
    fn=video_to_translate,
    inputs=[url, initial_language, final_language],
    outputs="video",
    title="Video YouTube Translator",
    description=_APP_DESCRIPTION,
    article=_APP_ARTICLE,
    examples=_APP_EXAMPLES,
).launch()
|
|