There’s a speech_recognition module in Python that transcribes an audio file. Since ffmpeg can convert a video file to an audio file (a WAV file in the script below), you can also use Python to transcribe a video file.
# Transcribe the speech in a video file: the ffmpeg binary converts the video
# to a wav file, then speech_recognition turns that wav audio into text.
# requires pocketsphinx from CMU if using sphinx for speech to text recognition
import os
import subprocess

import speech_recognition as sr
import ffmpeg  # NOTE(review): not referenced below; the ffmpeg binary is invoked directly

strFFMPEGBinaryLocation = 'c:/tmp/ffmpeg/bin/ffmpeg.exe'
strCurrentDirectory = os.getcwd()
# No embedded quote characters: the path is handed to ffmpeg as a single
# argument, so the spaces in it need no shell quoting.
strInputVideo = "Z:/Path To/My Video/file.MP4"
strOutputFileName = "converted.wav"
# Built once and shared by the conversion step and the recognizer.
strOutputPath = f'{strCurrentDirectory}/{strOutputFileName}'

# Convert mp4 to wav file
# An argument list (no shell) keeps paths that contain spaces intact, and
# check=True raises CalledProcessError when ffmpeg exits with an error rather
# than letting the recognizer run against a missing or stale wav file.
subprocess.run(
    [strFFMPEGBinaryLocation, '-i', strInputVideo, strOutputPath],
    check=True,
)

# Run converted wav file through speech recognizer
r = sr.Recognizer()
with sr.AudioFile(strOutputPath) as source:
    # Alternative left disabled by the original author:
    #   r.record(source, 90) followed by r.recognize_google(...)
    # The author was unsure whether an API key is needed for longer audio.
    recorded_audio = r.record(source)
    text = r.recognize_sphinx(recorded_audio)

print(text)