Tag: speech to text

Python: Generate Transcript of Video File

There’s a speech_recognition module in Python that transcribes an audio file — since ffmpeg can convert a video file to mp3, that means you can also use Python to transcribe a video file.

# requires pocketsphinx from CMU if using sphinx for speech to text recognition
import os
import speech_recognition as sr
import ffmpeg

strFFMPEGBinaryLocation = 'c:/tmp/ffmpeg/bin/ffmpeg.exe'
strCurrentDirectory = os.getcwd()

strInputVideo = "\"Z:/Path To/My Video/file.MP4\""
strOutputFileName = "converted.wav"
# Convert mp4 to wav file
strffmpeg_convert_mp4_to_wav = f'{strFFMPEGBinaryLocation} -i {strInputVideo} {strCurrentDirectory}/{strOutputFileName}'
os.system(strffmpeg_convert_mp4_to_wav)

# Run converted wav file through speech recognizer
r = sr.Recognizer()
audio = sr.AudioFile(f'{strCurrentDirectory}/{strOutputFileName}')

with audio as source:
	#audio = r.record(source, 90)				# Would need API key to process longer audio?
	#text = r.recognize_google(audio)
	audio = r.record(source)
	text = r.recognize_sphinx(audio)
print(text)