Installing faster-whisper with GPU support via CTranslate2 (dependencies: CUDA >= 11.2, cuDNN 8.x, and cuBLAS)

First, create a conda environment with a CUDA-enabled PyTorch and cuDNN:

conda create -n fasterwhisper python
conda activate fasterwhisper
conda install pytorch torchvision torchaudio pytorch-cuda=11.7 -c pytorch -c nvidia
conda install "cudnn>8" -c conda-forge

Then install CTranslate2 into ~/opt:

I needed libiomp5 and couldn't get CMake to find the Intel MKL include directory (even by setting MKLROOT), so I built with -DWITH_MKL=OFF below and installed OpenMP from apt instead:

sudo apt install libomp5 libomp-dev

The official docs say to make a directory called "build", but CTranslate2's setup.py looks for a directory called "lib" or "lib64", so I named the build directory "lib" instead.
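(A symlink named lib pointing at a conventional build directory, e.g. ln -s build lib, would presumably also satisfy setup.py, but renaming is simpler.)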

mkdir -p ~/opt && cd ~/opt
git clone --recursive https://github.com/OpenNMT/CTranslate2.git
cd CTranslate2/
mkdir lib && cd lib
cmake .. -DWITH_CUDA=ON -DWITH_CUDNN=ON -DWITH_MKL=OFF
make -j4
sudo make install
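
If the build succeeded, the shared library should be sitting in the build tree (path assumed from the layout above):

ls ~/opt/CTranslate2/lib/libctranslate2.so*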

This can now be used to build the Python package:

cd python
pip install -r install_requirements.txt
export CTRANSLATE2_ROOT=$HOME/opt/CTranslate2
python setup.py bdist_wheel
pip install dist/*.whl

Then add the CTranslate2 library path to the linker path in your bashrc:

echo 'export LD_LIBRARY_PATH=$HOME/opt/CTranslate2/lib:$LD_LIBRARY_PATH' >> ~/.bashrc
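
Reload the shell config and confirm the extension can find the shared library (without LD_LIBRARY_PATH set, the import typically fails with a "cannot open shared object file" error for libctranslate2.so):

source ~/.bashrc
python -c "import ctranslate2; print(ctranslate2.__version__)"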

Then install faster-whisper:

cd ~/dev # or wherever
git clone https://github.com/guillaumekln/faster-whisper
cd faster-whisper
pip install transformers
pip install -e .[conversion]
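
The converter CLI ships with the ctranslate2 wheel built earlier, so it should already be on PATH:

which ct2-transformers-converter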

Then convert a model (for float16 or int8 inference on GPU, respectively):

ct2-transformers-converter --model openai/whisper-large-v2 --output_dir whisper-large-v2-ct2 \
  --copy_files tokenizer.json --quantization float16
ct2-transformers-converter --model openai/whisper-large-v2 --output_dir whisper-large-v2-ct2-int8 \
  --copy_files tokenizer.json --quantization int8_float16
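
Each converted directory should contain the CTranslate2 model files (something like model.bin and config.json) plus the copied tokenizer.json:

ls whisper-large-v2-ct2-int8/

With a model converted, the following recording loop (saved as, say, arec_chunk.sh; the filename is my choice here) records microphone audio in chunks with arecord and hands each finished chunk off for transcription:
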
#!/bin/bash
# set up recording parameters
sample_rate=44100 # audio sample rate
tempfile=$PWD/arec_chunk.tmp.wav
destfile=$PWD/arec_chunk.wav
duration=100
# start recording loop
while true; do
    # record audio in the background and save to a temporary file
    arecord -d $duration -r $sample_rate -c 1 -t wav "$tempfile" 2> /dev/null &
    # capture the process ID of the arecord run
    arec_pid=$!
    # echo "Recorded PID $arec_pid"
    # wait for the chunk duration to elapse or for user input to end it early
    while true; do
        read -d '' -t $duration -n 1 # wait $duration seconds or for a single keypress
        echo -en "\r"
        if [[ $REPLY == ' ' || -z $REPLY ]]; then
            # echo "Stop recording"
            break # stop recording (spacebar pressed, or read timed out)
        else
            # echo "Extend recording"
            : # any other key extends the recording wait
        fi
    done
    # echo "(Program exited read loop)"
    if [[ -z $REPLY ]]; then
        # echo "Recording timed out after $duration seconds"
        : # Timeout reached
    fi
    # echo "Now killing PID $arec_pid"
    kill $arec_pid # send signal to kill arecord process
    mv "$tempfile" "$destfile" # Move the full WAV to read it while a new arecord process begins
    # run the transcription script in the background, reading from the moved file
    pushd ~/dev/faster-whisper/ > /dev/null
    python transcribe_arec_chunk.py &
    popd > /dev/null
done
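
To run the loop: chmod +x arec_chunk.sh && ./arec_chunk.sh (script name assumed as above). Press space to cut a chunk short; any other key keeps the recording going.

Next, a quick smoke test of the converted model against the jfk.wav sample that ships with whisper.cpp:
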
from pathlib import Path

from faster_whisper import WhisperModel

# Run on GPU with FP16
# model_path = "whisper-large-v2-ct2/"
# model = WhisperModel(model_path, device="cuda", compute_type="float16")
# or run on GPU with INT8
model_path = "whisper-large-v2-ct2-int8/"
model = WhisperModel(model_path, device="cuda", compute_type="int8_float16")
# or run on CPU with INT8
# model = WhisperModel(model_path, device="cpu", compute_type="int8")

audio_file = Path.home() / "dev" / "whisper.cpp" / "samples" / "jfk.wav"
segments, info = model.transcribe(str(audio_file), beam_size=5)
print(
    "Detected language '%s' with probability %f"
    % (info.language, info.language_probability)
)
# note: segments is a generator, so the transcription runs lazily as it is iterated
for segment in segments:
    print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
from pathlib import Path

from faster_whisper import WhisperModel

# run on GPU with INT8
model_path = "whisper-large-v2-ct2-int8/"
model = WhisperModel(model_path, device="cuda", compute_type="int8_float16")

audio_file = (
    Path.home() / "dev" / "testing" / "audio" / "fasterwhisper" / "arec_chunk.wav"
)
segments, info = model.transcribe(str(audio_file), language="en", beam_size=5)
for segment in segments:
    print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))