@KristofferEriksson
Last active June 22, 2025 11:57

Revisions

  1. KristofferEriksson revised this gist Feb 7, 2024. 1 changed file with 15 additions and 17 deletions.
     useSpeechToText.ts (32 changes: 15 additions & 17 deletions)

     @@ -58,44 +58,41 @@ export const useSpeechToText = ({
            let finalTranscript = "";

            // Iterate through all the current results
     -      for (let i = 0; i < event.results.length; i++) {
     +      for (let i = lastProcessedIndex; i < event.results.length; i++) {
              const result = event.results[i];
              // If the result is final, append to the final transcript
     -        if (result?.isFinal) {
     -          finalTranscript += result?.[0]?.transcript + " ";
     +        if (result.isFinal) {
     +          finalTranscript += result[0].transcript + " ";
     +          setLastProcessedIndex(i + 1);
              } else {
                // Otherwise, append to the interim transcript
     -          interimTranscript += result?.[0]?.transcript + " ";
     +          interimTranscript += result[0].transcript + " ";
              }
            }

            // Update the transcript state with a combination of the final and interim results
     -      setTranscript(finalTranscript + interimTranscript);
     +      setTranscript(transcript + finalTranscript + interimTranscript);

            // Invoke callback with the latest transcript
     -      if (
     -        interimTranscript.trim().length > 0 ||
     -        finalTranscript.trim().length > 0
     -      ) {
     -        onResult && onResult(finalTranscript + interimTranscript);
     -      }
     +      onResult && onResult(transcript + finalTranscript + interimTranscript);
          },
     -    [onResult]
     +    [onResult, transcript, lastProcessedIndex],
        );

        // start and stop functions using useCallback
        const start = useCallback(() => {
     -    if (!recognition) return;
     +    if (!recognition || isListening) return;
          setTranscript("");
          setLastProcessedIndex(0);
          setIsListening(true);
          recognition.start();
     -  }, [recognition]);
     +  }, [recognition, isListening]);

        const stop = useCallback(() => {
     -    if (!recognition) return;
     +    if (!recognition || !isListening) return;
          recognition.stop();
     -  }, [recognition]);
     +    setIsListening(false);
     +  }, [recognition, isListening]);

        useEffect(() => {
          if (!recognition) {

     @@ -115,7 +112,7 @@ export const useSpeechToText = ({
          };

          return () => {
     -      recognition.stop();
     +      if (isListening) recognition.stop();
          };
        }, [
          lang,

     @@ -127,6 +124,7 @@ export const useSpeechToText = ({
          recognition,
          start,
          stop,
     +    isListening,
        ]);

        return { start, stop, transcript, isListening };

  2. KristofferEriksson revised this gist Feb 5, 2024. 1 changed file with 1 addition and 1 deletion.
     useSpeechToText.ts (2 changes: 1 addition & 1 deletion)

     @@ -41,7 +41,7 @@ export const useSpeechToText = ({
          maxAlternatives = 1,
          onResult,
          onError,
     -}: UseSpeechToTextProps) => {
     +}: UseSpeechToTextProps = {}) => {
        const [isListening, setIsListening] = useState(false);
        const [transcript, setTranscript] = useState("");
        const [lastProcessedIndex, setLastProcessedIndex] = useState(0);

  3. KristofferEriksson created this gist Feb 5, 2024.
     useSpeechToText.ts (initial version: 135 additions)

     import { useCallback, useEffect, useState } from "react";

     // Define custom types for SpeechRecognition and SpeechRecognitionEvent
     interface ISpeechRecognitionEvent extends Event {
       results: SpeechRecognitionResultList;
       resultIndex: number;
     }

     interface ISpeechRecognition extends EventTarget {
       lang: string;
       continuous: boolean;
       interimResults: boolean;
       maxAlternatives: number;
       start: () => void;
       stop: () => void;
       onresult: (event: ISpeechRecognitionEvent) => void;
       onerror: (event: Event) => void;
       onend: () => void;
     }

     declare global {
       interface Window {
         SpeechRecognition: new () => ISpeechRecognition;
         webkitSpeechRecognition: new () => ISpeechRecognition;
       }
     }

     interface UseSpeechToTextProps {
       lang?: string;
       continuous?: boolean;
       interimResults?: boolean;
       maxAlternatives?: number;
       onResult?: (result: string) => void;
       onError?: (error: string) => void;
     }

     export const useSpeechToText = ({
       lang = "en-US",
       continuous = true,
       interimResults = true,
       maxAlternatives = 1,
       onResult,
       onError,
     }: UseSpeechToTextProps) => {
       const [isListening, setIsListening] = useState(false);
       const [transcript, setTranscript] = useState("");
       const [lastProcessedIndex, setLastProcessedIndex] = useState(0);

       const recognition: ISpeechRecognition | null =
         typeof window !== "undefined" &&
         (window.SpeechRecognition || window.webkitSpeechRecognition)
           ? new (window.SpeechRecognition || window.webkitSpeechRecognition)()
           : null;

       const handleResult = useCallback(
         (event: ISpeechRecognitionEvent) => {
           let interimTranscript = "";
           let finalTranscript = "";

           // Iterate through all the current results
           for (let i = 0; i < event.results.length; i++) {
             const result = event.results[i];
             // If the result is final, append to the final transcript
             if (result?.isFinal) {
               finalTranscript += result?.[0]?.transcript + " ";
             } else {
               // Otherwise, append to the interim transcript
               interimTranscript += result?.[0]?.transcript + " ";
             }
           }

           // Update the transcript state with a combination of the final and interim results
           setTranscript(finalTranscript + interimTranscript);

           // Invoke callback with the latest transcript
           if (
             interimTranscript.trim().length > 0 ||
             finalTranscript.trim().length > 0
           ) {
             onResult && onResult(finalTranscript + interimTranscript);
           }
         },
         [onResult]
       );

       // start and stop functions using useCallback
       const start = useCallback(() => {
         if (!recognition) return;
         setTranscript("");
         setLastProcessedIndex(0);
         setIsListening(true);
         recognition.start();
       }, [recognition]);

       const stop = useCallback(() => {
         if (!recognition) return;
         recognition.stop();
       }, [recognition]);

       useEffect(() => {
         if (!recognition) {
           onError &&
             onError("Speech recognition is not supported in this browser.");
           return;
         }

         recognition.lang = lang;
         recognition.continuous = continuous;
         recognition.interimResults = interimResults;
         recognition.maxAlternatives = maxAlternatives;
         recognition.onresult = handleResult;
         recognition.onerror = (event) => onError && onError(event.type);
         recognition.onend = () => {
           setIsListening(false);
         };

         return () => {
           recognition.stop();
         };
       }, [
         lang,
         continuous,
         interimResults,
         maxAlternatives,
         handleResult,
         onError,
         recognition,
         start,
         stop,
       ]);

       return { start, stop, transcript, isListening };
     };

     export default useSpeechToText;

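Usage example

The gist contains only the hook itself, so the following is a minimal sketch of how it might be consumed. The component name DictationDemo and its markup are illustrative assumptions, not part of the gist; the options and return values (start, stop, transcript, isListening) come from the code above, as revised Feb 7, 2024.

     import React from "react";
     // Assumes the gist file sits next to this component as useSpeechToText.ts
     import useSpeechToText from "./useSpeechToText";

     // Hypothetical demo component, not part of the gist
     const DictationDemo = () => {
       const { start, stop, transcript, isListening } = useSpeechToText({
         lang: "en-US",
         continuous: true,
         interimResults: true,
         // Receives the accumulated transcript on every recognition result
         onResult: (text) => console.log("Latest transcript:", text),
         // Receives a message when recognition is unsupported or fails
         onError: (error) => console.error("Speech recognition error:", error),
       });

       return (
         <div>
           <button onClick={isListening ? stop : start}>
             {isListening ? "Stop dictation" : "Start dictation"}
           </button>
           <p>{transcript || "Say something..."}</p>
         </div>
       );
     };

     export default DictationDemo;

Because the Web Speech API is not available in every browser (and is prefixed as webkitSpeechRecognition in Chrome), the hook reports "Speech recognition is not supported in this browser." through onError rather than throwing, so a consumer like this one can degrade gracefully.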