@KristofferEriksson
Last active June 22, 2025 11:57

Revisions

  1. KristofferEriksson revised this gist Feb 7, 2024. 1 changed file with 15 additions and 17 deletions.
     useSpeechToText.ts (32 changes: 15 additions & 17 deletions)

     @@ -58,44 +58,41 @@ export const useSpeechToText = ({
            let finalTranscript = "";

            // Iterate through all the current results
     -      for (let i = 0; i < event.results.length; i++) {
     +      for (let i = lastProcessedIndex; i < event.results.length; i++) {
              const result = event.results[i];
              // If the result is final, append to the final transcript
     -        if (result?.isFinal) {
     -          finalTranscript += result?.[0]?.transcript + " ";
     +        if (result.isFinal) {
     +          finalTranscript += result[0].transcript + " ";
     +          setLastProcessedIndex(i + 1);
              } else {
                // Otherwise, append to the interim transcript
     -          interimTranscript += result?.[0]?.transcript + " ";
     +          interimTranscript += result[0].transcript + " ";
              }
            }

            // Update the transcript state with a combination of the final and interim results
     -      setTranscript(finalTranscript + interimTranscript);
     +      setTranscript(transcript + finalTranscript + interimTranscript);

            // Invoke callback with the latest transcript
     -      if (
     -        interimTranscript.trim().length > 0 ||
     -        finalTranscript.trim().length > 0
     -      ) {
     -        onResult && onResult(finalTranscript + interimTranscript);
     -      }
     +      onResult && onResult(transcript + finalTranscript + interimTranscript);
          },
     -    [onResult]
     +    [onResult, transcript, lastProcessedIndex],
        );

        // start and stop functions using useCallback
        const start = useCallback(() => {
     -    if (!recognition) return;
     +    if (!recognition || isListening) return;
          setTranscript("");
          setLastProcessedIndex(0);
          setIsListening(true);
          recognition.start();
     -  }, [recognition]);
     +  }, [recognition, isListening]);

        const stop = useCallback(() => {
     -    if (!recognition) return;
     +    if (!recognition || !isListening) return;
          recognition.stop();
     -  }, [recognition]);
     +    setIsListening(false);
     +  }, [recognition, isListening]);

        useEffect(() => {
          if (!recognition) {

     @@ -115,7 +112,7 @@ export const useSpeechToText = ({
          };

          return () => {
     -      recognition.stop();
     +      if (isListening) recognition.stop();
          };
        }, [
          lang,

     @@ -127,6 +124,7 @@ export const useSpeechToText = ({
          recognition,
          start,
          stop,
     +    isListening,
        ]);

        return { start, stop, transcript, isListening };

  2. KristofferEriksson revised this gist Feb 5, 2024. 1 changed file with 1 addition and 1 deletion.
     useSpeechToText.ts (2 changes: 1 addition & 1 deletion)

     @@ -41,7 +41,7 @@ export const useSpeechToText = ({
          maxAlternatives = 1,
          onResult,
          onError,
     -}: UseSpeechToTextProps) => {
     +}: UseSpeechToTextProps = {}) => {
        const [isListening, setIsListening] = useState(false);
        const [transcript, setTranscript] = useState("");
        const [lastProcessedIndex, setLastProcessedIndex] = useState(0);

  3. KristofferEriksson created this gist Feb 5, 2024.
     useSpeechToText.ts (initial version: 135 additions)

     import { useCallback, useEffect, useState } from "react";

     // Define custom types for SpeechRecognition and SpeechRecognitionEvent
     interface ISpeechRecognitionEvent extends Event {
       results: SpeechRecognitionResultList;
       resultIndex: number;
     }

     interface ISpeechRecognition extends EventTarget {
       lang: string;
       continuous: boolean;
       interimResults: boolean;
       maxAlternatives: number;
       start: () => void;
       stop: () => void;
       onresult: (event: ISpeechRecognitionEvent) => void;
       onerror: (event: Event) => void;
       onend: () => void;
     }

     declare global {
       interface Window {
         SpeechRecognition: new () => ISpeechRecognition;
         webkitSpeechRecognition: new () => ISpeechRecognition;
       }
     }

     interface UseSpeechToTextProps {
       lang?: string;
       continuous?: boolean;
       interimResults?: boolean;
       maxAlternatives?: number;
       onResult?: (result: string) => void;
       onError?: (error: string) => void;
     }

     export const useSpeechToText = ({
       lang = "en-US",
       continuous = true,
       interimResults = true,
       maxAlternatives = 1,
       onResult,
       onError,
     }: UseSpeechToTextProps) => {
       const [isListening, setIsListening] = useState(false);
       const [transcript, setTranscript] = useState("");
       const [lastProcessedIndex, setLastProcessedIndex] = useState(0);

       const recognition: ISpeechRecognition | null =
         typeof window !== "undefined" &&
         (window.SpeechRecognition || window.webkitSpeechRecognition)
           ? new (window.SpeechRecognition || window.webkitSpeechRecognition)()
           : null;

       const handleResult = useCallback(
         (event: ISpeechRecognitionEvent) => {
           let interimTranscript = "";
           let finalTranscript = "";

           // Iterate through all the current results
           for (let i = 0; i < event.results.length; i++) {
             const result = event.results[i];
             // If the result is final, append to the final transcript
             if (result?.isFinal) {
               finalTranscript += result?.[0]?.transcript + " ";
             } else {
               // Otherwise, append to the interim transcript
               interimTranscript += result?.[0]?.transcript + " ";
             }
           }

           // Update the transcript state with a combination of the final and interim results
           setTranscript(finalTranscript + interimTranscript);

           // Invoke callback with the latest transcript
           if (
             interimTranscript.trim().length > 0 ||
             finalTranscript.trim().length > 0
           ) {
             onResult && onResult(finalTranscript + interimTranscript);
           }
         },
         [onResult]
       );

       // start and stop functions using useCallback
       const start = useCallback(() => {
         if (!recognition) return;
         setTranscript("");
         setLastProcessedIndex(0);
         setIsListening(true);
         recognition.start();
       }, [recognition]);

       const stop = useCallback(() => {
         if (!recognition) return;
         recognition.stop();
       }, [recognition]);

       useEffect(() => {
         if (!recognition) {
           onError &&
             onError("Speech recognition is not supported in this browser.");
           return;
         }

         recognition.lang = lang;
         recognition.continuous = continuous;
         recognition.interimResults = interimResults;
         recognition.maxAlternatives = maxAlternatives;
         recognition.onresult = handleResult;
         recognition.onerror = (event) => onError && onError(event.type);
         recognition.onend = () => {
           setIsListening(false);
         };

         return () => {
           recognition.stop();
         };
       }, [
         lang,
         continuous,
         interimResults,
         maxAlternatives,
         handleResult,
         onError,
         recognition,
         start,
         stop,
       ]);

       return { start, stop, transcript, isListening };
     };

     export default useSpeechToText;

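Usage example

The gist contains only the hook itself, so the following is a minimal sketch of how it might be consumed. The component name DictationDemo and its markup are illustrative assumptions, not part of the gist; the options and return values (start, stop, transcript, isListening) come from the code above, as revised Feb 7, 2024.

     import React from "react";
     // Assumes the gist file sits next to this component as useSpeechToText.ts
     import useSpeechToText from "./useSpeechToText";

     // Hypothetical demo component, not part of the gist
     const DictationDemo = () => {
       const { start, stop, transcript, isListening } = useSpeechToText({
         lang: "en-US",
         continuous: true,
         interimResults: true,
         // Receives the accumulated transcript on every recognition result
         onResult: (text) => console.log("Latest transcript:", text),
         // Receives a message when recognition is unsupported or fails
         onError: (error) => console.error("Speech recognition error:", error),
       });

       return (
         <div>
           <button onClick={isListening ? stop : start}>
             {isListening ? "Stop dictation" : "Start dictation"}
           </button>
           <p>{transcript || "Say something..."}</p>
         </div>
       );
     };

     export default DictationDemo;

Because the Web Speech API is not available in every browser (and is prefixed as webkitSpeechRecognition in Chrome), the hook reports "Speech recognition is not supported in this browser." through onError rather than throwing, so a consumer like this one can degrade gracefully.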