import { useEffect, useRef } from "react";

import { SOCKET_ENDPOINTS } from "api/endpoints";
import { webSocketForEndpointOrThrow } from "api/socket-client";
import { RequestContext } from "api/types";
import { useGetAuthenticatedRequestContextOrThrow } from "api/utils";
import { notifier } from "utils/notifier";

import { useSyncRef } from "./useSyncRef";

// URL of the AudioWorklet module loaded with `audioWorklet.addModule` when the
// audio pipeline is created.
const rawPCM16WorkerSource = "/worklets/audioDecompressionProcessor.js";
// Processor name handed to the `AudioWorkletNode` constructor; presumably the
// name registered by the module above (TODO confirm against the worklet file).
const rawPCM16WorkerName = "raw-pcm-16-worker";
// Sample rate requested for both the AudioContext and the microphone track,
// and announced to the server in the first WebSocket message.
const sampleRate = 32000;
// Requested `sampleSize` constraint for the microphone track.
const sampleSize = 16;
// Requested `channelCount` constraint for the microphone track.
const channelCount = 1;

/**
 * Tells whether voice input is offered for the current browser.
 *
 * The decision is based purely on the user-agent string: it must contain
 * "chrom" or "safari" (case-insensitive).
 */
export const navigatorSupportsVoiceInput = () => {
  const supportedUserAgentPattern = /chrom|safari/iu;
  return supportedUserAgentPattern.test(navigator.userAgent);
};

/**
 * Language codes for the speech stream.
 *
 * NOTE(review): not referenced in the visible part of this module —
 * `useVoiceInput` accepts a plain `string` for `language`.
 */
export type SpeechStreamLanguage = "fr-FR" | "en-GB" | "en-US";

/**
 * React hook driving speech-to-text transcription.
 *
 * While `transcribe` is true, a transcription run is active. The run is torn
 * down and re-created whenever `language` (or the request-context getter)
 * changes; the transcript accumulated so far is carried over to the new run.
 *
 * @param transcribe - Whether transcription should currently be running.
 * @param language - Language sent to the backend; defaults to "fr-FR".
 * @param onTranscriptionError - Called when a run fails.
 * @param onTranscriptionStart - Called when the first run of a session starts
 *   capturing audio (not when a run is merely re-created).
 * @param onTranscriptionEnd - Called each time the effect runs with
 *   `transcribe` false.
 * @param onChange - Called with the cumulated transcript on every update.
 */
export const useVoiceInput = ({
  transcribe,
  language,
  onTranscriptionError,
  onTranscriptionStart,
  onTranscriptionEnd,
  onChange,
}: {
  transcribe: boolean;
  language?: string;
  onTranscriptionError?: () => void;
  onTranscriptionStart?: () => void;
  onTranscriptionEnd?: () => void;
  onChange?: (transcript: TranscriptionResult[]) => void;
}) => {
  // Callbacks are read through refs so that a caller passing new function
  // identities mid-transcription does not tear the pipeline down.
  const errorCallbackRef = useSyncRef(onTranscriptionError);
  const startCallbackRef = useSyncRef(onTranscriptionStart);
  const endCallbackRef = useSyncRef(onTranscriptionEnd);
  const changeCallbackRef = useSyncRef(onChange);

  const getAuthenticatedRequestContextOrThrow =
    useGetAuthenticatedRequestContextOrThrow(
      SOCKET_ENDPOINTS.PROVIDER_SPEECH_TO_TEXT.authenticationKind,
    );

  // Transcript returned by the previous run. It seeds the next run so the
  // cumulated transcript survives a pipeline re-creation during transcription.
  const carriedOverResultsRef = useRef<TranscriptionResult[]>([]);
  // True until a run has been started since the last time `transcribe` was false.
  const isFreshSessionRef = useRef(true);

  useEffect(() => {
    if (transcribe) {
      // Decide now whether this run may announce its start: a run re-created
      // right after another one (for instance on a language hot swap) must
      // stay silent.
      const mayAnnounceStart = isFreshSessionRef.current;
      const handleStart = () => {
        if (mayAnnounceStart) startCallbackRef.current?.();
      };
      const handleError = () => {
        errorCallbackRef.current?.();
      };
      const handleChange = (results: TranscriptionResult[]) => {
        changeCallbackRef.current?.(results);
      };

      const run = startTranscription(
        language ?? "fr-FR",
        carriedOverResultsRef.current,
        getAuthenticatedRequestContextOrThrow,
        handleStart,
        handleError,
        handleChange,
      );
      isFreshSessionRef.current = false;

      return () => {
        carriedOverResultsRef.current = run.stopTranscription();
      };
    }

    // Not transcribing: forget the previous transcript and re-arm the start
    // notification for the next session.
    // NOTE(review): this branch also runs on mount when `transcribe` starts
    // out false, so `onTranscriptionEnd` fires even if nothing ever started.
    carriedOverResultsRef.current = [];
    isFreshSessionRef.current = true;
    endCallbackRef.current?.();
    return undefined;
  }, [
    transcribe,
    language,
    getAuthenticatedRequestContextOrThrow,
    errorCallbackRef,
    startCallbackRef,
    endCallbackRef,
    changeCallbackRef,
  ]);
};

/**
 * Starts one transcription run: opens the speech-to-text WebSocket and the
 * microphone audio pipeline in parallel, streams audio frames to the socket
 * and reports the cumulated transcript through `onChange`.
 *
 * Once the run has been cancelled (by `stopTranscription` or by a failure),
 * none of the callbacks is invoked any more.
 *
 * @param language - Language sent to the backend in the first socket message.
 * @param initialCumulatedResults - Transcript to start from.
 * @param getAuthenticatedRequestContextOrThrow - Provides the request context
 *   used to open the socket; it is called synchronously.
 * @param onTranscriptionStart - Called once the audio pipeline is ready.
 * @param onTranscriptionError - Called at most once, when the socket or the
 *   audio pipeline fails while the run is still active.
 * @param onChange - Called with the cumulated transcript after each message.
 * @returns An object whose `stopTranscription` cancels the run and returns
 *   the cumulated transcript with every entry marked as final.
 */
const startTranscription = (
  language: string,
  initialCumulatedResults: TranscriptionResult[],
  getAuthenticatedRequestContextOrThrow: () => RequestContext<"AUTHENTICATED_AS_DOCTOR">,
  onTranscriptionStart: () => void,
  onTranscriptionError: () => void,
  onChange: (results: TranscriptionResult[]) => void,
) => {
  let websocket: WebSocket | undefined = undefined;
  let audioPipeline: AudioPipeline | undefined = undefined;
  let isClosed = false;
  // Audio frames captured before the socket is open; flushed in `onopen`.
  let audioBuffer: ArrayBuffer[] = [];
  let cumulatedResults: TranscriptionResult[] = initialCumulatedResults;

  const cancelAll = () => {
    isClosed = true;
    // Buffered frames will never be sent any more: release them.
    audioBuffer = [];
    if (websocket) {
      // Make sure the callbacks onTranscriptionError and
      // onChange will not be called later.
      websocket.onclose = () => null;
      websocket.onmessage = () => null;
      websocket.close();
    }
    audioPipeline?.close();
  };

  // Tears everything down and reports the failure, unless the run had already
  // been cancelled (the caller must not hear from a run it has stopped, and a
  // double failure must not be reported twice).
  const abortWithError = () => {
    const wasAlreadyClosed = isClosed;
    cancelAll();
    if (!wasAlreadyClosed) onTranscriptionError();
  };

  const stopTranscription = () => {
    cancelAll();
    // Right before returning result, all non-final results are switched to
    // final. The entries are updated in place, so arrays previously handed to
    // `onChange` observe the change too.
    cumulatedResults.forEach((it) => {
      it.isFinal = true;
    });
    return cumulatedResults;
  };

  webSocketForEndpointOrThrow(
    SOCKET_ENDPOINTS.PROVIDER_SPEECH_TO_TEXT,
    getAuthenticatedRequestContextOrThrow(),
    "speech2text-subprotocol",
  )
    .then((newWebsocket) => {
      websocket = newWebsocket;
      if (isClosed) {
        // The run was cancelled while the socket was being created: close it
        // and, crucially, do not attach any handler, otherwise its `close`
        // event would be reported as a transcription error.
        cancelAll();
        return;
      }

      newWebsocket.onopen = () => {
        // The configuration message must precede any audio frame.
        newWebsocket.send(
          JSON.stringify({
            sampleRate,
            language,
          }),
        );
        audioBuffer.forEach((frame) => {
          newWebsocket.send(frame);
        });
        audioBuffer = [];
      };

      newWebsocket.onmessage = (message) => {
        const newResult = JSON.parse(message.data) as TranscriptionResult;
        cumulatedResults = updateCumulatedTranscript(
          cumulatedResults,
          newResult,
        );
        onChange(cumulatedResults);
      };

      newWebsocket.onerror = () => {
        // Closing triggers `onclose`, which is where the error is reported.
        newWebsocket.close();
      };

      newWebsocket.onclose = () => {
        abortWithError();
      };
    })
    .catch((exception) => {
      abortWithError();
      notifier.error({ sentry: { exception } });
    });

  createAudioPipeline((message) => {
    // Frames still in flight after cancellation are dropped.
    if (isClosed) return;
    if (websocket?.readyState === WebSocket.OPEN) {
      websocket.send(message.data);
    } else {
      audioBuffer.push(message.data);
    }
  })
    .then((pipeline) => {
      audioPipeline = pipeline;
      if (isClosed) {
        // Cancelled while the pipeline was being created: release it.
        cancelAll();
      } else {
        onTranscriptionStart();
      }
    })
    .catch((exception) => {
      abortWithError();
      notifier.error({ sentry: { exception } });
    });

  return { stopTranscription };
};

/**
 * Handle on a running audio capture pipeline, as resolved by
 * `createAudioPipeline`.
 */
type AudioPipeline = {
  /** Tears the pipeline down and releases its audio resources. */
  close: () => void;
};

/**
 * Builds the next cumulated transcript: every final entry of the current
 * transcript is kept, non-final entries are dropped, and the new result is
 * appended. The input array is left untouched.
 */
const updateCumulatedTranscript = (
  cumulatedResult: TranscriptionResult[],
  newResult: TranscriptionResult,
) => {
  const nextTranscript: TranscriptionResult[] = [];
  for (const entry of cumulatedResult) {
    if (entry.isFinal) nextTranscript.push(entry);
  }
  nextTranscript.push(newResult);
  return nextTranscript;
};

/**
 * Creates the microphone capture pipeline: an `AudioContext`, the PCM
 * AudioWorklet node, and a media stream source fed by `getUserMedia`.
 *
 * @param onMessage - Receives every message posted by the worklet node on its
 *   port.
 * @returns An object whose `close` releases every resource of the pipeline;
 *   calling it more than once is a no-op.
 * @throws Rethrows any error raised while building the pipeline (for
 *   instance a rejected `getUserMedia` call), after releasing whatever had
 *   already been acquired.
 */
const createAudioPipeline = async (
  onMessage: (message: MessageEvent) => void,
) => {
  let audioContext: AudioContext | undefined = undefined;
  let pcmWorker: AudioWorkletNode | undefined = undefined;
  // Tracked on its own (rather than through `mediaSource`) so the microphone
  // is released even if creating the source node fails.
  let stream: MediaStream | undefined = undefined;
  let mediaSource: MediaStreamAudioSourceNode | undefined = undefined;
  let hasAlreadyBeenClosed = false;

  const close = () => {
    if (hasAlreadyBeenClosed) return;
    hasAlreadyBeenClosed = true;
    void audioContext?.close();
    pcmWorker?.port.close();
    pcmWorker?.disconnect();
    // Stopping the tracks is what actually releases the microphone.
    stream?.getTracks().forEach((track) => track.stop());
    mediaSource?.disconnect();
  };

  try {
    audioContext = new AudioContext({
      sampleRate,
    });

    await audioContext.audioWorklet.addModule(rawPCM16WorkerSource);
    pcmWorker = new AudioWorkletNode(audioContext, rawPCM16WorkerName, {
      outputChannelCount: [1],
    });

    stream = await navigator.mediaDevices.getUserMedia({
      audio: {
        deviceId: "default",
        sampleRate,
        sampleSize,
        channelCount,
      },
      video: false,
    });
    mediaSource = audioContext.createMediaStreamSource(stream);
    mediaSource.connect(pcmWorker);
    pcmWorker.port.onmessage = onMessage;
    pcmWorker.port.start();
  } catch (e: unknown) {
    // Release whatever was acquired before the failure, then let the caller
    // handle the error.
    close();
    throw e;
  }
  return { close };
};

/**
 * One transcription result, as parsed from a speech-to-text WebSocket message.
 */
export type TranscriptionResult = {
  /** Transcribed text. */
  text: string;
  /**
   * Final results are kept in the cumulated transcript; a non-final result is
   * dropped as soon as the next result arrives.
   */
  isFinal: boolean;
};
