import { useEffect } from "react";
import JSZip from "jszip";

import { createAtom } from "atoms/createAtom";
import {
  CPT_CODES_STATISTICS_KEY,
  ICD10_CM_CODES_STATISTICS_KEY,
  ICD10_WHO_CODES_STATISTICS_KEY,
} from "consts";
import { DoctorSummaryFragment } from "generated/provider";
import { useSyncRef } from "hooks/useSyncRef";
import { LOCALE } from "i18n";
import {
  CptCode,
  CptCodeKnowledgeBaseEntry,
  ICD10Code,
  ICD10KnowledgeBaseEntry,
} from "types";
import { notifier } from "utils/notifier";
import stopWords from "utils/stopWords.json";

import cptKnowledgeBaseUrl from "./cptCodesKnowledgeBase.jsonl?url";
import icd10CM2024KnowledgeBaseUrl from "./icd10CM2024KnowledgeBase.jsonl.zip?url";
import icd10CMKnowledgeBaseUrl from "./icd10CMKnowledgeBase.jsonl.zip?url";
import icd10WHOKnowledgeBaseUrl from "./icd10WHOKnowledgeBase.jsonl.zip?url";

// Number of results a search returns when the caller does not provide
// `maxNumItems`.
const DEFAULT_MAX_NUM_ITEMS = 20;
// Optimization. During search, before re-ranking, filtering stops early
// once this many candidates have been collected
// (search time went down from ~15ms to < 1ms on a M1 MacBook pro
// with this optimization)
const MAX_NUM_ITEMS_AFTER_FILTERING = 300;

/**
 * Produces the canonical form of `text` used for accent- and
 * case-insensitive comparisons: the string is decomposed (NFD), the
 * combining diacritical marks (U+0300 to U+036F) are removed, and the
 * result is lower-cased.
 */
const normalize = (text: string) => {
  const decomposed = text.normalize("NFD");
  const withoutDiacritics = decomposed.replace(/[\u0300-\u036f]/gu, "");
  return withoutDiacritics.toLowerCase();
};

/**
 * Searches a knowledge base (ICD-10 or CPT) for the entries matching `query`.
 *
 * The search runs in two phases:
 * 1. Filter: the codes present in `knowledgeBase.statistics` are examined
 *    first, then the remaining entries in map insertion order. Filtering
 *    stops once `MAX_NUM_ITEMS_AFTER_FILTERING` candidates are collected.
 * 2. Rerank: candidates are ordered by ascending score and truncated to
 *    `maxNumItems` (`DEFAULT_MAX_NUM_ITEMS` when omitted).
 *
 * An entry exposing a `search` field matches when that field contains every
 * query token. Any other entry matches when its `description` contains every
 * token, or when its code contains the query; the code comparison is
 * case-insensitive and ignores whitespace around the query.
 *
 * @param knowledgeBase Entries keyed by lower-cased code, together with the
 *   usage counters used to prioritise codes.
 * @param query Raw user input.
 * @param maxNumItems Maximum number of entries to return.
 * @param language When provided, entries whose label in that language
 *   contains the whole query (for queries longer than 5 characters) are
 *   ranked ahead of the other non-exact matches.
 * @param pickedCodes Codes that must be left out of the results.
 * @returns The matching entries, best match first. Empty when no usable
 *   token remains (e.g. a query made only of spaces or of several stop words).
 */
const searchKnowledgeBase = (
  knowledgeBase:
    | {
        entries: Map<string, ICD10KnowledgeBaseEntry>;
        statistics: Icd10CodesStatistics;
      }
    | {
        entries: Map<string, CptCodeKnowledgeBaseEntry>;
        statistics: CptCodesStatistics;
      },
  query: string,
  maxNumItems?: number,
  language?: LOCALE,
  pickedCodes: (ICD10Code | CptCode)[] = [],
) => {
  const cap = maxNumItems ?? DEFAULT_MAX_NUM_ITEMS;
  const normalizedQuery = normalize(query);
  // Stop words are dropped, unless the whole query is that single word.
  const tokens = normalizedQuery
    .split(" ")
    .filter(
      (token) =>
        token === normalizedQuery ||
        (!stopWords.fr.includes(token) &&
          !stopWords.en.includes(token) &&
          token.isNotEmpty()),
    );
  if (tokens.isEmpty()) return [];

  // Codes are compared lower-cased against the trimmed, normalized query so
  // that "0001u" or "99213 " find the entry, consistently with the
  // case-insensitive exact-code check of the rerank step below.
  const codeQuery = normalizedQuery.trim();
  const matchesQuery = (
    item: ICD10KnowledgeBaseEntry | CptCodeKnowledgeBaseEntry,
  ) => {
    if ("search" in item) {
      const { search } = item;
      return tokens.every((token) => search.includes(token));
    }
    const { description } = item;
    return (
      item.code.toLowerCase().includes(codeQuery) ||
      tokens.every((token) => description.includes(token))
    );
  };

  // Filter
  const filteredKnowledge: (
    | ICD10KnowledgeBaseEntry
    | CptCodeKnowledgeBaseEntry
  )[] = [];
  const toSkip = new Set<string>(pickedCodes);
  // First pass: codes that have usage statistics.
  for (const code of Object.keys(knowledgeBase.statistics)) {
    const item = knowledgeBase.entries.get(code.toLowerCase());
    if (item && !toSkip.has(item.code) && matchesQuery(item)) {
      filteredKnowledge.push(item);
    }
    // Whatever the outcome, this entry must not be examined again in the
    // second pass. The entry's own code is recorded as well, in case the
    // statistics key differs from it by case.
    toSkip.add(code);
    if (item) toSkip.add(item.code);
    if (filteredKnowledge.length >= MAX_NUM_ITEMS_AFTER_FILTERING) break;
  }
  // Second pass: every remaining entry.
  for (const item of knowledgeBase.entries.values()) {
    if (filteredKnowledge.length >= MAX_NUM_ITEMS_AFTER_FILTERING) break;
    if (!toSkip.has(item.code) && matchesQuery(item)) {
      filteredKnowledge.push(item);
    }
    // Note: values() keep the insertion order
    // (https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/values)
    // and we made sure to insert items by ascending size of icd10Code,
    // so in case we reach this limit, the selected items make sense.
  }

  // Rerank (lower score first)
  return filteredKnowledge
    .sortAsc((item) => {
      if (tokens.includes(item.code.toLowerCase())) return 0; // top priority if exact match with code
      const count = knowledgeBase.statistics[item.code];
      if (
        language &&
        query.length > 5 &&
        "en" in item &&
        item[language] &&
        normalize(item[language]).includes(normalizedQuery)
      ) {
        // Exact match for "Vitamin D" type of query, sorted by descending count (score in ]0, 1])
        return 1 / (1 + (count ?? 0));
      }
      // Make seen items appear strictly before unseen items (score in ]1, 1.5])
      if (count) return 1 + 1 / (1 + count);
      return (
        item.code.length * 5 +
        ("en" in item ? item.en.length : item.description.length)
      ); // arbitrary (score > 1.5)
    })
    .slice(0, cap);
};

// Usage counters keyed by CPT code. A counter is incremented by one each time
// `consumeCptCode` is called for that code; codes never consumed are absent.
type CptCodesStatistics = Partial<Record<CptCode, number>>;
// Usage counters keyed by ICD-10 code (same shape for the CM and WHO
// variants). Incremented by `consumeIcd10CMCode` / `consumeIcd10WHOCode`.
type Icd10CodesStatistics = Partial<Record<ICD10Code, number>>;

/**
 * Store holding the three medical-code knowledge bases (ICD-10-CM,
 * ICD-10-WHO and CPT).
 *
 * Every knowledge base starts as an empty map flagged `loading: true`.
 * The `set*KnowledgeBase` actions index the provided entries by lower-cased
 * code, inserting them by ascending code length, and flag the knowledge base
 * as loaded. The `get*ByCode` actions look a code up case-insensitively and
 * the `search*` actions delegate to `searchKnowledgeBase`.
 */
export const useMedicalCodes = createAtom(
  {
    icd10CM: {
      icd10CMKnowledgeBase: new Map<string, ICD10KnowledgeBaseEntry>(),
      loading: true,
    },
    icd10WHO: {
      icd10WHOKnowledgeBase: new Map<string, ICD10KnowledgeBaseEntry>(),
      loading: true,
    },
    cpt: {
      cptKnowledgeBase: new Map<string, CptCodeKnowledgeBaseEntry>(),
      loading: true,
    },
  },

  ({ set, get }) => ({
    // --- ICD-10-CM ---------------------------------------------------------
    setICD10CMKnowledgeBase: (items: ICD10KnowledgeBaseEntry[]) => {
      // Shorter codes are inserted first so that map iteration yields them first.
      const icd10CMKnowledgeBase = new Map(
        items
          .sortAsc((entry) => entry.code.length)
          .map((entry): [string, ICD10KnowledgeBaseEntry] => [
            entry.code.toLowerCase(),
            entry,
          ]),
      );
      set({ icd10CM: { icd10CMKnowledgeBase, loading: false } });
    },
    getICD10CMByCode: (code: string) => {
      const { icd10CMKnowledgeBase } = get().icd10CM;
      return icd10CMKnowledgeBase.get(code.toLowerCase());
    },

    // --- ICD-10-WHO --------------------------------------------------------
    setICD10WHOKnowledgeBase: (items: ICD10KnowledgeBaseEntry[]) => {
      const icd10WHOKnowledgeBase = new Map(
        items
          .sortAsc((entry) => entry.code.length)
          .map((entry): [string, ICD10KnowledgeBaseEntry] => [
            entry.code.toLowerCase(),
            entry,
          ]),
      );
      set({ icd10WHO: { icd10WHOKnowledgeBase, loading: false } });
    },
    getICD10WHOByCode: (code: string) => {
      const { icd10WHOKnowledgeBase } = get().icd10WHO;
      return icd10WHOKnowledgeBase.get(code.toLowerCase());
    },

    // --- CPT ---------------------------------------------------------------
    setCptKnowledgeBase: (items: CptCodeKnowledgeBaseEntry[]) => {
      const cptKnowledgeBase = new Map(
        items
          .sortAsc((entry) => entry.code.length)
          .map((entry): [string, CptCodeKnowledgeBaseEntry] => [
            entry.code.toLowerCase(),
            entry,
          ]),
      );
      set({ cpt: { cptKnowledgeBase, loading: false } });
    },
    getCptByCode: (code: string) => {
      const { cptKnowledgeBase } = get().cpt;
      return cptKnowledgeBase.get(code.toLowerCase());
    },

    // --- Search ------------------------------------------------------------
    searchICD10CM: ({
      query,
      maxNumItems,
      language,
      icd10CMCodeStatistics,
      pickedCodes,
    }: {
      query: string;
      maxNumItems?: number;
      language: LOCALE;
      icd10CMCodeStatistics: Icd10CodesStatistics;
      pickedCodes: ICD10Code[];
    }) => {
      const knowledgeBase = {
        entries: get().icd10CM.icd10CMKnowledgeBase,
        statistics: icd10CMCodeStatistics,
      };
      return searchKnowledgeBase(
        knowledgeBase,
        query,
        maxNumItems,
        language,
        pickedCodes,
      );
    },
    searchICD10WHO: ({
      query,
      maxNumItems,
      pickedCodes,
      icd10WHOCodeStatistics,
    }: {
      query: string;
      maxNumItems?: number;
      pickedCodes: ICD10Code[];
      icd10WHOCodeStatistics: Icd10CodesStatistics;
    }) => {
      const knowledgeBase = {
        entries: get().icd10WHO.icd10WHOKnowledgeBase,
        statistics: icd10WHOCodeStatistics,
      };
      // The WHO search always runs with the "fr" locale.
      return searchKnowledgeBase(
        knowledgeBase,
        query,
        maxNumItems,
        "fr",
        pickedCodes,
      );
    },
    searchCpt: ({
      query,
      maxNumItems,
      cptCodeStatistics,
      pickedCodes,
    }: {
      query: string;
      maxNumItems?: number;
      cptCodeStatistics: CptCodesStatistics;
      pickedCodes: CptCode[];
    }) => {
      const knowledgeBase = {
        entries: get().cpt.cptKnowledgeBase,
        statistics: cptCodeStatistics,
      };
      // No language is passed for the CPT search.
      return searchKnowledgeBase(
        knowledgeBase,
        query,
        maxNumItems,
        undefined,
        pickedCodes,
      );
    },
  }),
);

// Id of the most recently started ICD-10-CM download. A download only
// publishes its result when it is still the latest one, so that a slower,
// older request (e.g. started before `isNoteNormalization` changed) cannot
// overwrite the knowledge base loaded by a newer request.
let latestICD10CMRequestId = 0;

/**
 * Downloads the ICD-10-CM knowledge base and publishes it to the
 * `useMedicalCodes` store.
 *
 * The knowledge base is a zip archive containing a single JSON Lines file.
 * The download restarts whenever `isNoteNormalization` changes; results of
 * superseded downloads are discarded. Failures (network, corrupted archive,
 * missing archive entry, invalid JSON) are reported through `notifier.error`.
 *
 * @param isNoteNormalization When true the 2024 edition of the knowledge
 *   base is loaded, otherwise the default one.
 */
export const useFetchICD10CMKnowledgeBase = (isNoteNormalization: boolean) => {
  const { setICD10CMKnowledgeBase } = useMedicalCodes();
  const setICD10CMKnowledgeBaseRef = useSyncRef(setICD10CMKnowledgeBase);

  // The icd10CMKnowledgeBase is a zip archive, in which one can find
  // an actual file "icd10CMKnowledgeBase.jsonl"
  useEffect(() => {
    latestICD10CMRequestId += 1;
    const requestId = latestICD10CMRequestId;
    const archiveUrl = isNoteNormalization
      ? icd10CM2024KnowledgeBaseUrl
      : icd10CMKnowledgeBaseUrl;
    const entryName = isNoteNormalization
      ? "icd10CM2024KnowledgeBase.jsonl"
      : "icd10CMKnowledgeBase.jsonl";

    const load = async () => {
      const response = await fetch(archiveUrl);
      const zip = await new JSZip().loadAsync(await response.blob());
      const entry = zip.file(entryName);
      if (!entry) {
        // Report instead of returning silently: otherwise the store would
        // stay in its loading state forever without any trace.
        throw new Error(
          `Entry "${entryName}" not found in the ICD-10-CM knowledge base archive`,
        );
      }
      const content = await entry.async("string");
      // A newer download has been started in the meantime: drop this result.
      if (requestId !== latestICD10CMRequestId) return;
      // One JSON document per non-blank line, whatever the line ending.
      const allRefs = content
        .split("\n")
        .flatMap((it) => it.split("\r"))
        .filter((it) => it.isNotBlank())
        .map((it) => JSON.parse(it) as ICD10KnowledgeBaseEntry);
      setICD10CMKnowledgeBaseRef.current(allRefs);
    };

    load().catch((error: Error) => {
      notifier.error({ sentry: { exception: error } });
    });
  }, [setICD10CMKnowledgeBaseRef, isNoteNormalization]);
};

/**
 * Downloads the ICD-10-WHO knowledge base and publishes it to the
 * `useMedicalCodes` store.
 *
 * The knowledge base is a zip archive containing the JSON Lines file
 * "icd10WHOKnowledgeBase.jsonl". Failures (network, corrupted archive,
 * missing archive entry, invalid JSON) are reported through `notifier.error`.
 */
export const useFetchICD10WHOKnowledgeBase = () => {
  const { setICD10WHOKnowledgeBase } = useMedicalCodes();
  const setICD10WHOKnowledgeBaseRef = useSyncRef(setICD10WHOKnowledgeBase);

  useEffect(() => {
    const entryName = "icd10WHOKnowledgeBase.jsonl";

    const load = async () => {
      const response = await fetch(icd10WHOKnowledgeBaseUrl);
      const zip = await new JSZip().loadAsync(await response.blob());
      const entry = zip.file(entryName);
      if (!entry) {
        // Report instead of returning silently: otherwise the store would
        // stay in its loading state forever without any trace.
        throw new Error(
          `Entry "${entryName}" not found in the ICD-10-WHO knowledge base archive`,
        );
      }
      const content = await entry.async("string");
      // One JSON document per non-blank line, whatever the line ending.
      const allRefs = content
        .split("\n")
        .flatMap((it) => it.split("\r"))
        .filter((it) => it.isNotBlank())
        .map((it) => JSON.parse(it) as ICD10KnowledgeBaseEntry);
      setICD10WHOKnowledgeBaseRef.current(allRefs);
    };

    load().catch((error: Error) => {
      notifier.error({ sentry: { exception: error } });
    });
  }, [setICD10WHOKnowledgeBaseRef]);
};

/**
 * Downloads the CPT knowledge base (a plain JSON Lines file) and publishes
 * it to the `useMedicalCodes` store. Entries with an empty description are
 * left out.
 *
 * Failures (network, invalid JSON) are reported through `notifier.error`,
 * like the ICD-10 loaders do, instead of ending up as unhandled promise
 * rejections.
 */
export const useFetchCptKnowledgeBase = () => {
  const { setCptKnowledgeBase } = useMedicalCodes();
  const setCptKnowledgeBaseRef = useSyncRef(setCptKnowledgeBase);

  useEffect(() => {
    fetch(cptKnowledgeBaseUrl)
      .then((resp) => resp.text())
      .then((resp) => {
        const allRefs = resp
          .split("\n")
          .flatMap((it) => it.split("\r"))
          // Skip empty and whitespace-only lines: JSON.parse would throw on them.
          .filter((it) => it.trim() !== "")
          .map((it) => JSON.parse(it) as CptCodeKnowledgeBaseEntry)
          .filter((it) => it.description !== "");
        setCptKnowledgeBaseRef.current(allRefs);
      })
      .catch((error: Error) => {
        notifier.error({ sentry: { exception: error } });
      });
  }, [setCptKnowledgeBaseRef]);
};

/**
 * Reads the ICD-10-CM usage counters stored for `doctor`.
 *
 * The counters live in `storage` as a JSON string, under a key made of
 * `ICD10_CM_CODES_STATISTICS_KEY` and the doctor's uuid. An empty object is
 * returned when `storage.getItem` yields null or undefined for that key.
 */
export const getIcd10CMCodeStatistics = (doctor: DoctorSummaryFragment) => {
  const storageKey = `${ICD10_CM_CODES_STATISTICS_KEY}:${doctor.uuid}`;
  const serialized = storage.getItem(storageKey) ?? "{}";
  return JSON.parse(serialized) as Icd10CodesStatistics;
};

/**
 * Reads the ICD-10-WHO usage counters stored for `doctor`.
 *
 * The counters live in `storage` as a JSON string, under a key made of
 * `ICD10_WHO_CODES_STATISTICS_KEY` and the doctor's uuid. An empty object is
 * returned when `storage.getItem` yields null or undefined for that key.
 */
export const getIcd10WHOCodeStatistics = (doctor: DoctorSummaryFragment) => {
  const storageKey = `${ICD10_WHO_CODES_STATISTICS_KEY}:${doctor.uuid}`;
  const serialized = storage.getItem(storageKey) ?? "{}";
  return JSON.parse(serialized) as Icd10CodesStatistics;
};

/**
 * Reads the CPT usage counters stored for `doctor`.
 *
 * The counters live in `storage` as a JSON string, under a key made of
 * `CPT_CODES_STATISTICS_KEY` and the doctor's uuid. An empty object is
 * returned when `storage.getItem` yields null or undefined for that key.
 */
export const getCptCodeStatistics = (doctor: DoctorSummaryFragment) => {
  const storageKey = `${CPT_CODES_STATISTICS_KEY}:${doctor.uuid}`;
  const serialized = storage.getItem(storageKey) ?? "{}";
  return JSON.parse(serialized) as CptCodesStatistics;
};

/**
 * Records one more use of the ICD-10-CM `code` by `doctor`: the stored
 * counters are read, the counter of `code` is incremented (starting from 0
 * when absent) and the whole set is written back to `storage`.
 */
export const consumeIcd10CMCode = (
  code: ICD10Code,
  doctor: DoctorSummaryFragment,
) => {
  const statistics = getIcd10CMCodeStatistics(doctor);
  const previousCount = statistics[code] ?? 0;
  statistics[code] = previousCount + 1;

  const storageKey = `${ICD10_CM_CODES_STATISTICS_KEY}:${doctor.uuid}`;
  storage.setItem(storageKey, JSON.stringify(statistics));
};

/**
 * Records one more use of the CPT `code` by `doctor`: the stored counters
 * are read, the counter of `code` is incremented (starting from 0 when
 * absent) and the whole set is written back to `storage`.
 */
export const consumeCptCode = (
  code: CptCode,
  doctor: DoctorSummaryFragment,
) => {
  const statistics = getCptCodeStatistics(doctor);
  const previousCount = statistics[code] ?? 0;
  statistics[code] = previousCount + 1;

  const storageKey = `${CPT_CODES_STATISTICS_KEY}:${doctor.uuid}`;
  storage.setItem(storageKey, JSON.stringify(statistics));
};

/**
 * Records one more use of the ICD-10-WHO `code` by `doctor`: the stored
 * counters are read, the counter of `code` is incremented (starting from 0
 * when absent) and the whole set is written back to `storage`.
 */
export const consumeIcd10WHOCode = (
  code: ICD10Code,
  doctor: DoctorSummaryFragment,
) => {
  const statistics = getIcd10WHOCodeStatistics(doctor);
  const previousCount = statistics[code] ?? 0;
  statistics[code] = previousCount + 1;

  const storageKey = `${ICD10_WHO_CODES_STATISTICS_KEY}:${doctor.uuid}`;
  storage.setItem(storageKey, JSON.stringify(statistics));
};
