import cuid from 'cuid';
import { APIModel } from 'services/api/embeddingModels/types';
import { identity, removeDuplicates } from 'utils/utils';

import WordEmbeddingAPI from '../../../services/api/wordEmbedding';
import { TokenizedWord, TokenizedWordStatus } from '../store/types';

/**
 * Tokenizes `text` through the tokenizer endpoint and classifies every
 * resulting token against the given embedding model.
 *
 * Steps:
 * 1. The trimmed text and the model's language are sent to
 *    `WordEmbeddingAPI.tokenizer`.
 * 2. Whitespace-only tokens and the fragments `<`, `u`, `>`, `/`, `/u` are
 *    dropped (presumably leftovers of `<u>…</u>` markup — confirm with the
 *    tokenizer contract). Entries without a token value are kept.
 * 3. The distinct non-empty tokens are sent to
 *    `WordEmbeddingAPI.wordsAvailable` for `model.id`.
 * 4. Entries flagged `is_stop` keep the `'STOP_WORD'` status and their
 *    synonyms untouched. Every other entry becomes `'AVAILABLE'` (synonyms
 *    filtered through `identity`) or `'FAULTY'` (synonyms cleared).
 *
 * @param text - Raw user text; surrounding whitespace is ignored.
 * @param model - Model whose `language` drives tokenization and whose `id`
 *   drives the availability lookup.
 * @returns The classified tokens in tokenizer order, or an empty array when
 *   `text` is blank or the tokenizer response has a falsy `status`.
 */
export const getTokensFromText = async (
  text: string,
  model: APIModel
): Promise<TokenizedWord[]> => {
  const trimmedText = text?.trim();
  if (!trimmedText) {
    return [];
  }

  const response = await WordEmbeddingAPI.tokenizer({
    text: trimmedText,
    language: model.language
  });
  if (!response.status) {
    return [];
  }

  const ignoredTokens = ['<', 'u', '>', '/', '/u'];

  const tokens = response.data
    .filter(({ token }) => {
      // Entries without a token value are deliberately kept.
      if (!token) {
        return true;
      }
      const trimmedToken = token.trim();
      return trimmedToken !== '' && !ignoredTokens.includes(trimmedToken);
    })
    .map<TokenizedWord>(data => ({
      id: cuid(),
      token: data.token,
      word: data.string,
      indexStart: data.index_start,
      indexEnd: data.index_end,
      status: data.is_stop ? 'STOP_WORD' : undefined,
      synonyms: data.synonyms,
      isLoading: false
    }));

  const uniqueWords = removeDuplicates(
    tokens.map(({ token }) => token).filter((token): token is string => !!token)
  );

  const availabilityResponse = await WordEmbeddingAPI.wordsAvailable({
    words: uniqueWords,
    model: model.id
  });

  // A Map is used instead of a plain object so that tokens such as
  // "constructor", "toString" or "__proto__" cannot resolve to inherited
  // Object.prototype members and be misreported as available.
  const availabilityByWord = new Map<string, boolean>();
  availabilityResponse.data.forEach(({ is_available: isAvailable, word }) => {
    availabilityByWord.set(word, isAvailable);
  });

  tokens.forEach(token => {
    // Stop words were classified above; leave their status and synonyms alone.
    if (token.status !== undefined) {
      return;
    }

    if (availabilityByWord.get(token.token)) {
      token.status = 'AVAILABLE';
      token.synonyms = token.synonyms.filter(identity);
    } else {
      token.status = 'FAULTY';
      token.synonyms = [];
    }
  });

  return tokens;
};

/** Per-status counters produced by {@link getWordsStats}. */
export type WordsStats = {
  /** Number of entries whose status is `'STOP_WORD'`. */
  stopWords: number;
  /** Number of entries whose status is `'AVAILABLE'`. */
  available: number;
  /** Number of entries whose status is `'FAULTY'`. */
  faulty: number;
  /** Number of entries seen, including those with a missing or other status. */
  total: number;
};

/** Minimal shape required for counting: anything carrying an optional status. */
type StatusEntry = {
  status?: TokenizedWordStatus;
};

/**
 * Counts how many entries fall into each word status.
 *
 * @param statusEntires - Entries to tally; the array is only read, never
 *   mutated, so readonly arrays are accepted.
 * @returns A fresh {@link WordsStats} object. `total` equals the number of
 *   entries, so it can exceed the sum of the three status counters when some
 *   entries have no status (or one that is not counted here).
 */
export const getWordsStats = (statusEntires: readonly StatusEntry[]): WordsStats => {
  const stats: WordsStats = {
    stopWords: 0,
    available: 0,
    faulty: 0,
    total: 0
  };

  for (const { status } of statusEntires) {
    switch (status) {
      case 'AVAILABLE':
        stats.available += 1;
        break;
      case 'STOP_WORD':
        stats.stopWords += 1;
        break;
      case 'FAULTY':
        stats.faulty += 1;
        break;
      default:
        // Missing or other statuses only contribute to the total.
        break;
    }
    stats.total += 1;
  }

  return stats;
};
