import { stemmer } from './stemmer'
import { stopwords } from './stopwords'

// Result of applying `stemmer` to every entry of `stopwords`, computed once
// when this module is loaded. Not referenced by any active code visible in
// this file.
// NOTE(review): `map` invokes its callback with (value, index, array), so
// `stemmer` receives two extra arguments here — confirm that it ignores them.
const stopStems = stopwords.map(stemmer)

/**
 * Splits text into lowercase word tokens.
 *
 * The text is lowercased and split on every run of characters that are not
 * Unicode letters, combining marks, digits or underscores. Accented and
 * non-Latin words therefore stay intact instead of being cut at the first
 * non-ASCII character. Empty fragments and tokens shorter than `minLength`
 * are dropped.
 *
 * Stemming and stop-word removal are currently disabled (see the
 * commented-out steps in the body); tokens are returned unstemmed.
 *
 * @param text - Raw input text.
 * @param minLength - Minimum token length (in UTF-16 code units) for a token
 *   to be kept. Defaults to 2.
 * @returns The tokens in input order; an empty array if none qualify.
 */
export function tokenize(text: string, minLength = 2): string[] {
  const words = text
    .toLowerCase()
    // `\W` only recognises ASCII word characters, even with the `u` flag, so
    // Unicode property escapes are used to keep non-ASCII letters and digits.
    // `_` stays a word character, exactly as it is under `\w`.
    .split(/[^\p{L}\p{M}\p{N}_]+/u)
    // .map(word => stemmer(word))
    // Splitting leaves '' at the edges (or alone for empty input); never
    // report those as tokens, whatever `minLength` is.
    .filter(word => word.length > 0 && word.length >= minLength)

  // Stop-stem filtering is currently disabled.
  return words // .filter(word => stopStems.indexOf(word) === -1)
}
