/**
 * Tokens that `removeProductStopwords` drops from a product name.
 *
 * Membership is checked with an exact, case-sensitive `Set.has`, so every
 * entry is stored in lowercase. Each entry appears exactly once.
 */
export const PRODUCT_STOPWORDS: Set<string> = new Set<string>([
  // Packaging, container and unit-of-measure words.
  "bag", "bagged", "bunch", "box", "bundle",
  "carton", "case", "clamshell", "container", "count",
  "crate", "ct", "dozen", "each",
  // NOTE(review): "f" / "fcy" look like grade abbreviations — confirm with data owners.
  "f", "fcy",
  "gallon", "jar", "kg", "lb", "liter",
  "loaf", "pack", "packet", "piece", "pint",
  "pound", "pouch", "quart", "roll", "sack",
  "sheet", "tote", "unit",
  // Short code-like tokens.
  // "x/xf" contains a slash, so it can only match names that have NOT been
  // passed through `processProductName` (which strips non-letters).
  "x/xf", "x", "xf", "fxf",
  // NOTE(review): presumably origin/region or label codes — confirm.
  "bc", "wa", "ca", "ir", "upc", "mx", "vf",
]);

/**
 * Spelling normalizations applied by `processProductName`.
 * Keys are matched as lowercase substrings of the cleaned name.
 */
export const PRODUCT_NAME_REPLACEMENTS = new Map<string, string>([
  // Plural berry names are not listed here; they could be handled with a
  // regex or a replace on "berries" instead:
  // ["blueberries", "blueberry"], ["raspberries", "raspberry"],
  ["macintosh", "mcintosh"],
]);

/**
 * Normalizes a raw product name for matching.
 *
 * Steps, in order:
 * 1. Remove every character that is not an ASCII letter or whitespace.
 * 2. Collapse each run of two or more spaces into a single space.
 * 3. Lowercase the result.
 * 4. Apply every entry of `PRODUCT_NAME_REPLACEMENTS` to all occurrences.
 *
 * Leading/trailing whitespace and non-space whitespace (tabs, newlines) are
 * left as they are.
 *
 * @param word - Raw product name (may contain several words).
 * @returns The cleaned, lowercased name.
 */
export const processProductName = (word: string): string => {
  let processedName = word
    .replace(/[^a-zA-Z\s]/g, "")
    // Stripping digits/punctuation can leave runs of more than two spaces;
    // a global regex collapses all of them, not just the first pair.
    .replace(/ {2,}/g, " ")
    .toLowerCase();
  for (const [key, value] of PRODUCT_NAME_REPLACEMENTS) {
    // An empty key would make split("") explode the name into characters.
    if (key === "") {
      continue;
    }
    // split/join replaces every occurrence (String.replace with a string
    // pattern only replaces the first one).
    processedName = processedName.split(key).join(value);
  }
  return processedName;
};

/**
 * Drops every token of `productName` that is listed in `PRODUCT_STOPWORDS`.
 *
 * The name is tokenized on single space characters and the surviving tokens
 * are re-joined with single spaces. Matching is exact and case-sensitive.
 *
 * @param productName - Space-separated product name.
 * @returns The name with stopword tokens removed.
 */
export const removeProductStopwords = (productName: string): string => {
  const kept: string[] = [];
  for (const token of productName.split(" ")) {
    if (PRODUCT_STOPWORDS.has(token)) {
      continue;
    }
    kept.push(token);
  }
  return kept.join(" ");
};

/**
 * Strips one trailing lowercase "s" from each space-separated word, unless
 * the word ends in "ss" (e.g. "hass" is kept as-is).
 *
 * @param productName - Space-separated product name.
 * @returns The name with a single trailing "s" removed from each word.
 */
export const removeTrailingS = (productName: string): string => {
  const stripOne = (token: string): string => {
    const endsWithSingleS = token.endsWith("s") && !token.endsWith("ss");
    return endsWithSingleS ? token.slice(0, -1) : token;
  };

  const result: string[] = [];
  for (const token of productName.split(" ")) {
    result.push(stripOne(token));
  }
  return result.join(" ");
};
