import { downloadDataFile, backend, apFly } from "System/system";
import store from "System/mainStore";

/**
 * Ask the "classify/sources" endpoint for a prediction for one source string.
 *
 * The response text is expected to consist of exactly two colon-separated
 * parts; the trimmed second part is the prediction.
 *
 * @param {string} source - Source text sent to the classifier.
 * @returns {Promise<string|null>} The trimmed prediction, or null when the
 *   response carries no usable result or does not split into two parts.
 */
export const classifySingleSource = async (source) => {
  const response = await apFly("classify/sources", { source });

  const payload = response.data;
  if (!payload || !payload.result) {
    return null;
  }

  const parts = payload.result.split(":");
  // Anything other than a single "left:right" pair is treated as no answer.
  return parts.length === 2 ? parts[1].trim() : null;
};

const condense = function (str) {
  // Comparison key: drop every run of whitespace, then uppercase the rest.
  const withoutWhitespace = str.split(/\s+/).join("");
  return withoutWhitespace.toUpperCase();
};

/**
 * Thunk: predict a source label for every row of the "source" norming
 * dataset, publish the predictions to the store and persist them.
 *
 * Per row, in order of preference:
 *   1. an empty (or missing) `Source_SB` value is labelled "UNKNOWN";
 *   2. if the condensed value contains one of the condensed substring sources
 *      returned by "classify/retrieve_substring_sources", the longest such
 *      source (trimmed) is used;
 *   3. otherwise the value is sent to `classifySingleSource`, whose result
 *      (possibly null) becomes the prediction.
 *
 * Progress is reported through INIT/UPDATE/CLOSE_MAIN_PROGRESS actions; the
 * progress is always closed, even when a classification call rejects.
 *
 * @returns {function(Function): Promise<null>} Thunk taking `dispatch`; it
 *   resolves to null once the predictions have been persisted.
 */
export const batchClassifySources = () => async (dispatch) => {
  const datatype = "source";

  const { norming, projects } = store.getState();
  const projectid = projects.project._id;
  const { data } = norming[datatype];

  // One single-element row per input item; slot 0 holds the prediction.
  const predictions = data.map(() => [""]);

  // Fall back to an empty list when the lookup returns no usable payload, so
  // every non-empty row is simply classified individually instead of crashing.
  const result = await apFly("classify/retrieve_substring_sources", {});
  const substringSources =
    result && result.data && Array.isArray(result.data.sources)
      ? result.data.sources
      : [];

  // Condense each candidate once rather than once per data row.
  const candidates = substringSources.map((name) => ({
    name,
    key: condense(name),
  }));

  const queries = [];
  data.forEach((source, index) => {
    const raw = source["Source_SB"];
    // A missing value is treated like an empty one.
    const inputString = raw == null ? "" : String(raw).trim();

    if (inputString === "") {
      predictions[index][0] = "UNKNOWN";
      return;
    }

    const condensedInput = condense(inputString);
    // The longest matching candidate is the best match; on equal length the
    // first one encountered wins.
    let best = null;
    for (const candidate of candidates) {
      if (
        condensedInput.includes(candidate.key) &&
        (best === null || candidate.name.length > best.length)
      ) {
        best = candidate.name;
      }
    }

    if (best !== null) {
      predictions[index][0] = best.trim();
    } else {
      queries.push({ inputString, index });
    }
  });

  dispatch({
    type: "INIT_MAIN_PROGRESS",
    title: "Classifying Sources",
    total: queries.length,
  });

  try {
    // `entries()` yields a numeric position, so `current` is a number.
    for (const [position, query] of queries.entries()) {
      console.log("Classifying", query.inputString, position, queries.length);
      dispatch({
        type: "UPDATE_MAIN_PROGRESS",
        current: position,
        status: "Classifying:" + query.inputString,
      });
      const match = await classifySingleSource(query.inputString);
      predictions[query.index][0] = match;
    }
  } finally {
    // Never leave the progress indicator open if a request fails.
    dispatch({
      type: "CLOSE_MAIN_PROGRESS",
    });
  }

  dispatch({
    type: "SET_PREDICTIONS",
    datatype,
    predictions,
  });

  // Awaited so that a persistence failure rejects the thunk instead of
  // becoming an unhandled rejection.
  await backend("norming", "update", {
    projectid,
    datatype,
    updateType: "predictions",
    dataString: JSON.stringify(predictions),
  });

  return null;
};
/**
 * Thunk: classify every row of the "source" norming dataset in batches via
 * the "classify/sources2" endpoint, publish the `[normalized, score]`
 * predictions to the store and persist them.
 *
 * Each request carries the full serialized source list together with the
 * batch size and batch number. Results of batch `b` are written to rows
 * starting at `b * batch_size`, so a batch that returns nothing leaves its
 * rows at the initial `[""]` without shifting later batches.
 * NOTE(review): this assumes the endpoint returns the results of a batch in
 * input order for exactly that slice — confirm against the service.
 *
 * @returns {function(Function): Promise<null>} Thunk taking `dispatch`; it
 *   resolves to null once the predictions have been persisted.
 */
export const batchClassifySources2 = () => async (dispatch) => {
  const datatype = "source";

  const { norming, projects } = store.getState();
  const projectid = projects.project._id;
  const { data } = norming[datatype];

  const predictions = data.map(() => [""]);

  const sources = data.map((item) => item["Source_SB"]);
  // The payload is identical for every batch, so serialize it only once.
  const serializedSources = JSON.stringify(sources);

  const batch_size = 200;
  const numberOfBatches = Math.ceil(sources.length / batch_size);
  console.log("Number of batches:", numberOfBatches);

  dispatch({
    type: "INIT_MAIN_PROGRESS",
    title: "Classifying Sources",
    total: numberOfBatches,
  });

  try {
    for (let batch_number = 0; batch_number < numberOfBatches; batch_number++) {
      dispatch({
        type: "UPDATE_MAIN_PROGRESS",
        current: batch_number,
        status: "Classifying batch:" + batch_number + " of " + numberOfBatches,
      });
      const result = await apFly("classify/sources2", {
        batch_size,
        batch_number,
        sources: serializedSources,
      });

      const results = result && result.data ? result.data.results : undefined;
      if (!Array.isArray(results)) {
        console.warn("No results for batch", batch_number);
        continue;
      }

      // Address rows by batch offset instead of a running counter, so a
      // skipped batch cannot misalign the batches that follow it.
      const offset = batch_number * batch_size;
      results.forEach((item, index) => {
        const row = offset + index;
        // Ignore surplus results rather than growing past the dataset.
        if (row < predictions.length) {
          predictions[row] = [item.normalized, item.score];
        }
      });
    }

    dispatch({
      type: "SET_PREDICTIONS",
      datatype,
      predictions,
    });
  } finally {
    // Never leave the progress indicator open if a request fails.
    dispatch({
      type: "CLOSE_MAIN_PROGRESS",
    });
  }

  await backend("norming", "update", {
    projectid,
    datatype,
    updateType: "predictions",
    dataString: JSON.stringify(predictions),
  });

  return null;
};
