import type { MetricCategory } from "./metricsHelper";

/**
 * Reports the direction in which a metric is considered an improvement.
 *
 * Matching is case-insensitive: the name is lower-cased before it is compared,
 * so `"NRR_Turn"` and `"nrr_turn"` are treated the same way.
 *
 * Per the original author's note, the returned sign is meant to be combined
 * with the observed delta and p-value when colouring a result:
 *
 * - positive indicator (`1`):
 *   - delta > 0: p-value <= 0.001 is dark green, p-value <= 0.05 is light green.
 *   - delta < 0: p-value <= 0.001 is dark red, p-value <= 0.05 is light red.
 * - negative indicator (`-1`):
 *   - delta < 0: p-value <= 0.001 is dark green, p-value <= 0.05 is light green.
 *   - delta > 0: p-value <= 0.001 is dark red, p-value <= 0.05 is light red.
 *
 * The colouring itself is not performed here; this function only returns the sign.
 *
 * @param metric Metric name (any casing).
 * @returns `-1` when the metric is one of the known "lower is better" metrics,
 * `1` for every other name.
 */
export const checkMetricIsPositiveIndicator = (metric: string): number => {
  const lowerMetric = metric.toLowerCase();

  // Whole families of "bad cites" metrics (including suffixed variants such as
  // `_v2`) are negative indicators, so they are matched by prefix.
  const negativePrefixes = [
    "citedcg_num_bad_cites",
    "citedcg_all_bad_cites",
    "citedcg_webwork_num_bad_cites",
    "citedcg_webwork_all_bad_cites",
  ];
  if (negativePrefixes.some((prefix) => lowerMetric.startsWith(prefix))) {
    return -1;
  }

  // Exact names, all lower case because they are compared against lowerMetric.
  switch (lowerMetric) {
    case "sydney_e2e_latency":
    case "p75_latency":
    case "avg_latency":
    case "p99_latency":
    case "p90_latency":
    case "iterations_count":
    case "apology_rate":
    case "lang_mismatch":
    case "ndcg_llm_labeler_failure_rate":
    case "perc_llm_failed_annotation":
    case "nrr_turn":
    case "nrr_turn_3s":
    case "nrr_tool_3s_mean":
    case "no_citation_nor_annotation_rate":
    case "failure_reason_other_http_error":
    case "failure_reason_sydney_call_3s_plugin_failure":
    case "has_3s_partial_failures":
      return -1;

    default:
      return 1;
  }
};

/**
 * Maps a metric name to its display category, category index and stage.
 *
 * The name is lower-cased once and every rule below is evaluated against that
 * lower-cased form, so callers do not need to normalize casing themselves.
 * When adding a rule, write prefixes, patterns and `case` labels in lower case;
 * a label containing an upper-case letter can never match.
 *
 * Rules are evaluated in order and the first match wins:
 * 1. prefix / pattern rules (the `if` chain),
 * 2. exact-name rules (the `switch`),
 * 3. the TEPR prefixes,
 * 4. the "Others" fallback.
 *
 * @param metric Metric name (any casing).
 * @returns The category descriptor of the first matching rule, or the
 * "Others" descriptor when no rule matches.
 */
export const getMetricCategory = (metric: string): MetricCategory => {
  const lowerMetric = metric.toLowerCase();
  if (lowerMetric.startsWith("entity_annotations_")) {
    return {
      category: "Entity Annotations",
      categoryIndex: 15,
      stage: "Alpha",
    };
  }

  if (
    lowerMetric.startsWith("entitytype_") ||
    lowerMetric.startsWith("has_connector_")
  ) {
    return {
      category: "EntityType Breakdown",
      categoryIndex: 16,
      stage: "Alpha",
    };
  }

  if (
    lowerMetric.includes("_rate") &&
    (lowerMetric.startsWith("plugin_") ||
      lowerMetric.startsWith("entity_type_"))
  ) {
    return {
      category: "Citations/Annotations",
      categoryIndex: 17,
      stage: "Beta",
    };
  }

  // search_enterprise_<scope>_<metric>@<cutoff>, bucketed by cutoff.
  if (/^search_enterprise_[\S]+_[\S]+@01$/.test(lowerMetric)) {
    return {
      category: "NDCG@1",
      categoryIndex: 10,
      stage: "Production",
    };
  }
  if (/^search_enterprise_[\S]+_[\S]+@03$/.test(lowerMetric)) {
    return {
      category: "NDCG@3",
      categoryIndex: 11,
      stage: "Production",
    };
  }
  if (/^search_enterprise_[\S]+_[\S]+@10$/.test(lowerMetric)) {
    return {
      category: "NDCG@10",
      categoryIndex: 12,
      stage: "Production",
    };
  }
  if (/^ndcg@\d+$/.test(lowerMetric)) {
    return {
      category: "NDCG",
      categoryIndex: 23,
      stage: "Production",
    };
  }

  // The more specific "citedcg_webwork_" prefix must be tested before the
  // generic "citedcg_" prefix, which would otherwise swallow it.
  if (lowerMetric.startsWith("citedcg_webwork_")) {
    return {
      category: "CiteDCG_WebWork",
      categoryIndex: 21,
      stage: "Beta",
    };
  }

  // Covers every citedcg_* metric, so no exact-name cases are needed below.
  if (lowerMetric.startsWith("citedcg_")) {
    return {
      category: "CiteDCG",
      categoryIndex: 6,
      stage: "Beta",
    };
  }
  if (lowerMetric.startsWith("lmc_metric_")) {
    return {
      category: "LMC-based Metrics",
      categoryIndex: 19,
      stage: "Beta",
    };
  }
  if (
    (lowerMetric.includes("leo") ||
      lowerMetric.includes("lmchecklist") ||
      lowerMetric.includes("tcr")) &&
    lowerMetric.endsWith("_failure")
  ) {
    return {
      category: "Fundamental Metrics",
      categoryIndex: 2,
      stage: "Production",
    };
  }

  if (
    lowerMetric.startsWith("changerate") ||
    lowerMetric.startsWith("domain_")
  ) {
    return {
      category: "Impact Metrics",
      categoryIndex: 24,
      stage: "Production",
    };
  }
  if (lowerMetric.startsWith("gap") || lowerMetric.startsWith("vgap")) {
    return {
      category: "Precision Metrics",
      categoryIndex: 25,
      stage: "Production",
    };
  }

  // Exact names. Labels MUST be lower case: they are compared to lowerMetric.
  switch (lowerMetric) {
    case "groundleo_score":
    case "groundleo_claimbreak":
    case "groundleo_claimbreakv1_1":
    case "stewieleo_relevance":
    case "stewieleo_engagement":
    case "stewieleo_detail":
    case "stewieleo_clarity":
    case "stewieleo_score":
    case "pileo_score":
    case "groundleo":
    case "stewieleo":
    case "pileo":
    case "contextleo_score":
    case "scleo_score":
    case "ruleleo_score":
    case "opgsummleo_score":
    case "voiceleo_score":
      return {
        category: "Natural Language Metrics",
        categoryIndex: 0,
        stage: "Production",
      };

    case "ee-success":
    case "nrr_turn":
    case "nrr_turn_3s":
    case "nrr_tool_3s_mean":
      return {
        category: "Search Quality",
        categoryIndex: 1,
        stage: "Production",
      };

    case "sydney_e2e_latency":
    case "avg_availability":
    case "p75_latency":
    case "avg_latency":
    case "p99_latency":
    case "p90_latency":
    case "num_utterances":
    case "perc_llm_failed_annotation":
    case "num_annotations":
    case "num_conversation_id":
    case "ndcg_llm_labeler_failure_rate":
    case "sydney_reliability":
    case "3s_reliability":
    case "e2e_reliability":
    case "comet_reliability":
      return {
        category: "Fundamental Metrics",
        categoryIndex: 2,
        stage: "Production",
      };

    case "stewieleosbs_relevance":
    case "stewieleosbs_engagement":
    case "stewieleosbs_detail":
    case "stewieleosbs_clarity":
    case "stewieleosbs_perceived_intelligence":
    case "stewieleosbs_score":
    case "stewieleosbs":
    case "sbsleo_relevance":
    case "sbsleo_engagement":
    case "sbsleo_detail":
    case "sbsleo_clarity":
    case "sbsleo_perceived_intelligence":
    case "sbsleo_score":
    case "sbsleo":
    case "sbsleov2":
    case "sbsleorecap_score":
    case "sbsleorecap_exceptional_quality_score":
    case "sbsleorecap_effortless_quality_score":
    case "sbsleorecap_completeness_quality_score":
    case "sbsleorecap_relevance_quality_score":
    case "sbsleorecap_difference_score":
    case "sbsleorecap_exceptional_difference_score":
    case "sbsleorecap_effortless_difference_score":
    case "sbsleorecap_completeness_difference_score":
    case "sbsleorecap_relevance_difference_score":
      return {
        category: "SBS Metric",
        categoryIndex: 3,
        stage: "Alpha",
      };

    case "reasoningleo":
    case "lmchecklist_critical":
    case "lmchecklist_expected":
    case "tcr_score":
    case "lmchecklist":
    case "tcr":
      return {
        category: "NL Metric",
        categoryIndex: 4,
        stage: "Beta",
      };

    case "scleo_sensible_score":
    case "scleo_group_score":
    case "screlevance_score":
    case "screlevance_scconciseness_score":
      return {
        category: "SC Metric",
        categoryIndex: 5,
        stage: "Alpha",
      };

    case "hate_fairness":
    case "illegal_activity":
    case "interpersonal_workplace":
    case "self_harm":
    case "sexual_content":
    case "violence_content":
      return {
        category: "RAI Harm Scores",
        categoryIndex: 7,
        stage: "Beta",
      };

    case "disengage_rate":
    case "jailbreak-filter":
    case "offensive-request-filter":
    case "deepleo":
      return {
        category: "RAI Intervention Rates",
        categoryIndex: 8,
        stage: "Beta",
      };

    case "both_disengage":
    case "control_only_disengages":
    case "treatment_only_disengages":
    case "neither_disengages":
      return {
        category: "RAI Intervention Confusion Matrix",
        categoryIndex: 9,
        stage: "Beta",
      };

    case "scrape_total_failure_rate":
    case "3s_failure_rate":
    case "3s_latency_p95":
    case "3s_requests":
    case "3s_requests_4xx":
    case "3s_requests_5xx":
    case "3s_requests_has_lss_failure":
    case "3s_requests_hit_cache":
    case "3s_requests_hit_cache_rate":
    case "3s_requests_success":
    case "requests":
    case "requests_success":
    case "requests_with_3s_failure":
    case "sydney_failure_rate":
    case "sydney_request_count_rate":
    case "sydney_with_3s_failure_rate":
      return {
        category: "Scraping stats",
        categoryIndex: 13,
        stage: "Production",
      };

    case "citation_rate":
    case "citation_rate_3s_results":
      return {
        category: "Citation Rate",
        categoryIndex: 22,
        stage: "Production",
      };

    case "citation_rate_any_search_triggered":
    case "citation_rate_3s_search_triggered":
    case "citation_rate_web_search_triggered":
    case "citation_rate_any_results":
    case "citation_rate_web_results":
    case "num_citations_per_utterance":
    case "num_work_citations_per_utterance":
    case "num_web_citations_per_utterance":
    case "annotation_rate":
    case "annotation_rate_any_search_triggered":
    case "annotation_rate_3s_search_triggered":
    case "annotation_rate_web_search_triggered":
    case "annotation_rate_any_results":
    case "annotation_rate_3s_results":
    case "annotation_rate_web_results":
    case "num_annotations_per_utterance":
    case "no_citation_nor_annotation_rate":
      return {
        category: "Citations/Annotations",
        categoryIndex: 17,
        stage: "Beta",
      };

    // stewieleov5_* and acrueleo_* share one category.
    case "stewieleov5_accurate":
    case "stewieleov5_complete":
    case "stewieleov5_relevant":
    case "stewieleov5_effortless":
    case "stewieleov5_exceptional":
    case "stewieleov5_score":
    case "stewieleov5":
    case "acrueleo_accurate":
    case "acrueleo_complete":
    case "acrueleo_relevant":
    case "acrueleo_usefulness":
    case "acrueleo_exceptional":
    case "acrueleo_score":
    case "acrueleo":
      return {
        category: "ACRUE Metrics (known as stewieleov5 in metric diagnosis)",
        categoryIndex: 18,
        stage: "Beta",
      };

    case "codeleo_num_turns":
    case "codeleo_reply_length":
    case "codeleo_score":
    case "sbsleov2_sbsleov2_score":
    case "sbsleov2_engagement":
    case "sbsleov2_specificity":
    case "sbsleov2_perceived intelligence":
    case "sbsleov2_relevance":
    case "sbsleov2_clarity":
      return {
        category: "BizChat web preview metrics",
        categoryIndex: 20,
        stage: "Beta",
      };

    default:
      if (
        ["tools_", "entities_", "citation_tools_", "citation_entities_"].some(
          (prefix) => lowerMetric.startsWith(prefix),
        )
      ) {
        return {
          category: "TEPR",
          categoryIndex: 14,
          stage: "Alpha",
        };
      }

      return {
        category: "Others",
        categoryIndex: 100,
        stage: "Beta",
      };
  }
};

/**
 * Resolves the groups a metric belongs to.
 *
 * The name is lower-cased, then looked up by exact key; if there is no exact
 * entry the regex rules are consulted; if nothing matches the metric is
 * reported as a "Debug Metric".
 *
 * @param metric Metric name (any casing).
 * @returns The groups of the first rule that matches, or `["Debug Metric"]`.
 */
export const getMetricGroups = (metric: string): MetricsGroup[] => {
  const key = metric.toLowerCase();

  const exactGroups = metricsGroupMapping.get(key);
  if (exactGroups != null) {
    return exactGroups;
  }

  const patternGroups = getMetricGroupsRegex(key);
  if (patternGroups != null) {
    return patternGroups;
  }

  return ["Debug Metric"];
};

/**
 * Every metric group name, as a readonly tuple of string literals.
 * The `MetricsGroup` type below is derived from this list, so adding a name
 * here also adds it to the type.
 */
export const MetricsGroup = [
  "Flight Review Metric",
  "V-Next Metric",
  "Debug Metric",
  "Query Level - Post Merge",
  "Query Level - Pre Merge",
  "Query Set Level - Post Merge",
  "Query Set Level - Pre Merge",
] as const;

/** Union of the string literals listed in the `MetricsGroup` tuple. */
export type MetricsGroup = (typeof MetricsGroup)[number];

/**
 * Looks a metric up in the pattern-based group table.
 *
 * Patterns are tried in the table's insertion order and the first one that
 * matches decides the result.
 *
 * @param metric Metric name to test against each pattern.
 * @returns The groups of the first matching pattern, or `undefined` when no
 * pattern matches.
 */
const getMetricGroupsRegex = (metric: string): MetricsGroup[] | undefined => {
  const firstHit = [...regexGroupMapping].find(
    ([pattern]) => metric.match(pattern) !== null,
  );
  return firstHit?.[1];
};

/**
 * Pattern-based group rules, tried in insertion order (first match wins).
 * Patterns are written in lower case.
 */
const regexGroupMapping = new Map<RegExp, MetricsGroup[]>([
  // Any search_enterprise_*_<metric>@<n> EXCEPT names ending in "_ndcg@10".
  // The exclusion is a whole-string lookahead at the start: a lookahead placed
  // after "[\S]+_" can be bypassed by backtracking when the name has more than
  // one "_"-separated segment (e.g. "search_enterprise_a_b_ndcg@10").
  [
    /^(?![\S]+_ndcg@10$)search_enterprise_[\S]+_([\S]+@\d+)$/,
    ["Debug Metric"],
  ],
  [
    /^search_enterprise_[\S]+_ndcg@10$/,
    ["Debug Metric", "Flight Review Metric"],
  ],
  [/^(.*leo.*|.*lmchecklist.*|.*tcr.*)_failure$/, ["Flight Review Metric"]],
  [/^(sbsleo_[\S]+)$/, ["Flight Review Metric"]],
  // The character class matches any character, whitespace included, so names
  // containing a space also match.
  [/^(sbsleov2_[\S|\s]+)$/, ["V-Next Metric"]],
]);

/**
 * Exact-name group rules, keyed by LOWER-CASE metric name.
 *
 * `getMetricGroups` lower-cases the metric before looking it up here, so a key
 * containing an upper-case letter can never be hit through that function.
 * Always add new keys in lower case.
 */
export const metricsGroupMapping = new Map<string, MetricsGroup[]>(
  Object.entries({
    groundleo_score: ["Flight Review Metric"],
    groundleo_claimbreak: ["Flight Review Metric"],
    groundleo_claimbreakv1_1: ["Flight Review Metric"],
    voiceleo_score: ["Flight Review Metric"],
    stewieleo_relevance: ["Debug Metric"],
    stewieleo_engagement: ["Debug Metric"],
    stewieleo_detail: ["Debug Metric"],
    stewieleo_clarity: ["Debug Metric"],
    stewieleo_score: ["Flight Review Metric"],
    sydney_e2e_latency: ["Flight Review Metric"],
    nrr_turn: ["Flight Review Metric", "Debug Metric"],
    nrr_turn_3s: ["Debug Metric"],
    nrr_tool_3s_mean: ["Debug Metric"],
    stewieleosbs_relevance: ["Debug Metric"],
    stewieleosbs_engagement: ["Debug Metric"],
    stewieleosbs_detail: ["Debug Metric"],
    stewieleosbs_clarity: ["Debug Metric"],
    stewieleosbs_perceived_intelligence: ["Debug Metric"],
    sbsleo: ["Flight Review Metric"],
    sbsleov2: ["V-Next Metric"],
    stewieleov5: ["Flight Review Metric"],
    stewieleov5_score: ["Flight Review Metric"],
    stewieleov5_accurate: ["Flight Review Metric"],
    stewieleov5_complete: ["Flight Review Metric"],
    stewieleov5_relevant: ["Flight Review Metric"],
    stewieleov5_effortless: ["Flight Review Metric"],
    stewieleov5_exceptional: ["Flight Review Metric"],
    acrueleo: ["Flight Review Metric"],
    acrueleo_score: ["Flight Review Metric"],
    acrueleo_accurate: ["Flight Review Metric"],
    acrueleo_complete: ["Flight Review Metric"],
    acrueleo_relevant: ["Flight Review Metric"],
    acrueleo_usefulness: ["Flight Review Metric"],
    acrueleo_exceptional: ["Flight Review Metric"],
    stewieleosbs_score: ["Flight Review Metric", "V-Next Metric"],
    tcr_score: ["Debug Metric"],
    lmchecklist_critical: ["V-Next Metric"],
    lmchecklist_expected: ["V-Next Metric"],
    lmchecklist_aspirational: ["V-Next Metric"],
    lmchecklist_score: ["V-Next Metric"],
    p90_latency: ["Flight Review Metric"],
    avg_latency: ["Flight Review Metric"],
    p75_latency: ["Flight Review Metric"],
    p99_latency: ["Flight Review Metric"],
    avg_availability: ["Flight Review Metric"],
    num_utterances: ["Debug Metric"],
    perc_llm_failed_annotation: ["Debug Metric"],
    num_conversation_id: ["Debug Metric"],
    num_annotations: ["Debug Metric"],
    pileo_score: ["Flight Review Metric"],
    contextleo_score: ["Flight Review Metric"],
    scleo_score: ["Flight Review Metric"],
    scleo_group_score: ["Debug Metric"],
    scleo_sensible_score: ["Debug Metric"],
    ruleleo_score: ["Flight Review Metric"],
    e2e_reliability: ["Flight Review Metric"],
    comet_reliability: ["Flight Review Metric"],
    ndcg_llm_labeler_failure_rate: ["Flight Review Metric"],
    opgsummleo_score: ["Flight Review Metric"],
    reasoningleo: ["Debug Metric"],
    citedcg_all_bad_cites: ["Debug Metric"],
    citedcg_has_good_cite: ["Debug Metric"],
    citedcg_all_bad_cites_v2: ["Debug Metric"],
    citedcg_has_good_cite_v2: ["Debug Metric"],
    citedcg_avg_label: ["Debug Metric"],
    citedcg_sum_label: ["Debug Metric"],
    citedcg_num_bad: ["Debug Metric"],
    citedcg_num_good: ["Debug Metric"],
    citedcg_num_enterprise_cites: ["Flight Review Metric", "Debug Metric"],
    citedcg_gcc: ["Debug Metric"],
    citedcg_one_centric: ["Flight Review Metric", "Debug Metric"],
    citedcg_precision: ["Debug Metric"],
    citedcg_recall: ["Debug Metric"],
    citedcg_f1: ["Debug Metric"],
    "citedcg_ncg@short": ["Debug Metric"],
    "citedcg_ncg@long": ["Debug Metric"],
    citedcg_webwork_avg_cited_label: ["V-Next Metric"],
    citedcg_webwork_sum_cited_label: ["V-Next Metric"],
    citedcg_webwork_num_bad_cites: ["V-Next Metric"],
    citedcg_webwork_num_good_cites: ["V-Next Metric"],
    citedcg_webwork_num_enterprise_cites: ["V-Next Metric"],
    citedcg_webwork_gcc: ["V-Next Metric"],
    citedcg_webwork_one_centric: ["V-Next Metric"],
    citedcg_webwork_max_label: ["V-Next Metric"],
    citedcg_webwork_all_bad_cites: ["V-Next Metric"],
    citedcg_webwork_has_good_cite: ["V-Next Metric"],
    citedcg_webwork_all_bad_cites_v2: ["V-Next Metric"],
    citedcg_webwork_has_good_cite_v2: ["V-Next Metric"],
    citedcg_webwork_p: ["V-Next Metric"],
    citedcg_webwork_r: ["V-Next Metric"],
    citedcg_webwork_f1: ["V-Next Metric"],
    "citedcg_webwork_ncg@short": ["V-Next Metric"],
    "citedcg_webwork_ncg@long": ["V-Next Metric"],
    sydney_reliability: ["Flight Review Metric"],
    "3s_reliability": ["Flight Review Metric"],
    citation_rate: ["Flight Review Metric", "Debug Metric"],
    citation_rate_any_search_triggered: ["Debug Metric"],
    citation_rate_3s_search_triggered: ["Debug Metric"],
    citation_rate_web_search_triggered: ["Debug Metric"],
    citation_rate_any_results: ["Debug Metric"],
    citation_rate_3s_results: ["Flight Review Metric", "Debug Metric"],
    citation_rate_web_results: ["Debug Metric"],
    num_citations_per_utterance: ["Debug Metric"],
    num_work_citations_per_utterance: ["Debug Metric"],
    num_web_citations_per_utterance: ["Debug Metric"],
    annotation_rate: ["Debug Metric"],
    annotation_rate_any_search_triggered: ["Debug Metric"],
    annotation_rate_3s_search_triggered: ["Debug Metric"],
    annotation_rate_web_search_triggered: ["Debug Metric"],
    annotation_rate_any_results: ["Debug Metric"],
    annotation_rate_3s_results: ["Debug Metric"],
    annotation_rate_web_results: ["Debug Metric"],
    num_annotations_per_utterance: ["Debug Metric"],
    no_citation_nor_annotation_rate: ["Debug Metric"],
    sbsleorecap_score: ["Debug Metric"],
    sbsleorecap_exceptional_quality_score: ["Debug Metric"],
    sbsleorecap_effortless_quality_score: ["Debug Metric"],
    sbsleorecap_completeness_quality_score: ["Debug Metric"],
    sbsleorecap_relevance_quality_score: ["Debug Metric"],
    sbsleorecap_difference_score: ["Debug Metric"],
    sbsleorecap_exceptional_difference_score: ["Debug Metric"],
    sbsleorecap_effortless_difference_score: ["Debug Metric"],
    sbsleorecap_completeness_difference_score: ["Debug Metric"],
    sbsleorecap_relevance_difference_score: ["Debug Metric"],
  }),
);
