import { mixEvalTests } from './MixEvalTests';

// Keys of every model's `scores` object, in insertion order. Each row in
// MODEL_ROWS lists its score values in exactly this column order.
const SCORE_COLUMNS = [
  'Overall Score',
  'DROP',
  'BBH',
  'MATH',
  'GSM8k',
  'TriviaQA',
  'AGIEval',
  'MMLU',
  'MBPP',
  'BoolQ',
  'HellaSwag',
  'GPQA',
  'PIQA',
  'ARC',
  'OpenBookQA',
  'SIQA',
  'CommonsenseQA',
  'WinoGrande',
];

// One row per model: [id, displayName, name, score values by SCORE_COLUMNS].
// Within each row the first value line covers 'Overall Score'..'MBPP' and the
// second covers 'BoolQ'..'WinoGrande'. Row order is the authored order; note
// that ids are neither sequential nor sorted.
// prettier-ignore
const MODEL_ROWS = [
  [1, 'GPT-3.5-turbo', 'gpt_35_turbo_0125', [
    0.8069999999999999, 0.872, 0.724, 0.725, 0.96, 0.877, 0.537, 0.75, 0,
    0.869, 0.606, 0.25, 0.885, 0.891, 0.795, 0.714, 0.807, 1,
  ]],
  [2, 'GPT-4', 'gpt_4_0613', [
    0.874, 0.872, 0.847, 0.76, 0.985, 0.906, 0.732, 0.832, 0,
    0.903, 0.882, 0.75, 0.906, 0.989, 0.886, 0.745, 0.868, 1,
  ]],
  [3, 'GPT-4o', 'gpt_4o', [
    0.89, 0.912, 0.901, 0.895, 0.97, 0.907, 0.756, 0.855, 1,
    0.874, 0.929, 0.5, 0.969, 0.989, 0.909, 0.755, 0.858, 0.667,
  ]],
  [10, 'GPT-4o Mini', 'gpt_4o_mini', [
    0.852, 0.881, 0.912, 0.965, 0.968, 0.859, 0.642, 0.816, 0,
    0.863, 0.859, 0.5, 0.938, 0.967, 0.932, 0.714, 0.84, 0.667,
  ]],
  [4, 'Mistral 7B', 'mistral_7b', [
    0.7255, 0.783, 0.693, 0.53, 0.826, 0.767, 0.431, 0.647, 0,
    0.794, 0.67, 0, 0.875, 0.891, 0.773, 0.653, 0.731, 0.667,
  ]],
  [5, 'Mixtral 8x7B', 'mixtral_8x7b', [
    0.7735, 0.815, 0.799, 0.665, 0.9, 0.847, 0.52, 0.715, 1,
    0.823, 0.522, 0.25, 0.896, 0.913, 0.795, 0.735, 0.764, 0.667,
  ]],
  [7, 'Claude 3 Sonnet', 'claude_3_sonnet', [
    0.8240000000000001, 0.89, 0.918, 0.705, 0.965, 0.854, 0.593, 0.754, 1,
    0.823, 0.879, 0.5, 0.833, 0.935, 0.659, 0.633, 0.797, 0.333,
  ]],
  [8, 'Claude 3 Haiku', 'claude_3_haiku', [
    0.8160000000000001, 0.876, 0.862, 0.885, 0.967, 0.847, 0.634, 0.757, 1,
    0.851, 0.771, 0.25, 0.906, 0.913, 0.864, 0.571, 0.792, 0.667,
  ]],
  [9, 'Claude 3 Opus', 'claude_3_opus', [
    0.883, 0.917, 0.895, 0.95, 0.963, 0.913, 0.691, 0.835, 1,
    0.863, 0.926, 0.25, 0.906, 0.978, 0.818, 0.724, 0.863, 1,
  ]],
  [6, 'Llama 3', 'llama_3', [
    0.8565, 0.884, 0.919, 0.835, 0.987, 0.886, 0.667, 0.798, 0,
    0.88, 0.818, 0.25, 0.948, 0.967, 0.886, 0.786, 0.849, 0.333,
  ]],
  [11, 'Llama 3.1 405B Instruct', 'llama-3.1-405B-instruct', [
    0.874, 0.89, 0.933, 0.785, 0.989, 0.913, 0.74, 0.843, 0,
    0.897, 0.943, 0.5, 0.927, 0.978, 0.909, 0.765, 0.887, 0.667,
  ]],
  [12, 'Llama 3.1 70B Instruct', 'llama-3.1-70B-instruct', [
    0.874, 0.866, 0.867, 0.855, 0.974, 0.872, 0.748, 0.795, 0,
    0.903, 0.798, 0.25, 0.938, 0.989, 0.955, 0.765, 0.821, 0.667,
  ]],
  [13, 'Llama 3.1 8B Instruct', 'llama-3.1-8B-instruct', [
    0.774, 0.75, 0.769, 0.77, 0.896, 0.683, 0.423, 0.665, 0,
    0.817, 0.589, 0.75, 0.865, 0.902, 0.795, 0.663, 0.722, 0.667,
  ]],
];

/**
 * Pairs one row of positional values with SCORE_COLUMNS.
 *
 * @param {number[]} values - Score values ordered like SCORE_COLUMNS.
 * @returns {Object<string, number>} Scores keyed by column name, with keys
 *   inserted in SCORE_COLUMNS order.
 */
function zipScores(values) {
  const scores = {};
  SCORE_COLUMNS.forEach((column, index) => {
    scores[column] = values[index];
  });
  return scores;
}

/**
 * Static score table for the base models.
 *
 * Each entry has a numeric `id`, a human-readable `displayName`, a
 * machine-style `name`, and a `scores` object keyed by the SCORE_COLUMNS
 * names. Every value in the table lies between 0 and 1.
 */
export const baseModelScores = MODEL_ROWS.map(
  ([id, displayName, name, values]) => ({
    id,
    displayName,
    name,
    scores: zipScores(values),
  })
);

/**
 * Rounds a raw score to two decimal places.
 *
 * Returns `null` only when the result is NaN, which is what happens when the
 * raw value is `undefined` (the benchmark is absent from a model's `scores`).
 * A genuine score of 0 is kept as 0: the previous `|| null` fallback treated
 * it as falsy and reported it as missing.
 *
 * @param {number|undefined} rawScore - Value read from a model's `scores`.
 * @returns {number|null} The rounded score, or `null` when unavailable.
 */
const roundScoreOrNull = (rawScore) => {
  // Number.EPSILON nudges values sitting just below a .xx5 boundary so they
  // round up instead of down.
  const rounded = Math.round((rawScore + Number.EPSILON) * 100) / 100;
  return Number.isNaN(rounded) ? null : rounded;
};

/**
 * Ranking-table view of `baseModelScores`.
 *
 * Each entry carries the model's `name` and `displayName`, an `id` prefixed
 * with `base_`, a constant `type` of `'base'`, and an `evaluations` object
 * with one key per entry of `mixEvalTests` (presumably the list of benchmark
 * names; confirm in ./MixEvalTests). Each evaluation value is the score
 * rounded to two decimals, or `null` when the model has no score for that
 * test.
 */
export const baseModelRankingData = baseModelScores.map((model) => ({
  name: model.name,
  displayName: model.displayName,
  id: `base_${model.id}`,
  evaluations: mixEvalTests.reduce((acc, test) => {
    acc[test] = roundScoreOrNull(model.scores[test]);
    return acc;
  }, {}),
  type: 'base',
}));
