'''
This file is part of Open-MoE-LLM-Leaderboard and is modified based on work
under the Apache 2.0 License from the arena-hard project
(https://github.com/lm-sys/arena-hard).

Original Copyright (c) 2024 Tianle Li*, Wei-Lin Chiang*, Evan Frick, Lisa Dunlap,
Banghua Zhu, Joseph E. Gonzalez, Ion Stoica

See the NOTICE file distributed with this work for additional information
regarding copyright ownership.
'''
import os
import json
import time
import yaml
import random

from typing import Optional
from glob import glob

# API setting constants
API_MAX_RETRY = 16
API_RETRY_SLEEP = 10
API_ERROR_OUTPUT = "$ERROR$"

OPENAI_MODEL_LIST = (
    "gpt-3.5-turbo",
    "gpt-3.5-turbo-0301",
    "gpt-3.5-turbo-0613",
    "gpt-3.5-turbo-0613-verbose",
    "gpt-3.5-turbo-1106",
    "gpt-3.5-turbo-0125",
    "gpt-4",
    "gpt-4-0314",
    "gpt-4-0613",
    "gpt-4-turbo",
    "gpt-4-1106-preview",
    "gpt-4-0125-preview",
)

# Sampling temperature used for each question category.
temperature_config = {
    "writing": 0.7,
    "roleplay": 0.7,
    "extraction": 0.0,
    "math": 0.0,
    "coding": 0.0,
    "reasoning": 0.0,
    "stem": 0.1,
    "humanities": 0.1,
}


def load_questions(question_file: str):
    """Load questions from a JSONL file (one JSON object per line)."""
    questions = []
    with open(question_file, "r") as ques_file:
        for line in ques_file:
            if line.strip():  # skip blank lines
                questions.append(json.loads(line))
    return questions


def load_model_answers(answer_dir: str):
    """Load model answers.

    The return value is a python dict of type:
    Dict[model_name: str -> Dict[question_id: int -> answer: dict]]
    """
    filenames = glob(os.path.join(answer_dir, "*.jsonl"))
    filenames.sort()
    model_answers = {}

    for filename in filenames:
        model_name = os.path.basename(filename)[:-6]  # strip the ".jsonl" suffix
        answer = {}
        with open(filename) as fin:
            for line in fin:
                line = json.loads(line)
                answer[line["question_id"]] = line
        model_answers[model_name] = answer

    return model_answers


def get_endpoint(endpoint_list):
    """Randomly pick one endpoint dict from a list; return None if no list is given."""
    if endpoint_list is None:
        return None
    # randomly pick one
    api_dict = random.choice(endpoint_list)
    return api_dict


# load config args from config yaml files
def make_config(config_file: str) -> dict:
    config_kwargs = {}
    with open(config_file, "r") as f:
        config_kwargs = yaml.load(f, Loader=yaml.SafeLoader)

    return config_kwargs
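# --------------------------------------------------------------------------
# Illustrative note (an assumption for readers, not taken from the original
# repo): get_endpoint() above expects a list of endpoint dicts, and
# chat_completion_openai() below reads "api_base" and "api_key" from the one
# it receives. A config YAML loaded with make_config() could therefore look
# roughly like:
#
#   judge_model: gpt-4-1106-preview
#   endpoints:
#     - api_base: https://api.openai.com/v1
#       api_key: sk-...
#     - api_base: https://my-proxy.example.com/v1
#       api_key: sk-...
#
# Only the "api_base" / "api_key" field names are implied by the code; the
# surrounding keys and layout are hypothetical.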
def chat_completion_openai(model, messages, temperature, max_tokens, api_dict=None):
    import openai

    if api_dict:
        # Use a custom endpoint (e.g. a proxy) described by the config.
        client = openai.OpenAI(
            base_url=api_dict["api_base"],
            api_key=api_dict["api_key"],
        )
    else:
        client = openai.OpenAI()

    output = API_ERROR_OUTPUT
    # Retry on rate limits up to API_MAX_RETRY times; give up on other errors.
    for _ in range(API_MAX_RETRY):
        try:
            # print(messages)
            completion = client.chat.completions.create(
                model=model,
                messages=messages,
                temperature=temperature,
                max_tokens=max_tokens,
            )
            output = completion.choices[0].message.content
            break
        except openai.RateLimitError as e:
            print(type(e), e)
            time.sleep(API_RETRY_SLEEP)
        except openai.BadRequestError as e:
            print(messages)
            print(type(e), e)
        except KeyError as e:
            print(type(e), e)
            break

    return output


# Alternative judge backends for other providers (currently disabled).

# def chat_completion_openai_azure(model, messages, temperature, max_tokens, api_dict=None):
#     import openai
#     from openai import AzureOpenAI
#
#     api_base = api_dict["api_base"]
#     client = AzureOpenAI(
#         azure_endpoint=api_base,
#         api_key=api_dict["api_key"],
#         api_version=api_dict["api_version"],
#         timeout=240,
#         max_retries=2,
#     )
#
#     output = API_ERROR_OUTPUT
#     for _ in range(API_MAX_RETRY):
#         try:
#             response = client.chat.completions.create(
#                 model=model,
#                 messages=messages,
#                 n=1,
#                 temperature=temperature,
#                 max_tokens=max_tokens,
#                 seed=42,
#             )
#             output = response.choices[0].message.content
#             break
#         except openai.RateLimitError as e:
#             print(type(e), e)
#             time.sleep(API_RETRY_SLEEP)
#         except openai.BadRequestError as e:
#             print(type(e), e)
#             break
#         except KeyError:
#             print(type(e), e)
#             break
#
#     return output


# def chat_completion_anthropic(model, messages, temperature, max_tokens, api_dict=None):
#     import anthropic
#
#     if api_dict:
#         api_key = api_dict["api_key"]
#     else:
#         api_key = os.environ["ANTHROPIC_API_KEY"]
#
#     sys_msg = ""
#     if messages[0]["role"] == "system":
#         sys_msg = messages[0]["content"]
#         messages = messages[1:]
#
#     output = API_ERROR_OUTPUT
#     for _ in range(API_MAX_RETRY):
#         try:
#             # print(sys_msg)
#             c = anthropic.Anthropic(api_key=api_key)
#             response = c.messages.create(
#                 model=model,
#                 messages=messages,
#                 stop_sequences=[anthropic.HUMAN_PROMPT],
#                 max_tokens=max_tokens,
#                 temperature=temperature,
#                 system=sys_msg,
#             )
#             output = response.content[0].text
#             break
#         except anthropic.APIError as e:
#             print(type(e), e)
#             time.sleep(API_RETRY_SLEEP)
#     return output


# def chat_completion_mistral(model, messages, temperature, max_tokens):
#     from mistralai.client import MistralClient
#     from mistralai.models.chat_completion import ChatMessage
#     from mistralai.exceptions import MistralException
#
#     api_key = os.environ["MISTRAL_API_KEY"]
#     client = MistralClient(api_key=api_key)
#
#     prompts = [ChatMessage(role=message["role"], content=message["content"]) for message in messages]
#
#     output = API_ERROR_OUTPUT
#     for _ in range(API_MAX_RETRY):
#         try:
#             chat_response = client.chat(
#                 model=model,
#                 messages=prompts,
#                 temperature=temperature,
#                 max_tokens=max_tokens,
#             )
#             output = chat_response.choices[0].message.content
#             break
#         except MistralException as e:
#             print(type(e), e)
#             break
#
#     return output


# def chat_completion_gemini(model, messages, temperature, max_tokens):
#     import google.generativeai as genai
#     genai.configure(api_key=os.environ["GEMINI_API_KEY"])
#
#     safety_settings = [
#         {
#             "category": "HARM_CATEGORY_HARASSMENT",
#             "threshold": "BLOCK_NONE"
#         },
#         {
#             "category": "HARM_CATEGORY_HATE_SPEECH",
#             "threshold": "BLOCK_NONE"
#         },
#         {
#             "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
#             "threshold": "BLOCK_NONE"
#         },
#         {
#             "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
#             "threshold": "BLOCK_NONE"
#         },
#     ]
#
#     # Set up the model
#     generation_config = {
#         "temperature": temperature,
#         "top_p": 1,
#         "top_k": 1,
#         "max_output_tokens": max_tokens,
#     }
#
#     output = API_ERROR_OUTPUT
#     for _ in range(API_MAX_RETRY):
#         try:
#             gemini = genai.GenerativeModel(
#                 model_name=model,
#                 generation_config=generation_config,
#                 safety_settings=safety_settings)
#
#             convo = gemini.start_chat(history=[])
#
#             convo.send_message(messages)
#             output = convo.last.text
#             break
#         except genai.types.generation_types.StopCandidateException as e:
#             print(type(e), e)
#             break
#         except Exception as e:
#             print(type(e), e)
#             time.sleep(API_RETRY_SLEEP)
#
#     return output


# def chat_completion_cohere(model, messages, temperature, max_tokens):
#     import cohere
#
#     co = cohere.Client(os.environ["COHERE_API_KEY"])
#     assert len(messages) > 0
#
#     template_map = {"system": "SYSTEM",
#                     "assistant": "CHATBOT",
#                     "user": "USER"}
#
#     assert messages[-1]["role"] == "user"
#     prompt = messages[-1]["content"]
#
#     if len(messages) > 1:
#         history = []
#         for message in messages[:-1]:
#             history.append({"role": template_map[message["role"]], "message": message["content"]})
#     else:
#         history = None
#
#     output = API_ERROR_OUTPUT
#     for _ in range(API_MAX_RETRY):
#         try:
#             response = co.chat(
#                 message=prompt,
#                 model=model,
#                 temperature=temperature,
#                 max_tokens=max_tokens,
#                 chat_history=history,
#             )
#             output = response.text
#             break
#         except cohere.core.api_error.ApiError as e:
#             print(type(e), e)
#             raise
#         except Exception as e:
#             print(type(e), e)
#             break
#
#     return output
def reorg_answer_file(answer_file):
    """Sort answers by question id and de-duplicate them."""
    answers = {}
    with open(answer_file, "r") as fin:
        for l in fin:
            qid = json.loads(l)["question_id"]
            answers[qid] = l

    qids = sorted(list(answers.keys()))
    with open(answer_file, "w") as fout:
        for qid in qids:
            fout.write(answers[qid])
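# --------------------------------------------------------------------------
# Minimal usage sketch (illustration only; the file paths, config keys, and
# question schema below are assumptions, not part of the original project).
# It shows how the helpers in this module compose: load a YAML config and a
# question file, pick an endpoint, and request one judgment from the OpenAI
# API. Requires OPENAI_API_KEY (or an endpoint list with api_key) to run.
if __name__ == "__main__":
    # Hypothetical locations, used only for this example.
    example_config_file = "config/judge_config.yaml"
    example_question_file = "data/question.jsonl"

    config = make_config(example_config_file)
    questions = load_questions(example_question_file)

    # Randomly pick one endpoint if the config lists several; with None,
    # chat_completion_openai falls back to the default OpenAI client.
    endpoint = get_endpoint(config.get("endpoints"))

    for question in questions[:1]:  # just the first question, as a smoke test
        # Assumed arena-hard-style schema: the user prompt lives in "turns".
        messages = [{"role": "user", "content": question["turns"][0]["content"]}]
        # Map the question category to a sampling temperature, defaulting to 0.0.
        temperature = temperature_config.get(question.get("category"), 0.0)
        answer = chat_completion_openai(
            model=config.get("judge_model", "gpt-4-1106-preview"),
            messages=messages,
            temperature=temperature,
            max_tokens=config.get("max_tokens", 1024),
            api_dict=endpoint,
        )
        print(answer)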