Open_NotebookLM_TLDW

Paused

File size: 51,920 Bytes

# Summarization_General_Lib.py
#########################################
# General Summarization Library
# This library is used to perform summarization.
#
####
####################
# Function List
#
# 1. extract_text_from_segments(segments: List[Dict]) -> str
# 2. chat_with_openai(api_key, file_path, custom_prompt_arg)
# 3. chat_with_anthropic(api_key, file_path, model, custom_prompt_arg, max_retries=3, retry_delay=5)
# 4. chat_with_cohere(api_key, file_path, model, custom_prompt_arg)
# 5. chat_with_groq(api_key, input_data, custom_prompt_arg, system_prompt=None):
# 6. chat_with_openrouter(api_key, input_data, custom_prompt_arg, system_prompt=None)
# 7. chat_with_huggingface(api_key, input_data, custom_prompt_arg, system_prompt=None)
# 8. chat_with_deepseek(api_key, input_data, custom_prompt_arg, system_prompt=None)
# 9. chat_with_vllm(input_data, custom_prompt_input, api_key=None, vllm_api_url="http://127.0.0.1:8000/v1/chat/completions", system_prompt=None)
#
#
####################
#
# Import necessary libraries
import json
import logging
import os
import time
from typing import List

import requests
#
# Import 3rd-Party Libraries
#
# Import Local libraries
from App_Function_Libraries.Utils.Utils import load_and_log_configs
#
#######################################################################################################################
# Function Definitions
#

#FIXME: Update to include full arguments

def extract_text_from_segments(segments):
    logging.debug(f"Segments received: {segments}")
    logging.debug(f"Type of segments: {type(segments)}")

    text = ""

    if isinstance(segments, list):
        for segment in segments:
            logging.debug(f"Current segment: {segment}")
            logging.debug(f"Type of segment: {type(segment)}")
            if 'Text' in segment:
                text += segment['Text'] + " "
            else:
                logging.warning(f"Skipping segment due to missing 'Text' key: {segment}")
    else:
        logging.warning(f"Unexpected type of 'segments': {type(segments)}")

    return text.strip()



def get_openai_embeddings(input_data: str, model: str) -> List[float]:
    """

    Get embeddings for the input text from OpenAI API.



    Args:

        input_data (str): The input text to get embeddings for.

        model (str): The model to use for generating embeddings.



    Returns:

        List[float]: The embeddings generated by the API.

    """
    loaded_config_data = load_and_log_configs()
    api_key = loaded_config_data['api_keys']['openai']

    if not api_key:
        logging.error("OpenAI: API key not found or is empty")
        raise ValueError("OpenAI: API Key Not Provided/Found in Config file or is empty")

    logging.debug(f"OpenAI: Using API Key: {api_key[:5]}...{api_key[-5:]}")
    logging.debug(f"OpenAI: Raw input data (first 500 chars): {str(input_data)[:500]}...")
    logging.debug(f"OpenAI: Using model: {model}")

    headers = {
        'Authorization': f'Bearer {api_key}',
        'Content-Type': 'application/json'
    }

    request_data = {
        "input": input_data,
        "model": model,
    }

    try:
        logging.debug("OpenAI: Posting request to embeddings API")
        response = requests.post('https://api.openai.com/v1/embeddings', headers=headers, json=request_data)
        logging.debug(f"Full API response data: {response}")
        if response.status_code == 200:
            response_data = response.json()
            if 'data' in response_data and len(response_data['data']) > 0:
                embedding = response_data['data'][0]['embedding']
                logging.debug("OpenAI: Embeddings retrieved successfully")
                return embedding
            else:
                logging.warning("OpenAI: Embedding data not found in the response")
                raise ValueError("OpenAI: Embedding data not available in the response")
        else:
            logging.error(f"OpenAI: Embeddings request failed with status code {response.status_code}")
            logging.error(f"OpenAI: Error response: {response.text}")
            raise ValueError(f"OpenAI: Failed to retrieve embeddings. Status code: {response.status_code}")
    except requests.RequestException as e:
        logging.error(f"OpenAI: Error making API request: {str(e)}", exc_info=True)
        raise ValueError(f"OpenAI: Error making API request: {str(e)}")
    except Exception as e:
        logging.error(f"OpenAI: Unexpected error: {str(e)}", exc_info=True)
        raise ValueError(f"OpenAI: Unexpected error occurred: {str(e)}")


def chat_with_openai(api_key, input_data, custom_prompt_arg, temp=None, system_message=None):
    loaded_config_data = load_and_log_configs()
    openai_api_key = api_key
    try:
        # API key validation
        if not openai_api_key:
            logging.info("OpenAI: API key not provided as parameter")
            logging.info("OpenAI: Attempting to use API key from config file")
            openai_api_key = loaded_config_data['api_keys']['openai']

        if not openai_api_key:
            logging.error("OpenAI: API key not found or is empty")
            return "OpenAI: API Key Not Provided/Found in Config file or is empty"

        logging.debug(f"OpenAI: Using API Key: {openai_api_key[:5]}...{openai_api_key[-5:]}")

        # Input data handling
        logging.debug(f"OpenAI: Raw input data type: {type(input_data)}")
        logging.debug(f"OpenAI: Raw input data (first 500 chars): {str(input_data)[:500]}...")

        if isinstance(input_data, str):
            if input_data.strip().startswith('{'):
                # It's likely a JSON string
                logging.debug("OpenAI: Parsing provided JSON string data for summarization")
                try:
                    data = json.loads(input_data)
                except json.JSONDecodeError as e:
                    logging.error(f"OpenAI: Error parsing JSON string: {str(e)}")
                    return f"OpenAI: Error parsing JSON input: {str(e)}"
            elif os.path.isfile(input_data):
                logging.debug("OpenAI: Loading JSON data from file for summarization")
                with open(input_data, 'r') as file:
                    data = json.load(file)
            else:
                logging.debug("OpenAI: Using provided string data for summarization")
                data = input_data
        else:
            data = input_data

        logging.debug(f"OpenAI: Processed data type: {type(data)}")
        logging.debug(f"OpenAI: Processed data (first 500 chars): {str(data)[:500]}...")

        # Text extraction
        if isinstance(data, dict):
            if 'summary' in data:
                logging.debug("OpenAI: Summary already exists in the loaded data")
                return data['summary']
            elif 'segments' in data:
                text = extract_text_from_segments(data['segments'])
            else:
                text = json.dumps(data)  # Convert dict to string if no specific format
        elif isinstance(data, list):
            text = extract_text_from_segments(data)
        elif isinstance(data, str):
            text = data
        else:
            raise ValueError(f"OpenAI: Invalid input data format: {type(data)}")

        logging.debug(f"OpenAI: Extracted text (first 500 chars): {text[:500]}...")
        logging.debug(f"OpenAI: Custom prompt: {custom_prompt_arg}")

        openai_model = loaded_config_data['models']['openai'] or "gpt-4o"
        logging.debug(f"OpenAI: Using model: {openai_model}")

        headers = {
            'Authorization': f'Bearer {openai_api_key}',
            'Content-Type': 'application/json'
        }

        logging.debug(
            f"OpenAI API Key: {openai_api_key[:5]}...{openai_api_key[-5:] if openai_api_key else None}")
        logging.debug("openai: Preparing data + prompt for submittal")
        openai_prompt = f"{text} \n\n\n\n{custom_prompt_arg}"
        if temp is None:
            temp = 0.7
        if system_message is None:
            system_message = "You are a helpful AI assistant who does whatever the user requests."
        temp = float(temp)
        data = {
            "model": openai_model,
            "messages": [
                {"role": "system", "content": system_message},
                {"role": "user", "content": openai_prompt}
            ],
            "max_tokens": 4096,
            "temperature": temp
        }

        logging.debug("OpenAI: Posting request")
        response = requests.post('https://api.openai.com/v1/chat/completions', headers=headers, json=data)
        logging.debug(f"Full API response data: {response}")
        if response.status_code == 200:
            response_data = response.json()
            logging.debug(response_data)
            if 'choices' in response_data and len(response_data['choices']) > 0:
                chat_response = response_data['choices'][0]['message']['content'].strip()
                logging.debug("openai: Chat Sent successfully")
                logging.debug(f"openai: Chat response: {chat_response}")
                return chat_response
            else:
                logging.warning("openai: Chat response not found in the response data")
                return "openai: Chat not available"
        else:
            logging.error(f"OpenAI: Chat request failed with status code {response.status_code}")
            logging.error(f"OpenAI: Error response: {response.text}")
            return f"OpenAI: Failed to process chat response. Status code: {response.status_code}"
    except json.JSONDecodeError as e:
        logging.error(f"OpenAI: Error decoding JSON: {str(e)}", exc_info=True)
        return f"OpenAI: Error decoding JSON input: {str(e)}"
    except requests.RequestException as e:
        logging.error(f"OpenAI: Error making API request: {str(e)}", exc_info=True)
        return f"OpenAI: Error making API request: {str(e)}"
    except Exception as e:
        logging.error(f"OpenAI: Unexpected error: {str(e)}", exc_info=True)
        return f"OpenAI: Unexpected error occurred: {str(e)}"


def chat_with_anthropic(api_key, input_data, model, custom_prompt_arg, max_retries=3, retry_delay=5, system_prompt=None, temp=None):
    try:
        loaded_config_data = load_and_log_configs()

        # Check if config was loaded successfully
        if loaded_config_data is None:
            logging.error("Anthropic: Failed to load configuration data.")
            return "Anthropic: Failed to load configuration data."

        # Initialize the API key
        anthropic_api_key = api_key

        # API key validation
        if not api_key:
            logging.info("Anthropic: API key not provided as parameter")
            logging.info("Anthropic: Attempting to use API key from config file")
            # Ensure 'api_keys' and 'anthropic' keys exist
            try:
                anthropic_api_key = loaded_config_data['api_keys']['anthropic']
                logging.debug(f"Anthropic: Loaded API Key from config: {anthropic_api_key[:5]}...{anthropic_api_key[-5:]}")
            except (KeyError, TypeError) as e:
                logging.error(f"Anthropic: Error accessing API key from config: {str(e)}")
                return "Anthropic: API Key Not Provided/Found in Config file or is empty"

        if not anthropic_api_key or anthropic_api_key == "":
            logging.error("Anthropic: API key not found or is empty")
            return "Anthropic: API Key Not Provided/Found in Config file or is empty"

        if anthropic_api_key:
            logging.debug(f"Anthropic: Using API Key: {anthropic_api_key[:5]}...{anthropic_api_key[-5:]}")
        else:
            logging.debug(f"Anthropic: Using API Key: {api_key[:5]}...{api_key[-5:]}")

        if system_prompt is not None:
            logging.debug("Anthropic: Using provided system prompt")
            pass
        else:
            system_prompt = "You are a helpful assistant"
            logging.debug("Anthropic: Using default system prompt")

        logging.debug(f"AnthropicAI: Loaded data: {input_data}")
        logging.debug(f"AnthropicAI: Type of data: {type(input_data)}")

        # Retrieve the model from config if not provided
        if not model:
            try:
                anthropic_model = loaded_config_data['models']['anthropic']
                logging.debug(f"Anthropic: Loaded model from config: {anthropic_model}")
            except (KeyError, TypeError) as e:
                logging.error(f"Anthropic: Error accessing model from config: {str(e)}")
                return "Anthropic: Model configuration not found."
        else:
            anthropic_model = model
            logging.debug(f"Anthropic: Using provided model: {anthropic_model}")

        if temp is None:
            temp = 1.0
            logging.debug(f"Anthropic: Using default temperature: {temp}")

        headers = {
            'x-api-key': anthropic_api_key,
            'anthropic-version': '2023-06-01',
            'Content-Type': 'application/json'
        }

        anthropic_user_prompt = custom_prompt_arg if custom_prompt_arg else ""
        logging.debug(f"Anthropic: User Prompt is '{anthropic_user_prompt}'")
        user_message = {
            "role": "user",
            "content": f"{input_data} \n\n\n\n{anthropic_user_prompt}"
        }

        data = {
            "model": anthropic_model,
            "max_tokens": 4096,  # max possible tokens to return
            "messages": [user_message],
            "stop_sequences": ["\n\nHuman:"],
            "temperature": temp,
            "top_k": 0,
            "top_p": 1.0,
            "metadata": {
                "user_id": "example_user_id",
            },
            "stream": False,
            "system": system_prompt
        }

        for attempt in range(max_retries):
            try:
                logging.debug("Anthropic: Posting request to API")
                response = requests.post('https://api.anthropic.com/v1/messages', headers=headers, json=data)
                logging.debug(f"Anthropic: Full API response data: {response}")

                # Check if the status code indicates success
                if response.status_code == 200:
                    logging.debug("Anthropic: Post submittal successful")
                    response_data = response.json()

                    # Corrected path to access the assistant's reply
                    if 'content' in response_data and isinstance(response_data['content'], list) and len(response_data['content']) > 0:
                        chat_response = response_data['content'][0]['text'].strip()
                        logging.debug("Anthropic: Chat request successful")
                        print("Chat request processed successfully.")
                        return chat_response
                    else:
                        logging.error("Anthropic: Unexpected data structure in response.")
                        print("Unexpected response format from Anthropic API:", response.text)
                        return "Anthropic: Unexpected response format from API."
                elif response.status_code == 500:  # Handle internal server error specifically
                    logging.debug("Anthropic: Internal server error")
                    print("Internal server error from API. Retrying may be necessary.")
                    time.sleep(retry_delay)
                else:
                    logging.debug(
                        f"Anthropic: Failed to process chat request, status code {response.status_code}: {response.text}")
                    print(f"Failed to process chat request, status code {response.status_code}: {response.text}")
                    return f"Anthropic: Failed to process chat request, status code {response.status_code}: {response.text}"

            except requests.RequestException as e:
                logging.error(f"Anthropic: Network error during attempt {attempt + 1}/{max_retries}: {str(e)}")
                if attempt < max_retries - 1:
                    logging.debug(f"Anthropic: Retrying in {retry_delay} seconds...")
                    time.sleep(retry_delay)
                else:
                    return f"Anthropic: Network error: {str(e)}"

    except Exception as e:
        logging.error(f"Anthropic: Error in processing: {str(e)}")
        return f"Anthropic: Error occurred while processing summary with Anthropic: {str(e)}"


# Summarize with Cohere
def chat_with_cohere(api_key, input_data, model=None, custom_prompt_arg=None, system_prompt=None, temp=None):
    loaded_config_data = load_and_log_configs()
    cohere_api_key = None

    try:
        # API key validation
        if api_key:
            logging.info(f"Cohere Chat: API Key from parameter: {api_key[:3]}...{api_key[-3:]}")
            cohere_api_key = api_key
        else:
            logging.info("Cohere Chat: API key not provided as parameter")
            logging.info("Cohere Chat: Attempting to use API key from config file")
            logging.debug(f"Cohere Chat: Cohere API Key from config: {loaded_config_data['api_keys']['cohere']}")
            cohere_api_key = loaded_config_data['api_keys']['cohere']
            if cohere_api_key:
                logging.debug(f"Cohere Chat: Cohere API Key from config: {cohere_api_key[:3]}...{cohere_api_key[-3:]}")
            else:
                logging.error("Cohere Chat: API key not found or is empty")
                return "Cohere Chat: API Key Not Provided/Found in Config file or is empty"

        logging.debug(f"Cohere Chat: Loaded data: {input_data}")
        logging.debug(f"Cohere Chat: Type of data: {type(input_data)}")

        # Ensure model is set
        if not model:
            model = loaded_config_data['models']['cohere']
        logging.debug(f"Cohere Chat: Using model: {model}")

        if temp is None:
            temp = 0.3
        else:
            try:
                temp = float(temp)
            except ValueError:
                logging.warning(f"Cohere Chat: Invalid temperature value '{temp}', defaulting to 0.3")
                temp = 0.3

        headers = {
            'accept': 'application/json',
            'content-type': 'application/json',
            'Authorization': f'Bearer {cohere_api_key}'
        }

        # Ensure system_prompt is set
        if not system_prompt:
            system_prompt = "You are a helpful assistant"
        logging.debug(f"Cohere Chat: System Prompt being sent is: '{system_prompt}'")

        cohere_prompt = input_data
        if custom_prompt_arg:
            cohere_prompt += f"\n\n{custom_prompt_arg}"
        logging.debug(f"Cohere Chat: User Prompt being sent is: '{cohere_prompt}'")

        data = {
            "model" : model,
            "temperature": temp,
            "messages": [
                {
                    "role": "system",
                    "content":  system_prompt
                },
                {
                    "role": "user",
                    "content": cohere_prompt,
                }
            ],
        }
        logging.debug(f"Cohere Chat: Request data: {json.dumps(data, indent=2)}")

        logging.debug("cohere chat: Submitting request to API endpoint")
        print("cohere chat: Submitting request to API endpoint")

        try:
            response = requests.post('https://api.cohere.ai/v2/chat', headers=headers, json=data)
            logging.debug(f"Cohere Chat: Raw API response: {response.text}")
        except requests.RequestException as e:
            logging.error(f"Cohere Chat: Error making API request: {str(e)}")
            return f"Cohere Chat: Error making API request: {str(e)}"

        if response.status_code == 200:
            try:
                response_data = response.json()
            except json.JSONDecodeError:
                logging.error("Cohere Chat: Failed to decode JSON response")
                return "Cohere Chat: Failed to decode JSON response"

            if response_data is None:
                logging.error("Cohere Chat: No response data received.")
                return "Cohere Chat: No response data received."

            logging.debug(f"cohere chat: Full API response data: {json.dumps(response_data, indent=2)}")

            if 'message' in response_data and 'content' in response_data['message']:
                content = response_data['message']['content']
                if isinstance(content, list) and len(content) > 0:
                    # Extract text from the first content block
                    text = content[0].get('text', '').strip()
                    if text:
                        logging.debug("Cohere Chat: Chat request successful")
                        print("Cohere Chat request processed successfully.")
                        return text
                    else:
                        logging.error("Cohere Chat: 'text' field is empty in response content.")
                        return "Cohere Chat: 'text' field is empty in response content."
                else:
                    logging.error("Cohere Chat: 'content' field is not a list or is empty.")
                    return "Cohere Chat: 'content' field is not a list or is empty."
            else:
                logging.error("Cohere Chat: 'message' or 'content' field not found in API response.")
                return "Cohere Chat: 'message' or 'content' field not found in API response."

        elif response.status_code == 401:
            error_message = "Cohere Chat: Unauthorized - Invalid API key"
            logging.warning(error_message)
            print(error_message)
            return error_message

        else:
            logging.error(f"Cohere Chat: API request failed with status code {response.status_code}: {response.text}")
            print(f"Cohere Chat: Failed to process chat response, status code {response.status_code}: {response.text}")
            return f"Cohere Chat: API request failed: {response.text}"

    except Exception as e:
        logging.error(f"Cohere Chat: Error in processing: {str(e)}", exc_info=True)
        return f"Cohere Chat: Error occurred while processing chat request with Cohere: {str(e)}"


# https://console.groq.com/docs/quickstart
def chat_with_groq(api_key, input_data, custom_prompt_arg, temp=None, system_message=None):
    logging.debug("Groq: Summarization process starting...")
    try:
        logging.debug("Groq: Loading and validating configurations")
        loaded_config_data = load_and_log_configs()
        if loaded_config_data is None:
            logging.error("Failed to load configuration data")
            groq_api_key = None
        else:
            # Prioritize the API key passed as a parameter
            if api_key and api_key.strip():
                groq_api_key = api_key
                logging.info("Groq: Using API key provided as parameter")
            else:
                # If no parameter is provided, use the key from the config
                groq_api_key = loaded_config_data['api_keys'].get('groq')
                if groq_api_key:
                    logging.info("Groq: Using API key from config file")
                else:
                    logging.warning("Groq: No API key found in config file")

        # Final check to ensure we have a valid API key
        if not groq_api_key or not groq_api_key.strip():
            logging.error("Anthropic: No valid API key available")
            # You might want to raise an exception here or handle this case as appropriate for your application
            # For example: raise ValueError("No valid Anthropic API key available")

        logging.debug(f"Groq: Using API Key: {groq_api_key[:5]}...{groq_api_key[-5:]}")

        # Transcript data handling & Validation
        if isinstance(input_data, str) and os.path.isfile(input_data):
            logging.debug("Groq: Loading json data for summarization")
            with open(input_data, 'r') as file:
                data = json.load(file)
        else:
            logging.debug("Groq: Using provided string data for summarization")
            data = input_data

        # DEBUG - Debug logging to identify sent data
        logging.debug(f"Groq: Loaded data: {data[:500]}...(snipped to first 500 chars)")
        logging.debug(f"Groq: Type of data: {type(data)}")

        if isinstance(data, dict) and 'summary' in data:
            # If the loaded data is a dictionary and already contains a summary, return it
            logging.debug("Groq: Summary already exists in the loaded data")
            return data['summary']

        # If the loaded data is a list of segment dictionaries or a string, proceed with summarization
        if isinstance(data, list):
            segments = data
            text = extract_text_from_segments(segments)
        elif isinstance(data, str):
            text = data
        else:
            raise ValueError("Groq: Invalid input data format")

        # Set the model to be used
        groq_model = loaded_config_data['models']['groq']

        if temp is None:
            temp = 0.2
        temp = float(temp)
        if system_message is None:
            system_message = "You are a helpful AI assistant who does whatever the user requests."

        headers = {
            'Authorization': f'Bearer {groq_api_key}',
            'Content-Type': 'application/json'
        }

        groq_prompt = f"{text} \n\n\n\n{custom_prompt_arg}"
        logging.debug("groq: Prompt being sent is {groq_prompt}")

        data = {
            "messages": [
                {
                    "role": "system",
                    "content": system_message,
                },
                {
                    "role": "user",
                    "content": groq_prompt,
                }
            ],
            "model": groq_model,
            "temperature": temp
        }

        logging.debug("groq: Submitting request to API endpoint")
        print("groq: Submitting request to API endpoint")
        response = requests.post('https://api.groq.com/openai/v1/chat/completions', headers=headers, json=data)

        response_data = response.json()
        logging.debug(f"Full API response data: {response_data}")

        if response.status_code == 200:
            logging.debug(response_data)
            if 'choices' in response_data and len(response_data['choices']) > 0:
                summary = response_data['choices'][0]['message']['content'].strip()
                logging.debug("groq: Chat request successful")
                print("Groq: Chat request successful.")
                return summary
            else:
                logging.error("Groq(chat): Expected data not found in API response.")
                return "Groq(chat): Expected data not found in API response."
        else:
            logging.error(f"groq: API request failed with status code {response.status_code}: {response.text}")
            return f"groq: API request failed: {response.text}"

    except Exception as e:
        logging.error("groq: Error in processing: %s", str(e))
        return f"groq: Error occurred while processing summary with groq: {str(e)}"


def chat_with_openrouter(api_key, input_data, custom_prompt_arg, temp=None, system_message=None):
    import requests
    import json
    global openrouter_model, openrouter_api_key
    try:
        logging.debug("OpenRouter: Loading and validating configurations")
        loaded_config_data = load_and_log_configs()
        if loaded_config_data is None:
            logging.error("Failed to load configuration data")
            openrouter_api_key = None
        else:
            # Prioritize the API key passed as a parameter
            if api_key and api_key.strip():
                openrouter_api_key = api_key
                logging.info("OpenRouter: Using API key provided as parameter")
            else:
                # If no parameter is provided, use the key from the config
                openrouter_api_key = loaded_config_data['api_keys'].get('openrouter')
                if openrouter_api_key:
                    logging.info("OpenRouter: Using API key from config file")
                else:
                    logging.warning("OpenRouter: No API key found in config file")

        # Model Selection validation
        logging.debug("OpenRouter: Validating model selection")
        loaded_config_data = load_and_log_configs()
        openrouter_model = loaded_config_data['models']['openrouter']
        logging.debug(f"OpenRouter: Using model from config file: {openrouter_model}")

        # Final check to ensure we have a valid API key
        if not openrouter_api_key or not openrouter_api_key.strip():
            logging.error("OpenRouter: No valid API key available")
            raise ValueError("No valid Anthropic API key available")
    except Exception as e:
        logging.error("OpenRouter: Error in processing: %s", str(e))
        return f"OpenRouter: Error occurred while processing config file with OpenRouter: {str(e)}"

    logging.debug(f"OpenRouter: Using API Key: {openrouter_api_key[:5]}...{openrouter_api_key[-5:]}")

    logging.debug(f"OpenRouter: Using Model: {openrouter_model}")

    if isinstance(input_data, str) and os.path.isfile(input_data):
        logging.debug("OpenRouter: Loading json data for summarization")
        with open(input_data, 'r') as file:
            data = json.load(file)
    else:
        logging.debug("OpenRouter: Using provided string data for summarization")
        data = input_data

    # DEBUG - Debug logging to identify sent data
    logging.debug(f"OpenRouter: Loaded data: {data[:500]}...(snipped to first 500 chars)")
    logging.debug(f"OpenRouter: Type of data: {type(data)}")

    if isinstance(data, dict) and 'summary' in data:
        # If the loaded data is a dictionary and already contains a summary, return it
        logging.debug("OpenRouter: Summary already exists in the loaded data")
        return data['summary']

    # If the loaded data is a list of segment dictionaries or a string, proceed with summarization
    if isinstance(data, list):
        segments = data
        text = extract_text_from_segments(segments)
    elif isinstance(data, str):
        text = data
    else:
        raise ValueError("OpenRouter: Invalid input data format")

    openrouter_prompt = f"{input_data} \n\n\n\n{custom_prompt_arg}"
    logging.debug(f"openrouter: User Prompt being sent is {openrouter_prompt}")

    if temp is None:
        temp = 0.1
    temp = float(temp)
    if system_message is None:
        system_message = "You are a helpful AI assistant who does whatever the user requests."

    try:
        logging.debug("OpenRouter: Submitting request to API endpoint")
        print("OpenRouter: Submitting request to API endpoint")
        response = requests.post(
            url="https://openrouter.ai/api/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {openrouter_api_key}",
            },
            data=json.dumps({
                "model": openrouter_model,
                "messages": [
                    {"role": "system", "content": system_message},
                    {"role": "user", "content": openrouter_prompt}
                ],
                "temperature": temp
            })
        )

        response_data = response.json()
        logging.debug("Full API Response Data: %s", response_data)

        if response.status_code == 200:
            if 'choices' in response_data and len(response_data['choices']) > 0:
                summary = response_data['choices'][0]['message']['content'].strip()
                logging.debug("openrouter: Chat request successful")
                print("openrouter: Chat request successful.")
                return summary
            else:
                logging.error("openrouter: Expected data not found in API response.")
                return "openrouter: Expected data not found in API response."
        else:
            logging.error(f"openrouter:  API request failed with status code {response.status_code}: {response.text}")
            return f"openrouter: API request failed: {response.text}"
    except Exception as e:
        logging.error("openrouter: Error in processing: %s", str(e))
        return f"openrouter: Error occurred while processing chat request with openrouter: {str(e)}"


# FIXME: This function is not yet implemented properly
def chat_with_huggingface(api_key, input_data, custom_prompt_arg, system_prompt=None, temp=None):
    loaded_config_data = load_and_log_configs()
    logging.debug(f"huggingface Chat: Chat request process starting...")
    try:
        # API key validation
        if not api_key or api_key.strip() == "":
            logging.info("HuggingFace Chat: API key not provided as parameter")
            logging.info("HuggingFace Chat: Attempting to use API key from config file")

        huggingface_api_key = loaded_config_data['api_keys'].get('huggingface')
        logging.debug(f"HuggingFace Chat: API key from config: {huggingface_api_key[:5]}...{huggingface_api_key[-5:]}")

        if huggingface_api_key is None or huggingface_api_key.strip() == "":
            logging.error("HuggingFace Chat: API key not found or is empty")
            return "HuggingFace Chat: API Key Not Provided/Found in Config file or is empty"
        if huggingface_api_key:
            logging.info("HuggingFace Chat: Using API key from config file")
        headers = {
            "Authorization": f"Bearer {huggingface_api_key}"
        }

        # Setup model
        huggingface_model = loaded_config_data['models']['huggingface']

        API_URL = f"https://api-inference.huggingface.co/models/{huggingface_model}/v1/chat/completions"
        if temp is None:
            temp = 1.0
        temp = float(temp)
        huggingface_prompt = f"{custom_prompt_arg}\n\n\n{input_data}"
        logging.debug(f"HuggingFace chat: Prompt being sent is {huggingface_prompt}")
        data = {
            "model": f"{huggingface_model}",
            "messages": [{"role": "user", "content": f"{huggingface_prompt}"}],
            "max_tokens": 4096,
            "stream": False,
            "temperature": temp
        }

        logging.debug("HuggingFace Chat: Submitting request...")
        response = requests.post(API_URL, headers=headers, json=data)
        logging.debug(f"Full API response data: {response.text}")

        if response.status_code == 200:
            response_json = response.json()
            if "choices" in response_json and len(response_json["choices"]) > 0:
                generated_text = response_json["choices"][0]["message"]["content"]
                logging.debug("HuggingFace Chat: Chat request successful")
                print("HuggingFace Chat: Chat request successful.")
                return generated_text.strip()
            else:
                logging.error("HuggingFace Chat: No generated text in the response")
                return "HuggingFace Chat: No generated text in the response"
        else:
            logging.error(
                f"HuggingFace Chat: Chat request failed with status code {response.status_code}: {response.text}")
            return f"HuggingFace Chat: Failed to process chat request, status code {response.status_code}: {response.text}"
    except Exception as e:
        logging.error(f"HuggingFace Chat: Error in processing: {str(e)}")
        print(f"HuggingFace Chat: Error occurred while processing chat request with huggingface: {str(e)}")
        return None


def chat_with_deepseek(api_key, input_data, custom_prompt_arg, temp=0.1, system_message="You are a helpful AI assistant who does whatever the user requests.", max_retries=3, retry_delay=5):
    """

    Interacts with the DeepSeek API to generate summaries based on input data.



    Parameters:

        api_key (str): DeepSeek API key. If not provided, the key from the config is used.

        input_data (str or list): The data to summarize. Can be a string or a list of segments.

        custom_prompt_arg (str): Custom prompt to append to the input data.

        temp (float, optional): Temperature setting for the model. Defaults to 0.1.

        system_message (str, optional): System prompt for the assistant. Defaults to a helpful assistant message.

        max_retries (int, optional): Maximum number of retries for failed API calls. Defaults to 3.

        retry_delay (int, optional): Delay between retries in seconds. Defaults to 5.



    Returns:

        str: The summary generated by DeepSeek or an error message.

    """
    logging.debug("DeepSeek: Summarization process starting...")
    try:
        logging.debug("DeepSeek: Loading and validating configurations")
        loaded_config_data = load_and_log_configs()
        if loaded_config_data is None:
            logging.error("DeepSeek: Failed to load configuration data")
            return "DeepSeek: Failed to load configuration data."

        # Prioritize the API key passed as a parameter
        if api_key and api_key.strip():
            deepseek_api_key = api_key.strip()
            logging.info("DeepSeek: Using API key provided as parameter")
        else:
            # If no parameter is provided, use the key from the config
            deepseek_api_key = loaded_config_data['api_keys'].get('deepseek')
            if deepseek_api_key and deepseek_api_key.strip():
                deepseek_api_key = deepseek_api_key.strip()
                logging.info("DeepSeek: Using API key from config file")
            else:
                logging.error("DeepSeek: No valid API key available")
                return "DeepSeek: API Key Not Provided/Found in Config file or is empty"

        logging.debug("DeepSeek: Using API Key")

        # Input data handling
        if isinstance(input_data, str) and os.path.isfile(input_data):
            logging.debug("DeepSeek: Loading JSON data for summarization")
            with open(input_data, 'r', encoding='utf-8') as file:
                try:
                    data = json.load(file)
                except json.JSONDecodeError as e:
                    logging.error(f"DeepSeek: JSON decoding failed: {str(e)}")
                    return f"DeepSeek: Invalid JSON file. Error: {str(e)}"
        else:
            logging.debug("DeepSeek: Using provided string data for summarization")
            data = input_data

        # DEBUG - Debug logging to identify sent data
        if isinstance(data, str):
            snipped_data = data[:500] + "..." if len(data) > 500 else data
            logging.debug(f"DeepSeek: Loaded data (snipped to first 500 chars): {snipped_data}")
        elif isinstance(data, list):
            snipped_data = json.dumps(data[:2], indent=2) + "..." if len(data) > 2 else json.dumps(data, indent=2)
            logging.debug(f"DeepSeek: Loaded data (snipped to first 2 segments): {snipped_data}")
        else:
            logging.debug(f"DeepSeek: Loaded data: {data}")

        logging.debug(f"DeepSeek: Type of data: {type(data)}")

        if isinstance(data, dict) and 'summary' in data:
            # If the loaded data is a dictionary and already contains a summary, return it
            logging.debug("DeepSeek: Summary already exists in the loaded data")
            return data['summary']

        # Text extraction
        if isinstance(data, list):
            segments = data
            try:
                text = extract_text_from_segments(segments)
                logging.debug("DeepSeek: Extracted text from segments")
            except Exception as e:
                logging.error(f"DeepSeek: Error extracting text from segments: {str(e)}")
                return f"DeepSeek: Error extracting text from segments: {str(e)}"
        elif isinstance(data, str):
            text = data
            logging.debug("DeepSeek: Using string data directly")
        else:
            raise ValueError("DeepSeek: Invalid input data format")

        # Retrieve the model from config if not provided
        deepseek_model = loaded_config_data['models'].get('deepseek', "deepseek-chat")
        logging.debug(f"DeepSeek: Using model: {deepseek_model}")

        # Ensure temperature is a float within acceptable range
        try:
            temp = float(temp)
            if not (0.0 <= temp <= 1.0):
                logging.warning("DeepSeek: Temperature out of bounds (0.0 - 1.0). Setting to default 0.1")
                temp = 0.1
        except (ValueError, TypeError):
            logging.warning("DeepSeek: Invalid temperature value. Setting to default 0.1")
            temp = 0.1

        # Set default system prompt if not provided
        if system_message is not None:
            logging.debug("DeepSeek: Using provided system prompt")
        else:
            system_message = "You are a helpful AI assistant who does whatever the user requests."
            logging.debug("DeepSeek: Using default system prompt")

        headers = {
            'Authorization': f'Bearer {deepseek_api_key}',
            'Content-Type': 'application/json'
        }

        logging.debug("DeepSeek: Preparing data and prompt for submittal")
        deepseek_prompt = f"{text}\n\n\n\n{custom_prompt_arg}"
        payload = {
            "model": deepseek_model,
            "messages": [
                {"role": "system", "content": system_message},
                {"role": "user", "content": deepseek_prompt}
            ],
            "stream": False,
            "temperature": temp
        }

        logging.debug("DeepSeek: Posting request to API")
        for attempt in range(1, max_retries + 1):
            try:
                response = requests.post('https://api.deepseek.com/chat/completions', headers=headers, json=payload, timeout=30)
                logging.debug(f"DeepSeek: Full API response: {response.status_code} - {response.text}")

                if response.status_code == 200:
                    response_data = response.json()
                    logging.debug(f"DeepSeek: Response JSON: {json.dumps(response_data, indent=2)}")

                    # Adjust parsing based on actual API response structure
                    if 'choices' in response_data:
                        if len(response_data['choices']) > 0:
                            summary = response_data['choices'][0]['message']['content'].strip()
                            logging.debug("DeepSeek: Chat request successful")
                            return summary
                        else:
                            logging.error("DeepSeek: 'choices' key is empty in response")
                    else:
                        logging.error("DeepSeek: 'choices' key missing in response")
                    return "DeepSeek: Unexpected response format from API."
                elif 500 <= response.status_code < 600:
                    logging.error(f"DeepSeek: Server error (status code {response.status_code}). Attempt {attempt} of {max_retries}. Retrying in {retry_delay} seconds...")
                else:
                    logging.error(f"DeepSeek: Request failed with status code {response.status_code}. Response: {response.text}")
                    return f"DeepSeek: Failed to process chat request. Status code: {response.status_code}"

            except requests.Timeout:
                logging.error(f"DeepSeek: Request timed out. Attempt {attempt} of {max_retries}. Retrying in {retry_delay} seconds...")
            except requests.RequestException as e:
                logging.error(f"DeepSeek: Request exception occurred: {str(e)}. Attempt {attempt} of {max_retries}. Retrying in {retry_delay} seconds...")

            if attempt < max_retries:
                time.sleep(retry_delay)
            else:
                logging.error("DeepSeek: Max retries reached. Failed to get a successful response.")
                return "DeepSeek: Failed to get a successful response from API after multiple attempts."

    except Exception as e:
        logging.error(f"DeepSeek: Unexpected error in processing: {str(e)}", exc_info=True)
        return f"DeepSeek: Error occurred while processing chat request: {str(e)}"




def chat_with_mistral(api_key, input_data, custom_prompt_arg, temp=None, system_message=None):
    logging.debug("Mistral: Chat request made")
    try:
        logging.debug("Mistral: Loading and validating configurations")
        loaded_config_data = load_and_log_configs()
        if loaded_config_data is None:
            logging.error("Failed to load configuration data")
            mistral_api_key = None
        else:
            # Prioritize the API key passed as a parameter
            if api_key and api_key.strip():
                mistral_api_key = api_key
                logging.info("Mistral: Using API key provided as parameter")
            else:
                # If no parameter is provided, use the key from the config
                mistral_api_key = loaded_config_data['api_keys'].get('mistral')
                if mistral_api_key:
                    logging.info("Mistral: Using API key from config file")
                else:
                    logging.warning("Mistral: No API key found in config file")

        # Final check to ensure we have a valid API key
        if not mistral_api_key or not mistral_api_key.strip():
            logging.error("Mistral: No valid API key available")
            return "Mistral: No valid API key available"

        logging.debug(f"Mistral: Using API Key: {mistral_api_key[:5]}...{mistral_api_key[-5:]}")

        logging.debug("Mistral: Using provided string data")
        data = input_data

        # Text extraction
        if isinstance(input_data, list):
            text = extract_text_from_segments(input_data)
        elif isinstance(input_data, str):
            text = input_data
        else:
            raise ValueError("Mistral: Invalid input data format")

        mistral_model = loaded_config_data['models'].get('mistral', "mistral-large-latest")

        temp = float(temp) if temp is not None else 0.2
        if system_message is None:
            system_message = "You are a helpful AI assistant who does whatever the user requests."

        headers = {
            'Authorization': f'Bearer {mistral_api_key}',
            'Content-Type': 'application/json'
        }

        logging.debug(
            f"Deepseek API Key: {mistral_api_key[:5]}...{mistral_api_key[-5:] if mistral_api_key else None}")
        logging.debug("Mistral: Preparing data + prompt for submittal")
        mistral_prompt = f"{custom_prompt_arg}\n\n\n\n{text} "
        data = {
            "model": mistral_model,
            "messages": [
                {"role": "system",
                 "content": system_message},
                {"role": "user",
                "content": mistral_prompt}
            ],
            "temperature": temp,
            "top_p": 1,
            "max_tokens": 4096,
            "stream": False,
            "safe_prompt": False
        }

        logging.debug("Mistral: Posting request")
        response = requests.post('https://api.mistral.ai/v1/chat/completions', headers=headers, json=data)
        logging.debug(f"Full API response data: {response}")
        if response.status_code == 200:
            response_data = response.json()
            logging.debug(response_data)
            if 'choices' in response_data and len(response_data['choices']) > 0:
                summary = response_data['choices'][0]['message']['content'].strip()
                logging.debug("Mistral: request successful")
                return summary
            else:
                logging.warning("Mistral: Chat response not found in the response data")
                return "Mistral: Chat response not available"
        else:
            logging.error(f"Mistral: Chat request failed with status code {response.status_code}")
            logging.error(f"Mistral: Error response: {response.text}")
            return f"Mistral: Failed to process summary. Status code: {response.status_code}. Error: {response.text}"
    except Exception as e:
        logging.error(f"Mistral: Error in processing: {str(e)}", exc_info=True)
        return f"Mistral: Error occurred while processing Chat: {str(e)}"



# Stashed in here since OpenAI usage.... #FIXME
# FIXME - https://docs.vllm.ai/en/latest/getting_started/quickstart.html .... Great docs.
# def chat_with_vllm(input_data, custom_prompt_input, api_key=None, vllm_api_url="http://127.0.0.1:8000/v1/chat/completions", system_prompt=None):
#     loaded_config_data = load_and_log_configs()
#     llm_model = loaded_config_data['models']['vllm']
#     # API key validation
#     if api_key is None:
#         logging.info("vLLM: API key not provided as parameter")
#         logging.info("vLLM: Attempting to use API key from config file")
#         api_key = loaded_config_data['api_keys']['llama']
#
#     if api_key is None or api_key.strip() == "":
#         logging.info("vLLM: API key not found or is empty")
#     vllm_client = OpenAI(
#         base_url=vllm_api_url,
#         api_key=custom_prompt_input
#     )
#
#     if isinstance(input_data, str) and os.path.isfile(input_data):
#         logging.debug("vLLM: Loading json data for summarization")
#         with open(input_data, 'r') as file:
#             data = json.load(file)
#     else:
#         logging.debug("vLLM: Using provided string data for summarization")
#         data = input_data
#
#     logging.debug(f"vLLM: Loaded data: {data}")
#     logging.debug(f"vLLM: Type of data: {type(data)}")
#
#     if isinstance(data, dict) and 'summary' in data:
#         # If the loaded data is a dictionary and already contains a summary, return it
#         logging.debug("vLLM: Summary already exists in the loaded data")
#         return data['summary']
#
#     # If the loaded data is a list of segment dictionaries or a string, proceed with summarization
#     if isinstance(data, list):
#         segments = data
#         text = extract_text_from_segments(segments)
#     elif isinstance(data, str):
#         text = data
#     else:
#         raise ValueError("Invalid input data format")
#
#
#     custom_prompt = custom_prompt_input
#
#     completion = client.chat.completions.create(
#         model=llm_model,
#         messages=[
#             {"role": "system", "content": f"{system_prompt}"},
#             {"role": "user", "content": f"{text} \n\n\n\n{custom_prompt}"}
#         ]
#     )
#     vllm_summary = completion.choices[0].message.content
#     return vllm_summary



#
#
#######################################################################################################################