import logging
import os
from openai import AzureOpenAI
from openai import OpenAIError, RateLimitError
# from openai import OpenAI  # only needed for the commented-out non-Azure client below
# from config import OPENAI_API_KEY, AZURE_OPENAI_KEY

# (Alternative) Initialize the standard OpenAI client instead of Azure:
# api_key = os.getenv('OPENAI_API_KEY')
# client = OpenAI(api_key=OPENAI_API_KEY)

AZURE_OPENAI_ENDPOINT = os.getenv('AZURE_OPENAI_ENDPOINT')
AZURE_API_VERSION = "2023-03-15-preview"  # API version for Azure OpenAI
AZURE_OPENAI_KEY = os.getenv('AZURE_OPENAI_API_KEY')
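# Example environment setup (placeholder values; the variable names are assumed
# to match the os.getenv() calls above):
#   export AZURE_OPENAI_ENDPOINT="https://<your-resource>.openai.azure.com/"
#   export AZURE_OPENAI_API_KEY="<your-key>"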
# Initialize the Azure OpenAI client with the endpoint and API key.
# client stays None if initialization fails, so callers can check for it
# instead of hitting a NameError later.
client = None
try:
    client = AzureOpenAI(
        azure_endpoint=AZURE_OPENAI_ENDPOINT,
        api_key=AZURE_OPENAI_KEY,
        api_version=AZURE_API_VERSION
    )
    logging.info("Azure OpenAI client initialized")
except Exception as e:
    logging.error(f"Azure OpenAI API error: {e}")


def generate_rag_response(json_output, user_query):
    logging.info("Generating RAG response")
    if client is None:
        return None, "Azure OpenAI client is not initialized."

    # Extract the retrieved chunk texts from the search results
    context_texts = [hit['chunk_text'] for hit in json_output]

    # Build the context block and the instruction prompt
    context = "\n".join(context_texts)
    prompt = (
        f"Based on the given context, answer the user query: {user_query}\n"
        f"Context:\n{context}\n"
        "Cite the IDs of the provided articles where relevant to the query, "
        "always using the format [1][2]... etc."
    )

    main_prompt = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt}
    ]

    try:
        # Create a chat completion request; with AzureOpenAI, `model` is the
        # name of the Azure deployment, not an OpenAI model identifier
        chat_completion = client.chat.completions.create(
            messages=main_prompt,
            model="urdu-llama",  # Azure deployment name
            max_tokens=2000,  # Limit the maximum number of tokens in the response
            temperature=0.5
        )
        # Log and return the model's response
        logging.info("RAG response generation completed")
        logging.info(f"RAG response: {chat_completion.choices[0].message.content}")
        return chat_completion.choices[0].message.content, None

    except RateLimitError as e:
        logging.error(f"Rate limit exceeded: {e}")
        return None, "Rate limit exceeded. Please try again later."
    except OpenAIError as e:
        logging.error(f"OpenAI API error: {e}")
        return None, f"An error occurred: {str(e)}"
    except Exception as e:
        logging.error(f"Unexpected error: {e}")
        return None, str(e)
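

# --- Usage sketch (illustrative only) ---
# A minimal example of calling generate_rag_response. The input is assumed to
# be a list of search hits, each a dict with a 'chunk_text' key (as consumed
# above); the hit texts and query below are hypothetical placeholders.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    sample_hits = [
        {"chunk_text": "[1] Lahore is the capital of Punjab, Pakistan."},
        {"chunk_text": "[2] Lahore is known for its Mughal-era architecture."},
    ]
    answer, error = generate_rag_response(sample_hits, "What is Lahore known for?")
    if error:
        print(f"Error: {error}")
    else:
        print(answer)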