import logging
import os
from openai import AzureOpenAI
from openai import OpenAIError, RateLimitError
# from openai import OpenAI  # only needed for the commented-out non-Azure client below
# from config import OPENAI_API_KEY, AZURE_OPENAI_KEY

# (Alternative) Initialize the standard OpenAI client instead of Azure:
# api_key = os.getenv('OPENAI_API_KEY')
# client = OpenAI(api_key=OPENAI_API_KEY)

AZURE_OPENAI_ENDPOINT = os.getenv('AZURE_OPENAI_ENDPOINT')
AZURE_API_VERSION = "2023-03-15-preview"  # API version for Azure OpenAI
AZURE_OPENAI_KEY = os.getenv('AZURE_OPENAI_API_KEY')
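# Example environment setup (placeholder values; the variable names are assumed
# to match the os.getenv() calls above):
#   export AZURE_OPENAI_ENDPOINT="https://<your-resource>.openai.azure.com/"
#   export AZURE_OPENAI_API_KEY="<your-key>"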
# Initialize the Azure OpenAI client with the endpoint and API key.
# client stays None if initialization fails, so callers can check for it
# instead of hitting a NameError later.
client = None
try:
    client = AzureOpenAI(
        azure_endpoint=AZURE_OPENAI_ENDPOINT,
        api_key=AZURE_OPENAI_KEY,
        api_version=AZURE_API_VERSION
    )
    logging.info("Azure OpenAI client initialized")
except Exception as e:
    logging.error(f"Azure OpenAI API error: {e}")


def generate_rag_response(json_output, user_query):
    logging.info("Generating RAG response")
    if client is None:
        return None, "Azure OpenAI client is not initialized."

    # Extract the retrieved chunk texts from the search results
    context_texts = [hit['chunk_text'] for hit in json_output]

    # Build the context block and the instruction prompt
    context = "\n".join(context_texts)
    prompt = (
        f"Based on the given context, answer the user query: {user_query}\n"
        f"Context:\n{context}\n"
        "Cite the IDs of the provided articles where relevant to the query, "
        "always using the format [1][2]... etc."
    )

    main_prompt = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt}
    ]

    try:
        # Create a chat completion request; with AzureOpenAI, `model` is the
        # name of the Azure deployment, not an OpenAI model identifier
        chat_completion = client.chat.completions.create(
            messages=main_prompt,
            model="urdu-llama",  # Azure deployment name
            max_tokens=2000,  # Limit the maximum number of tokens in the response
            temperature=0.5
        )
        # Log and return the model's response
        logging.info("RAG response generation completed")
        logging.info(f"RAG response: {chat_completion.choices[0].message.content}")
        return chat_completion.choices[0].message.content, None

    except RateLimitError as e:
        logging.error(f"Rate limit exceeded: {e}")
        return None, "Rate limit exceeded. Please try again later."
    except OpenAIError as e:
        logging.error(f"OpenAI API error: {e}")
        return None, f"An error occurred: {str(e)}"
    except Exception as e:
        logging.error(f"Unexpected error: {e}")
        return None, str(e)
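

# --- Usage sketch (illustrative only) ---
# A minimal example of calling generate_rag_response. The input is assumed to
# be a list of search hits, each a dict with a 'chunk_text' key (as consumed
# above); the hit texts and query below are hypothetical placeholders.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    sample_hits = [
        {"chunk_text": "[1] Lahore is the capital of Punjab, Pakistan."},
        {"chunk_text": "[2] Lahore is known for its Mughal-era architecture."},
    ]
    answer, error = generate_rag_response(sample_hits, "What is Lahore known for?")
    if error:
        print(f"Error: {error}")
    else:
        print(answer)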