import logging
import os

import requests
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS


class RAG:
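    """Minimal retrieval-augmented generation (RAG) pipeline: retrieve the most
    similar chunks from a local FAISS index and send them, together with the
    question, to a hosted text-generation endpoint."""
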
    # Fallback reply, in Catalan: "Sorry, I could not answer your question."
    NO_ANSWER_MESSAGE: str = "Ho sento, no he pogut respondre la teva pregunta."

    vectorstore = "index-intfloat_multilingual-e5-small-500-100-CA-ES" # mixed
    #vectorstore = "vectorestore" # CA only

    def __init__(self, hf_token, embeddings_model, model_name):
        self.model_name = model_name
        self.hf_token = hf_token

        # Load the FAISS vector store with the configured embeddings model.
        embeddings = HuggingFaceEmbeddings(model_name=embeddings_model, model_kwargs={"device": "cpu"})
        self.vector_store = FAISS.load_local(self.vectorstore, embeddings, allow_dangerous_deserialization=True)

        logging.info("RAG loaded!")
    
    def get_context(self, instruction, number_of_contexts=1):
        # Retrieve the top-k most similar chunks together with their similarity scores.
        documents = self.vector_store.similarity_search_with_score(instruction, k=number_of_contexts)
        return documents
        
    def predict(self, instruction, context, model_parameters):
        # Prefer the token passed at construction time; fall back to the environment.
        api_key = self.hf_token or os.getenv("HF_TOKEN")

        headers = {
            "Accept": "application/json",
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        }

        query = f"### Instruction\n{instruction}\n\n### Context\n{context}\n\n### Answer\n "
        # prompt = "You are a helpful assistant. Answer the question using only the context you are provided with. If it is not possible to do it with the context, just say 'I can't answer'. <|endoftext|>"

        payload = {
            "inputs": query,
            "parameters": model_parameters,
        }

        # model_name is expected to be the full URL of a hosted inference endpoint.
        response = requests.post(self.model_name, headers=headers, json=payload)

        # The endpoint echoes the prompt back: keep only the text after the last
        # "###" marker and drop the leading " Answer\n" (8 characters).
        return response.json()[0]["generated_text"].split("###")[-1][8:]
    
    def beautiful_context(self, docs):
        # Build three views of the retrieved chunks: the bare text for the
        # prompt, a human-readable context with titles and URLs, and the list
        # of source URLs.
        text_context = ""
        full_context = ""
        source_context = []
        for doc in docs:
            text_context += doc[0].page_content
            full_context += doc[0].page_content + "\n"
            full_context += doc[0].metadata["Títol de la norma"] + "\n\n"  # "Title of the regulation"
            full_context += doc[0].metadata["url"] + "\n\n"
            source_context.append(doc[0].metadata["url"])

        return text_context, full_context, source_context

    def get_response(self, prompt: str, model_parameters: dict) -> tuple:
        docs = self.get_context(prompt, model_parameters["NUM_CHUNKS"])
        text_context, full_context, source = self.beautiful_context(docs)

        # NUM_CHUNKS only controls retrieval; strip it before it reaches the model.
        del model_parameters["NUM_CHUNKS"]

        response = self.predict(prompt, text_context, model_parameters)

        # Return a (answer, full_context, sources) triple on both paths so
        # callers can unpack it uniformly.
        if not response:
            return self.NO_ANSWER_MESSAGE, full_context, source

        return response, full_context, source
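

# A minimal usage sketch. The endpoint URL, the generation parameters, and the
# example question below are illustrative assumptions, not values taken from
# this repository; adapt them to your deployment. The embeddings model matches
# the one named in the index directory above. NUM_CHUNKS is consumed by
# get_response() for retrieval and stripped before the request is sent.
if __name__ == "__main__":
    rag = RAG(
        hf_token=os.getenv("HF_TOKEN"),
        embeddings_model="intfloat/multilingual-e5-small",
        model_name="https://example.endpoints.huggingface.cloud",  # hypothetical endpoint URL
    )
    answer, context, sources = rag.get_response(
        "Quina normativa regula els habitatges d'ús turístic?",  # "Which rules govern tourist-use housing?"
        {"NUM_CHUNKS": 2, "max_new_tokens": 200, "temperature": 0.1},
    )
    print(answer)
    print(sources)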