himel06 committed on
Commit
83ffd72
1 Parent(s): 0b0e325

Update BanglaRAG/bangla_rag_pipeline.py

Files changed (1)
  1. BanglaRAG/bangla_rag_pipeline.py +23 -30
BanglaRAG/bangla_rag_pipeline.py CHANGED
@@ -26,7 +26,6 @@ warnings.filterwarnings("ignore")
 class BanglaRAGChain:
     """
     Bangla Retrieval-Augmented Generation (RAG) Chain for question answering.
-
     This class uses a HuggingFace/local language model for text generation, a Chroma vector database for
     document retrieval, and a custom prompt template to create a RAG chain that can generate
     responses to user queries in Bengali.
@@ -74,7 +73,6 @@ class BanglaRAGChain:
     ):
         """
         Loads the required models and data for the RAG chain.
-
         Args:
             chat_model_id (str): The Hugging Face model ID for the chat model.
             embed_model_id (str): The Hugging Face model ID for the embedding model.
@@ -119,6 +117,7 @@ class BanglaRAGChain:
 
         rprint(Panel("[bold green]Initializing LLM...", expand=False))
         self._get_llm()
+
         rprint(Panel("[bold green]Creating chain...", expand=False))
         self._create_chain()
 
@@ -141,17 +140,14 @@ class BanglaRAGChain:
                     low_cpu_mem_usage=True,
                     quantization_config=bnb_config,
                     device_map="auto",
-                    # cache_dir=CACHE_DIR, # Removed cache_dir to use default caching
                 )
                 rprint(Panel("[bold green]Applied 4bit quantization successfully", expand=False))
-
             else:
                 self.chat_model = AutoModelForCausalLM.from_pretrained(
                     self.chat_model_id,
                     torch_dtype=torch.float16,
                     low_cpu_mem_usage=True,
                     device_map="auto",
-                    # cache_dir=CACHE_DIR, # Removed cache_dir to use default caching
                 )
                 rprint(Panel("[bold green]Chat Model loaded successfully!", expand=False))
         except Exception as e:
@@ -194,9 +190,8 @@ class BanglaRAGChain:
             )
             rprint(Panel(f"[bold green]Loaded embedding model successfully!", expand=False))
         except Exception as e:
-            rprint(Panel("f[red]embedding model loading failed: {e}", expand=False))
+            rprint(Panel(f"[red]embedding model loading failed: {e}", expand=False))
 
-
         self._db = Chroma.from_texts(texts=self._documents, embedding=embeddings)
         rprint(
             Panel("[bold green]Chroma database updated successfully!", expand=False)
@@ -207,13 +202,10 @@ class BanglaRAGChain:
     def _create_chain(self):
         """Creates the retrieval-augmented generation (RAG) chain."""
         template = """Below is an instruction in Bengali language that describes a task, paired with an input also in Bengali language that provides further context. Write a response in Bengali that appropriately completes the request.
-
 ### Instruction:
 {question}
-
 ### Input:
 {context}
-
 ### Response:
 """
         prompt_template = ChatPromptTemplate(
@@ -256,7 +248,13 @@ class BanglaRAGChain:
 
     def _get_retriever(self):
         """Creates a retriever for the vector database."""
-        self._retriever = self._db.as_retriever(search_kwargs={"k": self.k})
+        try:
+            self._retriever = self._db.as_retriever(
+                search_type="similarity", search_kwargs={"k": self.k}
+            )
+            rprint(Panel("[bold green]Retriever created successfully!", expand=False))
+        except Exception as e:
+            rprint(Panel(f"[red]Retriever creation failed: {e}", expand=False))
 
     def _get_llm(self):
         """Initializes the language model using the Hugging Face pipeline."""
@@ -280,24 +278,19 @@ class BanglaRAGChain:
             rprint(Panel("[bold green]LLM initialized successfully!", expand=False))
         except Exception as e:
             rprint(Panel(f"[red]LLM initialization failed: {e}", expand=False))
+            self._llm = None  # Ensure it’s set to None on failure
 
-    def _format_docs(self, docs):
-        """Formats the retrieved documents for the prompt."""
-        formatted_docs = "\n".join([re.sub(r"\s+", " ", doc) for doc in docs])
-        return formatted_docs
+    def __call__(self, query):
+        """Runs the RAG chain on a user query and returns the generated answer."""
+        if not self._chain:
+            raise ValueError("The chain has not been initialized.")
+        if self._chain:
+            result = self._chain.invoke({"question": query})
+            return result["answer"], result["context"]
 
-    def query(self, prompt: str) -> str:
-        """
-        Queries the RAG chain with a given prompt.
-
-        Args:
-            prompt (str): The input prompt to query the RAG chain.
-
-        Returns:
-            str: The generated response from the RAG chain.
-        """
-        return self._chain.invoke({"question": prompt})
-
-    def __call__(self, prompt: str) -> str:
-        """Alias for the query method."""
-        return self.query(prompt)
+    def _format_docs(self, docs):
+        """Formats retrieved documents into a string format."""
+        context = ""
+        for i, doc in enumerate(docs):
+            context += f"\nDocument {i + 1}:\n{doc.page_content}\n\n"
+        return context
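For context, a minimal usage sketch of the pipeline after this commit follows. It assumes the module path and the two load() arguments named in the diff above (chat_model_id, embed_model_id); the placeholder model IDs and any further load() parameters are not shown in this commit and are only illustrative.

# Minimal usage sketch (not part of this commit). Only chat_model_id and
# embed_model_id are documented in the diff above; the placeholder values
# and any additional load() arguments are assumptions.
from BanglaRAG.bangla_rag_pipeline import BanglaRAGChain

rag = BanglaRAGChain()
rag.load(
    chat_model_id="<hf-chat-model-id>",        # placeholder Hugging Face model ID
    embed_model_id="<hf-embedding-model-id>",  # placeholder embedding model ID
)

# After this commit, calling the chain returns an (answer, context) pair via
# __call__, rather than a single string from the removed query() method.
answer, context = rag("বাংলা ভাষায় একটি প্রশ্ন")
print(answer)

Note that the new __call__ expects self._chain.invoke() to return a dict with "answer" and "context" keys, so callers now receive both the generated answer and the retrieved context.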