Commit 988981a: config update
KonradSzafer committed
1 parent: 5195c5a
Files changed:
- api/__main__.py              +1  -11
- app.py                       +2  -15
- benchmark/__main__.py        +1  -10
- config/.env.example          +11 -3
- discord_bot/__main__.py      +2  -16
- discord_bot/client/client.py +17 -33
- qa_engine/config.py          +9  -1
- qa_engine/logger.py          +4  -78
- qa_engine/mocks.py           +1  -1
- qa_engine/qa_engine.py       +54 -82
- requirements.txt             +0  -1
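Every entry point in this commit switches from passing individual settings to passing a single Config object. A minimal sketch of the new construction pattern, using only names that appear in the diffs below:

from qa_engine import Config, QAEngine
from discord_bot import DiscordClient

config = Config()                                    # values come from the environment (see config/.env.example)
qa_engine = QAEngine(config=config)                  # replaces QAEngine(llm_model_id=..., embedding_model_id=..., ...)
client = DiscordClient(qa_engine=qa_engine, config=config)
client.run(config.discord_token)
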
api/__main__.py
CHANGED
@@ -6,17 +6,7 @@ from qa_engine import logger, Config, QAEngine
 
 config = Config()
 app = FastAPI()
-qa_engine = QAEngine(
-    llm_model_id=config.question_answering_model_id,
-    embedding_model_id=config.embedding_model_id,
-    index_repo_id=config.index_repo_id,
-    prompt_template=config.prompt_template,
-    use_docs_for_context=config.use_docs_for_context,
-    num_relevant_docs=config.num_relevant_docs,
-    add_sources_to_response=config.add_sources_to_response,
-    use_messages_for_context=config.use_messages_in_context,
-    debug=config.debug
-)
+qa_engine = QAEngine(config=config)
 
 
 @app.get('/')

app.py
CHANGED
@@ -8,16 +8,7 @@ from discord_bot import DiscordClient
 
 
 config = Config()
-qa_engine = QAEngine(
-    llm_model_id=config.question_answering_model_id,
-    embedding_model_id=config.embedding_model_id,
-    index_repo_id=config.index_repo_id,
-    prompt_template=config.prompt_template,
-    use_docs_for_context=config.use_docs_for_context,
-    add_sources_to_response=config.add_sources_to_response,
-    use_messages_for_context=config.use_messages_in_context,
-    debug=config.debug
-)
+qa_engine = QAEngine(config=config)
 
 
 def gradio_interface():
@@ -41,11 +32,7 @@ def gradio_interface():
 def discord_bot_inference_thread():
     client = DiscordClient(
         qa_engine=qa_engine,
-        …
-        num_last_messages=config.num_last_messages,
-        use_names_in_context=config.use_names_in_context,
-        enable_commands=config.enable_commands,
-        debug=config.debug
+        config=config
     )
     client.run(config.discord_token)
 

benchmark/__main__.py
CHANGED
@@ -10,16 +10,7 @@ from qa_engine import logger, Config, QAEngine
 QUESTIONS_FILENAME = 'benchmark/questions.json'
 
 config = Config()
-qa_engine = QAEngine(
-    llm_model_id=config.question_answering_model_id,
-    embedding_model_id=config.embedding_model_id,
-    index_repo_id=config.index_repo_id,
-    prompt_template=config.prompt_template,
-    use_docs_for_context=config.use_docs_for_context,
-    add_sources_to_response=config.add_sources_to_response,
-    use_messages_for_context=config.use_messages_in_context,
-    debug=config.debug
-)
+qa_engine = QAEngine(config=config)
 
 
 def main():

config/.env.example
CHANGED
@@ -1,7 +1,7 @@
 # QA engine settings
-QUESTION_ANSWERING_MODEL_ID=
-EMBEDDING_MODEL_ID=
-INDEX_REPO_ID=
+QUESTION_ANSWERING_MODEL_ID=mock
+EMBEDDING_MODEL_ID=hkunlp/instructor-large
+INDEX_REPO_ID=KonradSzafer/index-instructor-large-812-m512-all_repos_above_50_stars
 PROMPT_TEMPLATE_NAME=llama
 USE_DOCS_FOR_CONTEXT=True
 NUM_RELEVANT_DOCS=4
@@ -9,6 +9,14 @@ ADD_SOURCES_TO_RESPONSE=True
 USE_MESSAGES_IN_CONTEXT=True
 DEBUG=True
 
+# Model settings
+MIN_NEW_TOKENS=64
+MAX_NEW_TOKENS=800
+TEMPERATURE=0.6
+TOP_K=50
+TOP_P=0.9
+DO_SAMPLE=True
+
 # Discord settings
 DISCORD_TOKEN=your-bot-token
 NUM_LAST_MESSAGES=1

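The new model settings are standard text-generation parameters; the updated TransformersPipelineModel in qa_engine/qa_engine.py forwards them to the transformers pipeline. A rough sketch of the equivalent direct call, with a placeholder model ID:

from transformers import pipeline

generator = pipeline(
    'text-generation',
    model='some-org/some-model',   # placeholder; QUESTION_ANSWERING_MODEL_ID selects the real model
    min_new_tokens=64,
    max_new_tokens=800,
    temperature=0.6,
    top_k=50,
    top_p=0.9,
    do_sample=True,
)
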
discord_bot/__main__.py
CHANGED
@@ -3,24 +3,10 @@ from discord_bot.client import DiscordClient
 
 
 config = Config()
-qa_engine = QAEngine(
-    llm_model_id=config.question_answering_model_id,
-    embedding_model_id=config.embedding_model_id,
-    index_repo_id=config.index_repo_id,
-    prompt_template=config.prompt_template,
-    use_docs_for_context=config.use_docs_for_context,
-    num_relevant_docs=config.num_relevant_docs,
-    add_sources_to_response=config.add_sources_to_response,
-    use_messages_for_context=config.use_messages_in_context,
-    debug=config.debug
-)
+qa_engine = QAEngine(config=config)
 client = DiscordClient(
     qa_engine=qa_engine,
-    …
-    num_last_messages=config.num_last_messages,
-    use_names_in_context=config.use_names_in_context,
-    enable_commands=config.enable_commands,
-    debug=config.debug
+    config=config
 )
 
 

discord_bot/client/client.py
CHANGED
@@ -4,56 +4,40 @@ from urllib.parse import quote
 import discord
 from typing import List
 
-from qa_engine import logger, QAEngine
+from qa_engine import logger, Config, QAEngine
 from discord_bot.client.utils import split_text_into_chunks
 
 
 class DiscordClient(discord.Client):
     """
     Discord Client class, used for interacting with a Discord server.
-
-    Args:
-        qa_service_url (str): The URL of the question answering service.
-        num_last_messages (int, optional): The number of previous messages to use as context for generating answers.
-            Defaults to 5.
-        use_names_in_context (bool, optional): Whether to include user names in the message context. Defaults to True.
-        enable_commands (bool, optional): Whether to enable commands for the bot. Defaults to True.
-
-    Attributes:
-        qa_service_url (str): The URL of the question answering service.
-        num_last_messages (int): The number of previous messages to use as context for generating answers.
-        use_names_in_context (bool): Whether to include user names in the message context.
-        enable_commands (bool): Whether to enable commands for the bot.
-        max_message_len (int): The maximum length of a message.
-        system_prompt (str): The system prompt to be used.
-
     """
     def __init__(
         self,
        qa_engine: QAEngine,
-        …
-        …
-        use_names_in_context: bool = True,
-        enable_commands: bool = True,
-        debug: bool = False
-    ):
+        config: Config,
+    ):
         logger.info('Initializing Discord client...')
         intents = discord.Intents.all()
         intents.message_content = True
         super().__init__(intents=intents, command_prefix='!')
 
-        assert num_last_messages >= 1, \
-            'The number of last messages in context should be at least 1'
-
         self.qa_engine: QAEngine = qa_engine
-        self.channel_ids: list[int] = DiscordClient._process_channel_ids(
-            …
-        …
-        self.…
-        self.…
-        self.…
+        self.channel_ids: list[int] = DiscordClient._process_channel_ids(
+            config.discord_channel_ids
+        )
+        self.num_last_messages: int = config.num_last_messages
+        self.use_names_in_context: bool = config.use_names_in_context
+        self.enable_commands: bool = config.enable_commands
+        self.debug: bool = config.debug
+        self.min_message_len: int = 1800
         self.max_message_len: int = 2000
 
+        assert all([isinstance(id, int) for id in self.channel_ids]), \
+            'All channel ids should be of type int'
+        assert self.num_last_messages >= 1, \
+            'The number of last messages in context should be at least 1'
+
 
     @staticmethod
     def _process_channel_ids(channel_ids) -> list[int]:
@@ -103,7 +87,7 @@ class DiscordClient(discord.Client):
         chunks = split_text_into_chunks(
             text=answer,
             split_characters=['. ', ', ', '\n'],
-            min_size=self.…
+            min_size=self.min_message_len,
             max_size=self.max_message_len
         )
         for chunk in chunks:

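The client now reads all Discord-specific settings from Config and validates them up front: every entry in DISCORD_CHANNEL_IDS must resolve to an int and NUM_LAST_MESSAGES must be at least 1, otherwise the new assertions raise AssertionError. A minimal construction sketch, mirroring discord_bot/__main__.py above:

config = Config()
qa_engine = QAEngine(config=config)
client = DiscordClient(qa_engine=qa_engine, config=config)   # channel ids and message window come from config
client.run(config.discord_token)
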
qa_engine/config.py
CHANGED
@@ -11,7 +11,7 @@ def get_env(env_name: str, default: Any = None, warn: bool = True) -> str:
     if default is not None:
         if warn:
             logger.warning(
-                f'Environment variable {env_name} not found.
+                f'Environment variable {env_name} not found.' \
                 f'Using the default value: {default}.'
             )
         return default
@@ -34,6 +34,14 @@ class Config:
     use_messages_in_context: bool = eval(get_env('USE_MESSAGES_IN_CONTEXT', 'True'))
     debug: bool = eval(get_env('DEBUG', 'True'))
 
+    # Model config
+    min_new_tokens: int = int(get_env('MIN_NEW_TOKENS', 64))
+    max_new_tokens: int = int(get_env('MAX_NEW_TOKENS', 800))
+    temperature: float = float(get_env('TEMPERATURE', 0.6))
+    top_k: int = int(get_env('TOP_K', 50))
+    top_p: float = float(get_env('TOP_P', 0.95))
+    do_sample: bool = eval(get_env('DO_SAMPLE', 'True'))
+
     # Discord bot config - optional
     discord_token: str = get_env('DISCORD_TOKEN', '-', warn=False)
     discord_channel_ids: list[int] = get_env('DISCORD_CHANNEL_IDS', field(default_factory=list), warn=False)

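A minimal usage sketch of the extended Config (assumed behaviour: when a variable is unset, the get_env defaults above apply and a warning is logged for each missing one):

from qa_engine import Config

config = Config()
print(config.min_new_tokens, config.max_new_tokens)    # 64 800 with the defaults above
print(config.temperature, config.top_k, config.top_p)  # 0.6 50 0.95
print(config.do_sample)                                 # True

Note that the boolean fields are parsed with eval() and the numeric ones with int()/float(), so the .env strings must be valid Python literals such as True or False.
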
qa_engine/logger.py
CHANGED
@@ -1,88 +1,14 @@
 import logging
-import os
-import io
-import json
-from google.cloud import bigquery
-from google.oauth2 import service_account
-from google.api_core.exceptions import GoogleAPIError
-
-job_config = bigquery.LoadJobConfig(
-    schema=[
-        bigquery.SchemaField("timestamp", "TIMESTAMP", mode="REQUIRED"),
-        bigquery.SchemaField("log_entry", "STRING", mode="REQUIRED"),
-    ],
-    write_disposition="WRITE_APPEND",
-)
-
-
-class BigQueryLoggingHandler(logging.Handler):
-    def __init__(self):
-        super().__init__()
-        try:
-            project_id = os.getenv("BIGQUERY_PROJECT_ID")
-            dataset_id = os.getenv("BIGQUERY_DATASET_ID")
-            table_id = os.getenv("BIGQUERY_TABLE_ID")
-            print(f"project_id: {project_id}")
-            print(f"dataset_id: {dataset_id}")
-            print(f"table_id: {table_id}")
-            service_account_info = json.loads(
-                os.getenv("GOOGLE_SERVICE_ACCOUNT_JSON")
-                .replace('"', "")
-                .replace("'", '"')
-            )
-            print(f"service_account_info: {service_account_info}")
-            print(f"service_account_info type: {type(service_account_info)}")
-            print(f"service_account_info keys: {service_account_info.keys()}")
-            credentials = service_account.Credentials.from_service_account_info(
-                service_account_info
-            )
-            self.client = bigquery.Client(credentials=credentials, project=project_id)
-            self.table_ref = self.client.dataset(dataset_id).table(table_id)
-        except Exception as e:
-            print(f"Error: {e}")
-            self.handleError(e)
-
-    def emit(self, record):
-        try:
-            recordstr = f"{self.format(record)}"
-            body = io.BytesIO(recordstr.encode("utf-8"))
-            job = self.client.load_table_from_file(
-                body, self.table_ref, job_config=job_config
-            )
-            job.result()
-        except GoogleAPIError as e:
-            self.handleError(e)
-        except Exception as e:
-            self.handleError(e)
-
-    def handleError(self, record):
-        """
-        Handle errors associated with logging.
-        This method prevents logging-related exceptions from propagating.
-        Optionally, implement more sophisticated error handling here.
-        """
-        if isinstance(record, logging.LogRecord):
-            super().handleError(record)
-        else:
-            print(f"Logging error: {record}")
 
 
 logger = logging.getLogger(__name__)
 
-
 def setup_logger() -> None:
     """
     Logger setup.
     """
     logger.setLevel(logging.DEBUG)
-    …
-    …
-    …
-    )
-    stream_handler = logging.StreamHandler()
-    stream_handler.setFormatter(stream_formatter)
-    logger.addHandler(stream_handler)
-
-    bq_handler = BigQueryLoggingHandler()
-    bq_handler.setFormatter(stream_formatter)
-    logger.addHandler(bq_handler)
+    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+    handler = logging.StreamHandler()
+    handler.setFormatter(formatter)
+    logger.addHandler(handler)

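With the BigQuery handler removed, logging goes to stdout only. A minimal usage sketch (module path as shown above; the printed line is illustrative):

from qa_engine.logger import logger, setup_logger

setup_logger()                       # DEBUG level, single StreamHandler with the formatter above
logger.info('QA engine starting')    # e.g. 2024-01-01 12:00:00,000 - qa_engine.logger - INFO - QA engine starting
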
qa_engine/mocks.py
CHANGED
@@ -10,7 +10,7 @@ class MockLocalBinaryModel(LLM):
     """
 
     model_path: str = None
-    llm: str = '…
+    llm: str = 'Warsaw'
 
     def __init__(self):
         super().__init__()

qa_engine/qa_engine.py
CHANGED
@@ -16,7 +16,7 @@ from langchain.embeddings import HuggingFaceEmbeddings, HuggingFaceHubEmbeddings
 from langchain.vectorstores import FAISS
 from sentence_transformers import CrossEncoder
 
-from qa_engine import logger
+from qa_engine import logger, Config
 from qa_engine.response import Response
 from qa_engine.mocks import MockLocalBinaryModel
 
@@ -25,16 +25,16 @@ class LocalBinaryModel(LLM):
     model_id: str = None
     llm: None = None
 
-    def __init__(self, …
+    def __init__(self, config: Config):
         super().__init__()
         # pip install llama_cpp_python==0.1.39
         from llama_cpp import Llama
 
-        …
-        …
-        …
-        …
-        self.llm = Llama(model_path=model_path, n_ctx=4096)
+        self.model_id = config.question_answering_model_id
+        self.model_path = f'qa_engine/{self.model_id}'
+        if not os.path.exists(self.model_path):
+            raise ValueError(f'{self.model_path} does not exist')
+        self.llm = Llama(model_path=self.model_path, n_ctx=4096)
 
     def _call(self, prompt: str, stop: Optional[list[str]] = None) -> str:
         output = self.llm(
@@ -58,13 +58,19 @@ class TransformersPipelineModel(LLM):
     model_id: str = None
     pipeline: str = None
 
-    def __init__(self, …
+    def __init__(self, config: Config):
         super().__init__()
-        self.model_id = …
-        …
-        …
+        self.model_id = config.question_answering_model_id
+        self.min_new_tokens = config.min_new_tokens
+        self.max_new_tokens = config.max_new_tokens
+        self.temperature = config.temperature
+        self.top_k = config.top_k
+        self.top_p = config.top_p
+        self.do_sample = config.do_sample
+
+        tokenizer = AutoTokenizer.from_pretrained(self.model_id)
         model = AutoModelForCausalLM.from_pretrained(
-            model_id,
+            self.model_id,
             torch_dtype=torch.bfloat16,
             trust_remote_code=True,
             load_in_8bit=False,
@@ -79,10 +85,12 @@ class TransformersPipelineModel(LLM):
             device_map='auto',
             eos_token_id=tokenizer.eos_token_id,
             pad_token_id=tokenizer.eos_token_id,
-            min_new_tokens=…
-            max_new_tokens=…
-            temperature=…
-            …
+            min_new_tokens=self.min_new_tokens,
+            max_new_tokens=self.max_new_tokens,
+            temperature=self.temperature,
+            top_k=self.top_k,
+            top_p=self.top_p,
+            do_sample=self.do_sample,
         )
 
     def _call(self, prompt: str, stop: Optional[list[str]] = None) -> str:
@@ -103,7 +111,7 @@ class APIServedModel(LLM):
     model_url: str = None
     debug: bool = None
 
-    def __init__(self, model_url: str…
+    def __init__(self, model_url: str, debug: bool = False):
         super().__init__()
         if model_url[-1] == '/':
             raise ValueError('URL should not end with a slash - "/"')
@@ -132,66 +140,36 @@
         return 'api_model'
 
 
-
 class QAEngine():
     """
     QAEngine class, used for generating answers to questions.
-
-    Args:
-        llm_model_id (str): The ID of the LLM model to be used.
-        embedding_model_id (str): The ID of the embedding model to be used.
-        index_repo_id (str): The ID of the index repository to be used.
-        run_locally (bool, optional): Whether to run the models locally or on the Hugging Face hub. Defaults to True.
-        use_docs_for_context (bool, optional): Whether to use relevant documents as context for generating answers.
-            Defaults to True.
-        use_messages_for_context (bool, optional): Whether to use previous messages as context for generating answers.
-            Defaults to True.
-        debug (bool, optional): Whether to log debug information. Defaults to False.
-
-    Attributes:
-        use_docs_for_context (bool): Whether to use relevant documents as context for generating answers.
-        use_messages_for_context (bool): Whether to use previous messages as context for generating answers.
-        debug (bool): Whether to log debug information.
-        llm_model (Union[LocalBinaryModel, HuggingFacePipeline, HuggingFaceHub]): The LLM model to be used.
-        embedding_model (Union[HuggingFaceInstructEmbeddings, HuggingFaceHubEmbeddings]): The embedding model to be used.
-        prompt_template (PromptTemplate): The prompt template to be used.
-        llm_chain (LLMChain): The LLM chain to be used.
-        knowledge_index (FAISS): The FAISS index to be used.
-
     """
-    def __init__(
-        self,
-        llm_model_id: str,
-        embedding_model_id: str,
-        index_repo_id: str,
-        prompt_template: str,
-        use_docs_for_context: bool = True,
-        num_relevant_docs: int = 3,
-        add_sources_to_response: bool = True,
-        use_messages_for_context: bool = True,
-        first_stage_docs: int = 50,
-        debug: bool = False
-    ):
+    def __init__(self, config: Config):
         super().__init__()
-        self.…
-        self.…
-        self.…
-        self.…
-        self.…
-        self.…
-        self.…
+        self.config = config
+        self.question_answering_model_id=config.question_answering_model_id
+        self.embedding_model_id=config.embedding_model_id
+        self.index_repo_id=config.index_repo_id
+        self.prompt_template=config.prompt_template
+        self.use_docs_for_context=config.use_docs_for_context
+        self.num_relevant_docs=config.num_relevant_docs
+        self.add_sources_to_response=config.add_sources_to_response
+        self.use_messages_for_context=config.use_messages_in_context
+        self.debug=config.debug
+
+        self.first_stage_docs: int = 50
 
         prompt = PromptTemplate(
-            template=prompt_template,
+            template=self.prompt_template,
             input_variables=['question', 'context']
         )
-        self.llm_model = …
+        self.llm_model = self._get_model()
         self.llm_chain = LLMChain(prompt=prompt, llm=self.llm_model)
 
         if self.use_docs_for_context:
-            logger.info(f'Downloading {index_repo_id}')
+            logger.info(f'Downloading {self.index_repo_id}')
             snapshot_download(
-                repo_id=index_repo_id,
+                repo_id=self.index_repo_id,
                 allow_patterns=['*.faiss', '*.pkl'],
                 repo_type='dataset',
                 local_dir='indexes/run/'
@@ -200,7 +178,7 @@ class QAEngine():
             embed_instruction = 'Represent the Hugging Face library documentation'
             query_instruction = 'Query the most relevant piece of information from the Hugging Face documentation'
             embedding_model = HuggingFaceInstructEmbeddings(
-                model_name=embedding_model_id,
+                model_name=self.embedding_model_id,
                 embed_instruction=embed_instruction,
                 query_instruction=query_instruction
             )
@@ -209,27 +187,22 @@ class QAEngine():
         self.reranker = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-12-v2')
 
 
-    …
-    …
-        if 'local_models/' in llm_model_id:
+    def _get_model(self):
+        if 'local_models/' in self.question_answering_model_id:
             logger.info('using local binary model')
-            return LocalBinaryModel(
-                …
-            )
-        elif 'api_models/' in llm_model_id:
+            return LocalBinaryModel(self.config)
+        elif 'api_models/' in self.question_answering_model_id:
             logger.info('using api served model')
             return APIServedModel(
-                model_url=…
+                model_url=self.question_answering_model_id.replace('api_models/', ''),
                 debug=self.debug
             )
-        elif …
+        elif self.question_answering_model_id == 'mock':
             logger.info('using mock model')
             return MockLocalBinaryModel()
         else:
             logger.info('using transformers pipeline model')
-            return TransformersPipelineModel(
-                model_id=llm_model_id
-            )
+            return TransformersPipelineModel(self.config)
 
 
     @staticmethod
@@ -245,7 +218,8 @@ class QAEngine():
         Preprocess the answer by removing unnecessary sequences and stop sequences.
         '''
         SEQUENCES_TO_REMOVE = [
-            'Factually: ', 'Answer: ', '<<SYS>>', '<</SYS>>', '[INST]', '[/INST]'
+            'Factually: ', 'Answer: ', '<<SYS>>', '<</SYS>>', '[INST]', '[/INST]',
+            '<context>', '<\context>', '<question>', '<\question>',
         ]
         SEQUENCES_TO_STOP = [
             'User:', 'You:', 'Question:'
@@ -296,9 +270,8 @@ class QAEngine():
             )
         ]
         relevant_docs = relevant_docs[:self.num_relevant_docs]
-        context += '\…
-        …
-            context += f'\n\n<DOCUMENT_{i}>\n {doc.page_content} \n</DOCUMENT_{i}>'
+        context += '\nExtracted documents:\n'
+        context += ''.join([doc.page_content for doc in relevant_docs])
         metadata = [doc.metadata for doc in relevant_docs]
         response.set_sources(sources=[str(m['source']) for m in metadata])
 
@@ -314,7 +287,6 @@ class QAEngine():
         sep = '\n' + '-' * 100
         logger.info(f'question len: {len(question)} {sep}')
         logger.info(f'question: {question} {sep}')
-        logger.info(f'question processed: {question} {sep}')
         logger.info(f'answer len: {len(response.get_answer())} {sep}')
         logger.info(f'answer original: {answer} {sep}')
         logger.info(f'answer postprocessed: {response.get_answer()} {sep}')

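QAEngine._get_model now dispatches on the QUESTION_ANSWERING_MODEL_ID prefix alone. A summary sketch (the IDs in the comments are illustrative; only 'mock' appears in this commit's .env.example):

from qa_engine import Config, QAEngine

# 'local_models/<file>'  -> LocalBinaryModel (llama.cpp binary expected under qa_engine/)
# 'api_models/<url>'     -> APIServedModel (prefix stripped to obtain the URL)
# 'mock'                 -> MockLocalBinaryModel
# anything else          -> TransformersPipelineModel (treated as a Hugging Face model ID)
config = Config()
qa_engine = QAEngine(config=config)   # _get_model() picks the backend from the ID
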
requirements.txt
CHANGED
@@ -26,4 +26,3 @@ InstructorEmbedding==1.0.0
 faiss_cpu==1.7.3
 uvicorn==0.22.0
 pytest==7.3.1
-google-cloud-bigquery==3.17.2