import gradio as gr
from operator import itemgetter

from langchain_core.prompts import PromptTemplate
from langchain_community.embeddings import SentenceTransformerEmbeddings

# trust_remote_code=True is required because nomic-embed-text ships custom model code
embeddings = SentenceTransformerEmbeddings(
    model_name="nomic-ai/nomic-embed-text-v1.5",
    model_kwargs={"trust_remote_code": True}
)
print('Embeddings loaded successfully')

from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader

# Load the source PDF, split it into chunks, and index the chunks in FAISS
loader = PyPDFLoader("fibromyalgia-information-booklet-july2021.pdf")
documents = loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(documents)
vector_store = FAISS.from_documents(docs, embeddings)
retriever = vector_store.as_retriever()
print('Retriever loaded successfully')

from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

base_model_name = "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit"
tokenizer = AutoTokenizer.from_pretrained(base_model_name)

# Load the base model in 4-bit. load_in_4bit belongs inside BitsAndBytesConfig
# (so it is not passed separately to from_pretrained), and
# llm_int8_enable_fp32_cpu_offload keeps any modules that do not fit on the GPU
# in fp32 on the CPU. device_map="auto" lets accelerate place the layers.
model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    device_map="auto",
    trust_remote_code=True,  # required for some models
    quantization_config=BitsAndBytesConfig(
        load_in_4bit=True,
        llm_int8_enable_fp32_cpu_offload=True
    )
)

# Attach the fine-tuned adapter on top of the base model
adapter_path = "mohamedalcafory/PubMed_Llama3.1_Based_model"
model.load_adapter(adapter_path)
# Alternative: load tokenizer/model directly from the adapter repository
# tokenizer = AutoTokenizer.from_pretrained("mohamedalcafory/PubMed_Llama3.1_Based_model")
# model = AutoModelForCausalLM.from_pretrained("mohamedalcafory/PubMed_Llama3.1_Based_model")
print(f'Model loaded successfully: {model}')

from transformers import pipeline
from langchain_huggingface import HuggingFacePipeline

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    do_sample=True,          # sampling must be enabled for temperature/top_p to apply
    temperature=0.7,
    top_p=0.95,
    repetition_penalty=1.15,
    return_full_text=False   # return only the completion, not the echoed prompt
)
llm = HuggingFacePipeline(pipeline=pipe)

# The prompt injects the retrieved context alongside the question; with a
# "{query}"-only template the retrieved documents would never reach the model.
prompt = PromptTemplate(
    input_variables=["formatted_context", "query"],
    template=(
        "Use the following context to answer the question.\n\n"
        "Context:\n{formatted_context}\n\n"
        "Question: {query}\n"
        "Answer:"
    )
)

# Define the retrieval step
retrieve_docs = (lambda x: retriever.invoke(x["query"]))

# Define the generator chain: prompt -> LLM -> plain string
generator_chain = (
    prompt
    | llm
    | StrOutputParser()
)

def format_docs(docs):
    # Handle both Document objects and plain strings
    if docs and hasattr(docs[0], 'page_content'):
        return "\n\n".join(doc.page_content for doc in docs)
    return "\n\n".join(str(doc) for doc in docs)

# Create the full RAG chain: retrieve, format the context, then pass exactly
# the variables the prompt expects on to the generator chain.
rag_chain = (
    RunnablePassthrough.assign(context=retrieve_docs)
    | RunnablePassthrough.assign(formatted_context=lambda x: format_docs(x["context"]))
    | {"formatted_context": itemgetter("formatted_context"), "query": itemgetter("query")}
    | generator_chain
)

def process_query(query):
    try:
        response = rag_chain.invoke({"query": query})
        return response
    except Exception as e:
        return f"An error occurred: {str(e)}"

# Create Gradio interface
demo = gr.Interface(
    fn=process_query,
    inputs=gr.Textbox(label="Your question", lines=2, placeholder="Enter your question here..."),
    outputs=gr.Textbox(label="Response"),
    title="Fibromyalgia Q&A Assistant",
    description="Ask questions and get answers based on the retrieved context.",
    examples=[
        ["How does Physiotherapy work with Fibromyalgia?"],
        ["What are the common treatments for chronic pain?"],
    ]
)
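# Optional smoke test before launching the UI; the question below is just one of
# the example prompts above, reused here as an illustrative check.
# print(process_query("How does Physiotherapy work with Fibromyalgia?"))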
"__main__": demo.launch()