alvinhenrick committed on
Commit 63f14b9
1 Parent(s): b101c5c

Add streaming support

Files changed (2):
  1. app.py +40 -11
  2. medirag/rag/wf.py +11 -11
app.py CHANGED

@@ -1,12 +1,14 @@
 from pathlib import Path
-
-import dspy
 import gradio as gr
 from dotenv import load_dotenv
-
 from medirag.cache.local import SemanticCaching
 from medirag.index.local import DailyMedIndexer
 from medirag.rag.qa import RAG, DailyMedRetrieve
+from medirag.rag.wf import RAGWorkflow
+from llama_index.llms.openai import OpenAI
+from llama_index.core import Settings
+
+import dspy
 
 load_dotenv()
 
@@ -19,19 +21,43 @@ rm = DailyMedRetrieve(daily_med_indexer=indexer)
 
 turbo = dspy.OpenAI(model='gpt-3.5-turbo', max_tokens=4000)
 dspy.settings.configure(lm=turbo, rm=rm)
+# Set the LLM model
+Settings.llm = OpenAI(model='gpt-3.5-turbo')
 
-rag = RAG(k=5)
 sm = SemanticCaching(model_name='sentence-transformers/all-mpnet-base-v2', dimension=768,
                      json_file='rag_test_cache.json', cosine_threshold=.90)
 sm.load_cache()
 
+# Initialize RAGWorkflow with indexer
+rag = RAG(k=5)
+rag_workflow = RAGWorkflow(indexer=indexer, timeout=60, top_k=10, top_n=5)
+
 
-def ask_med_question(query):
+async def ask_med_question(query, enable_stream):
+    # Check the cache first
     response = sm.lookup(question=query)
-    if not response:
-        response = rag(query).answer
-        sm.save(query, response)
-    return response
+    if response:
+        # Return cached response if found
+        yield response
+    else:
+        if enable_stream:
+            # Stream response using RAGWorkflow
+            result = await rag_workflow.run(query=query)
+            accumulated_response = ""
+
+            async for chunk in result.async_response_gen():
+                accumulated_response += chunk
+                yield accumulated_response  # Accumulate and yield the updated response
+
+            # Save the accumulated response to the cache after streaming is complete
+            sm.save(query, accumulated_response)
+        else:
+            # Use RAG without streaming
+            response = rag(query).answer
+            yield response
+
+            # Save the response in the cache
+            sm.save(query, response)
 
 
 css = """
@@ -41,8 +67,8 @@ h1 {
 }
 #md {margin-top: 70px}
 """
-# Set up the Gradio interface
 
+# Set up the Gradio interface with a checkbox for enabling streaming
 with gr.Blocks(css=css) as app:
     gr.Markdown("# DailyMed RAG")
     with gr.Row():
@@ -54,9 +80,12 @@ with gr.Blocks(css=css) as app:
         gr.Markdown("### Ask any question about medication usage and get answers based on DailyMed data.",
                     elem_id="md")
 
+        enable_stream = gr.Checkbox(label="Enable Streaming", value=False)
         input_text = gr.Textbox(lines=2, label="Question", placeholder="Enter your question about a drug...")
         output_text = gr.Textbox(interactive=False, label="Response", lines=10)
         button = gr.Button("Submit")
+
+        # Update the button click function to include the checkbox value
+        button.click(fn=ask_med_question, inputs=[input_text, enable_stream], outputs=output_text)
 
 app.launch()
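For context, the streaming pattern the new handler relies on is sketched below: when a Gradio click handler is an async generator, Gradio re-renders the output Textbox with each yielded value, which is why ask_med_question yields the accumulated text rather than individual chunks (and only writes to the semantic cache once the stream is complete). This is a minimal sketch assuming a Gradio version that supports async-generator event handlers; the handler name and the chunk contents are made up for illustration, and only the wiring mirrors the app.py change.

import asyncio
import gradio as gr


async def fake_stream(query, enable_stream):
    # Stand-in for ask_med_question: yield the accumulated text so the
    # output Textbox updates in place as chunks arrive.
    chunks = ["Take", " this", " medication", " with", " food."]
    if not enable_stream:
        yield "".join(chunks)      # single, non-streamed answer
        return
    text = ""
    for chunk in chunks:
        await asyncio.sleep(0.2)   # simulate token latency
        text += chunk
        yield text


with gr.Blocks() as demo:
    question = gr.Textbox(lines=2, label="Question")
    enable = gr.Checkbox(label="Enable Streaming", value=False)
    answer = gr.Textbox(label="Response", lines=10)
    gr.Button("Submit").click(fn=fake_stream, inputs=[question, enable], outputs=answer)

if __name__ == "__main__":
    demo.launch()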
medirag/rag/wf.py CHANGED

@@ -1,22 +1,16 @@
 import asyncio
 from pathlib import Path
-from dotenv import load_dotenv
-from llama_index.core import PromptTemplate, Settings
-from llama_index.core.response_synthesizers import CompactAndRefine, TreeSummarize
+
+from llama_index.core import PromptTemplate
 from llama_index.core.postprocessor.llm_rerank import LLMRerank
+from llama_index.core.response_synthesizers import CompactAndRefine, TreeSummarize
+from llama_index.core.schema import NodeWithScore
 from llama_index.core.workflow import Context, Workflow, StartEvent, StopEvent, step
-from llama_index.llms.openai import OpenAI
 from llama_index.core.workflow import Event
-from llama_index.core.schema import NodeWithScore
 from pydantic import BaseModel
 
 from medirag.index.local import DailyMedIndexer
 
-load_dotenv()
-
-# Set the LLM model
-Settings.llm = OpenAI(model='gpt-3.5-turbo')
-
 
 # Event classes
 class RetrieverEvent(Event):
@@ -88,7 +82,6 @@ class RAGWorkflow(Workflow):
     @step
     async def retrieve(self, ctx: Context, ev: QueryEvent) -> RetrieverEvent | None:
         query = ctx.data["query"]
-
         print(f"Query the database with: {query}")
 
         if not self.indexer:
@@ -115,6 +108,13 @@
 
 # Main function
 async def main():
+    from llama_index.llms.openai import OpenAI
+    from llama_index.core import Settings
+    from dotenv import load_dotenv
+
+    load_dotenv()
+    Settings.llm = OpenAI(model='gpt-3.5-turbo')
+
     data_dir = Path("../../data")
     index_path = data_dir.joinpath("dm_spl_release_human_rx_part1")
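Because load_dotenv() and Settings.llm are no longer set at import time in medirag/rag/wf.py, any caller of RAGWorkflow now has to perform that setup itself, as app.py and the module's main() do after this commit. The sketch below shows that calling pattern end to end; the RAGWorkflow arguments and the result.async_response_gen() streaming loop come straight from the diff, while the DailyMedIndexer construction is an assumption (its real arguments come from the index path used in main()).

import asyncio
from pathlib import Path

from dotenv import load_dotenv
from llama_index.core import Settings
from llama_index.llms.openai import OpenAI

from medirag.index.local import DailyMedIndexer
from medirag.rag.wf import RAGWorkflow


async def run_once() -> None:
    load_dotenv()                                   # expects OPENAI_API_KEY in the environment
    Settings.llm = OpenAI(model='gpt-3.5-turbo')    # same model the app configures

    # Assumed constructor call -- the real index location and any loading step
    # live in main() (data_dir / "dm_spl_release_human_rx_part1").
    indexer = DailyMedIndexer(index_path=Path("data/dm_spl_release_human_rx_part1"))

    rag_workflow = RAGWorkflow(indexer=indexer, timeout=60, top_k=10, top_n=5)
    result = await rag_workflow.run(query="What are the usage instructions for aspirin?")

    # Stream the answer chunk by chunk, as app.py does for the Gradio Textbox.
    async for chunk in result.async_response_gen():
        print(chunk, end="", flush=True)


if __name__ == "__main__":
    asyncio.run(run_once())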