Spaces:

jharrison27
/

arxiv-RAG

Runtime error

App Files Files Community

arxiv-RAG / app.py

jharrison27

Handle no colbert indexes

bae5df9 verified 8 months ago

raw

history blame contribute delete

6.17 kB

	import torch
	import transformers
	import gradio as gr
	from ragatouille import RAGPretrainedModel
	import re
	from datetime import datetime
	import json
	import arxiv

	from helper import rag_cleaner, get_prompt_text, get_references, get_rag, SaveResponseAndRead, get_md_text_abstract, search_cleaner, get_arxiv_live_search

	# Constants
	RETRIEVE_RESULTS = 20
	LLM_MODELS = ['mistralai/Mixtral-8x7B-Instruct-v0.1', 'mistralai/Mistral-7B-Instruct-v0.2', 'google/gemma-7b-it', 'None']
	DEFAULT_LLM_MODEL = 'mistralai/Mistral-7B-Instruct-v0.2'
	GENERATE_KWARGS = {
	"temperature": None,
	"max_new_tokens": 512,
	"top_p": None,
	"do_sample": False,
	}

	try:
	# RAG Model setup
	RAG = RAGPretrainedModel.from_index("colbert/indexes/arxiv_colbert")
	semantic_search_available = True

	try:
	gr.Info("Setting up retriever, please wait...")
	rag_initial_output = RAG.search("What is Generative AI in Healthcare?", k=1)
	gr.Info("Retriever working successfully!")
	except Exception as e:
	gr.Warning(f"Retriever not working: {str(e)}")

	except FileNotFoundError:
	RAG = None
	semantic_search_available = False
	gr.Warning("Colbert index not found. Semantic search will be unavailable.")

	# Header setup
	mark_text = '# 🩺🔍 Search Results\n'
	header_text = "## Arxiv Paper Summary With QA Retrieval Augmented Generation \n"

	try:
	with open("README.md", "r") as f:
	mdfile = f.read()
	date_pattern = r'Index Last Updated : \d{4}-\d{2}-\d{2}'
	match = re.search(date_pattern, mdfile)
	date = match.group().split(': ')[1]
	formatted_date = datetime.strptime(date, '%Y-%m-%d').strftime('%d %b %Y')
	header_text += f'Index Last Updated: {formatted_date}\n'
	index_info = f"Semantic Search - up to {formatted_date}"
	except FileNotFoundError:
	index_info = "Semantic Search"

	if semantic_search_available:
	database_choices = [index_info, 'Arxiv Search - Latest']
	else:
	database_choices = ['Arxiv Search - Latest']

	# Arxiv API setup
	arx_client = arxiv.Client()
	is_arxiv_available = True
	check_arxiv_result = get_arxiv_live_search("What is Self Rewarding AI and how can it be used in Multi-Agent Systems?", arx_client, RETRIEVE_RESULTS)
	if len(check_arxiv_result) == 0:
	is_arxiv_available = False
	print("Arxiv search not working, switching to default search ...")
	database_choices = [index_info]

	# Gradio UI setup
	with gr.Blocks(theme=gr.themes.Soft()) as demo:
	header = gr.Markdown(header_text)

	with gr.Group():
	search_query = gr.Textbox(label='Search', placeholder='What is Generative AI in Healthcare?')

	with gr.Accordion("Advanced Settings", open=False):
	with gr.Row(equal_height=True):
	llm_model = gr.Dropdown(choices=LLM_MODELS, value=DEFAULT_LLM_MODEL, label='LLM Model')
	llm_results = gr.Slider(minimum=4, maximum=10, value=5, step=1, interactive=True, label="Top n results as context")
	database_src = gr.Dropdown(choices=database_choices, value=index_info, label='Search Source')
	stream_results = gr.Checkbox(value=True, label="Stream output", visible=False)

	output_text = gr.Textbox(show_label=True, container=True, label='LLM Answer', visible=True)
	input = gr.Textbox(show_label=False, visible=False)
	gr_md = gr.Markdown(mark_text)

	def update_with_rag_md(search_query, llm_results_use=5, database_choice=index_info, llm_model_picked=DEFAULT_LLM_MODEL):
	prompt_text_from_data = ""

	if database_choice == index_info and semantic_search_available:
	rag_out = get_rag(search_query, RAG, RETRIEVE_RESULTS)
	database_to_use = 'Semantic Search'
	else:
	arxiv_search_success = True
	try:
	rag_out = get_arxiv_live_search(search_query, arx_client, RETRIEVE_RESULTS)
	if len(rag_out) == 0:
	arxiv_search_success = False
	except Exception as e:
	arxiv_search_success = False
	gr.Warning(f"Arxiv Search not working: {str(e)}")

	if not arxiv_search_success:
	gr.Warning("Arxiv search failed. Please try again later.")
	return "", ""

	database_to_use = 'Arxiv Search'

	md_text_updated = mark_text
	for i, rag_answer in enumerate(rag_out):
	if i < llm_results_use:
	md_text_paper, prompt_text = get_md_text_abstract(rag_answer, source=database_to_use, return_prompt_formatting=True)
	prompt_text_from_data += f"{i+1}. {prompt_text}"
	else:
	md_text_paper = get_md_text_abstract(rag_answer, source=database_to_use)
	md_text_updated += md_text_paper

	prompt = get_prompt_text(search_query, prompt_text_from_data, llm_model_picked=llm_model_picked)
	return md_text_updated, prompt

	def ask_llm(prompt, llm_model_picked=DEFAULT_LLM_MODEL, stream_outputs=False):
	model_disabled_text = "LLM Model is disabled"
	output = ""

	if llm_model_picked == 'None':
	if stream_outputs:
	for out in model_disabled_text:
	output += out
	yield output
	else:
	return model_disabled_text

	client = InferenceClient(llm_model_picked)
	try:
	response = client.text_generation(prompt, stream=stream_outputs, details=False, return_full_text=False, **GENERATE_KWARGS)

	if stream_outputs:
	for token in response:
	output += token
	yield SaveResponseAndRead(output)
	else:
	output = response
	except Exception as e:
	gr.Warning(f"LLM Inference failed: {str(e)}")
	output = ""

	return output

	search_query.submit(update_with_rag_md, [search_query, llm_results, database_src, llm_model], [gr_md, input]).success(ask_llm, [input, llm_model, stream_results], output_text)

	demo.queue().launch()