"""Gradio question-answering app.

Upload a document (.txt, .pdf, or .md), pick an extractive-QA model, and get:
the answer highlighted inside the context, a confidence score with an
explanation, a bar chart of the score, and a downloadable PDF report.
"""

import base64
import io

import gradio as gr
import markdown
import matplotlib.pyplot as plt
import PyPDF2
from fpdf import FPDF
from transformers import pipeline

# Display name -> Hugging Face model id used by the QA pipeline.
models = {
    "distilbert-base-uncased-distilled-squad": "distilbert-base-uncased-distilled-squad",
    "roberta-base-squad2": "deepset/roberta-base-squad2",
    "bert-large-uncased-whole-word-masking-finetuned-squad": "bert-large-uncased-whole-word-masking-finetuned-squad",
    "albert-base-v2": "twmkn9/albert-base-v2-squad2",
    "xlm-roberta-large-squad2": "deepset/xlm-roberta-large-squad2",
}

# Cache of instantiated pipelines, keyed by display name (lazy-loaded).
loaded_models = {}


def load_model(model_name):
    """Return a cached question-answering pipeline, loading it on first use.

    Args:
        model_name: A key of the module-level ``models`` dict.

    Returns:
        A ``transformers`` question-answering pipeline.
    """
    if model_name not in loaded_models:
        loaded_models[model_name] = pipeline(
            "question-answering", model=models[model_name]
        )
    return loaded_models[model_name]


def generate_score_chart(score):
    """Render a single-bar confidence chart and return it as base64 PNG text.

    Args:
        score: Confidence in [0, 1] from the QA pipeline.

    Returns:
        The PNG image encoded as a base64 ASCII string.
    """
    plt.figure(figsize=(6, 4))
    plt.bar(["Confidence Score"], [score], color='skyblue')
    plt.ylim(0, 1)
    plt.ylabel("Score")
    plt.title("Confidence Score")
    buf = io.BytesIO()
    plt.savefig(buf, format='png')
    plt.close()  # free the figure so repeated calls don't accumulate memory
    buf.seek(0)
    return base64.b64encode(buf.getvalue()).decode()


def highlight_relevant_text(context, start, end):
    """Return *context* with the answer span [start:end) wrapped in <mark> tags.

    Bug fix: the original concatenated empty strings around the span, so the
    gr.HTML output showed no highlight at all. The result is HTML, rendered by
    the ``answer_output`` component.
    """
    return (
        context[:start]
        + "<mark>"
        + context[start:end]
        + "</mark>"
        + context[end:]
    )


def _latin1_safe(text):
    """Replace characters the FPDF core fonts cannot encode.

    Bug fix: any non-Latin-1 character in the question/answer/context
    previously made ``multi_cell`` raise a UnicodeEncodeError.
    """
    return text.encode("latin-1", "replace").decode("latin-1")


def generate_pdf_report(question, answer, score, score_explanation,
                        score_chart, highlighted_context):
    """Assemble the QA result into a PDF and return it as an in-memory stream.

    Args:
        question: The user's question.
        answer: The extracted answer text.
        score: Confidence score, already formatted as a string.
        score_explanation: Human-readable explanation of the score.
        score_chart: Base64-encoded PNG of the score chart.
        highlighted_context: Context text (may contain <mark> tags, which
            appear literally in the PDF).

    Returns:
        io.BytesIO positioned at offset 0, containing the PDF bytes.
    """
    pdf = FPDF()
    pdf.add_page()

    # (font size, text) pairs, emitted in order with identical cell layout.
    sections = [
        (12, f"Question: {question}"),
        (12, f"Answer: {answer}"),
        (12, f"Confidence Score: {score}"),
        (12, f"Score Explanation: {score_explanation}"),
        (12, "Highlighted Context:"),
        (10, highlighted_context),
    ]
    for size, text in sections:
        pdf.set_font("Arial", size=size)
        pdf.multi_cell(0, 10, _latin1_safe(text))
        pdf.ln()

    # Embed the confidence-score chart, decoded from its base64 form.
    score_chart_image = io.BytesIO(base64.b64decode(score_chart))
    pdf.image(score_chart_image, x=10, y=pdf.get_y(), w=100)

    # NOTE(review): passing file-like objects to image()/output() requires
    # fpdf2; the classic PyFPDF package does not support it — confirm which
    # "fpdf" distribution is installed.
    pdf_output = io.BytesIO()
    pdf.output(pdf_output)
    pdf_output.seek(0)
    return pdf_output


def _extract_context(file):
    """Return the text content of an uploaded file (.pdf, .md, or plain text).

    NOTE(review): assumes ``file`` is a readable file-like object exposing
    ``.name``, as older Gradio versions provide; newer Gradio may pass a
    plain path string instead — confirm against the installed gradio version.
    """
    file_name = file.name
    if file_name.endswith(".pdf"):
        pdf_reader = PyPDF2.PdfReader(file)
        # Concatenate the extracted text of every page.
        return "".join(page.extract_text() for page in pdf_reader.pages)
    context = file.read().decode('utf-8')
    if file_name.endswith(".md"):
        # Render Markdown to HTML so links/emphasis survive into the context.
        context = markdown.markdown(context)
    return context


def answer_question(model_name, file, question, status):
    """Run extractive QA over an uploaded document and build all outputs.

    Args:
        model_name: Key into the ``models`` registry.
        file: Uploaded file object from gr.File, or None.
        question: The question to answer.
        status: Incoming status text (unused beyond the final return value;
            Gradio only surfaces the value returned at the end, so the
            intermediate reassignment cannot be shown to the user).

    Returns:
        Tuple of (highlighted HTML context, formatted score, score
        explanation, base64 chart, PDF report stream, status message).
    """
    status = "Loading model..."
    model = load_model(model_name)

    context = _extract_context(file) if file is not None else ""

    result = model(question=question, context=context)
    answer = result['answer']
    score = result['score']
    start = result['start']
    end = result['end']

    # Highlight relevant text
    highlighted_context = highlight_relevant_text(context, start, end)

    # Generate the score chart
    score_chart = generate_score_chart(score)

    # Explain score
    score_explanation = f"The confidence score ranges from 0 to 1, where a higher score indicates higher confidence in the answer's correctness. In this case, the score is {score:.2f}. A score closer to 1 implies the model is very confident about the answer."

    # Generate the PDF report
    pdf_report = generate_pdf_report(
        question, answer, f"{score:.2f}", score_explanation,
        score_chart, highlighted_context,
    )

    status = "Model loaded"
    return highlighted_context, f"{score:.2f}", score_explanation, score_chart, pdf_report, status


# ---------------------------------------------------------------------------
# Gradio UI
# ---------------------------------------------------------------------------
with gr.Blocks() as interface:
    gr.Markdown(
        """
        # Question Answering System
        Upload a document (text, PDF, or Markdown) and ask questions to get answers based on the context.

        **Supported File Types**: `.txt`, `.pdf`, `.md`
        """)
    with gr.Row():
        model_dropdown = gr.Dropdown(
            choices=list(models.keys()),
            label="Select Model",
            value="distilbert-base-uncased-distilled-squad"
        )
    with gr.Row():
        file_input = gr.File(label="Upload Document", file_types=["text", "pdf", "markdown"])
        question_input = gr.Textbox(lines=2, placeholder="Enter your question here...", label="Question")
    with gr.Row():
        answer_output = gr.HTML(label="Highlighted Answer")
        score_output = gr.Textbox(label="Confidence Score")
        explanation_output = gr.Textbox(label="Score Explanation")
        # NOTE(review): a raw base64 string is fed to gr.Image here; some
        # Gradio versions expect a filepath/array/data-URI — verify rendering.
        chart_output = gr.Image(label="Score Chart")
        pdf_output = gr.File(label="Download PDF Report")
    with gr.Row():
        submit_button = gr.Button("Submit")
    status_output = gr.Markdown(value="")

    def on_submit(model_name, file, question):
        """Adapter between the click event and answer_question's signature."""
        return answer_question(model_name, file, question, status="Loading model...")

    submit_button.click(
        on_submit,
        inputs=[model_dropdown, file_input, question_input],
        outputs=[answer_output, score_output, explanation_output, chart_output, pdf_output, status_output]
    )

if __name__ == "__main__":
    interface.launch(share=True)