import gradio as gr
import re
from pypdf import PdfReader
from transformers import AutoTokenizer, AutoModelForQuestionAnswering, pipeline
import docx
import concurrent.futures


def remove_references(text):
    """Strip bracketed citations/links from *text* and collapse whitespace.

    Removed, in order: numeric citations such as ``[12]``, bracketed
    hyperlinks followed by a label (``[http://… label]``), and bare
    bracketed hyperlinks (``[http://…]``). Every run of whitespace is then
    reduced to one space and the result is trimmed at both ends.
    """
    bracket_patterns = (
        r'\[\d+\]',                          # [ref]
        r'\[https?://[^\[\]]+\s[^\[\]]+\]',  # hyperlink with text
        r'\[https?://[^\[\]]+\]',            # just the hyperlink
    )
    cleaned = text
    for pattern in bracket_patterns:
        cleaned = re.sub(pattern, '', cleaned)

    # Collapse the gaps left behind by the removals.
    collapsed = re.sub(r'\s+', ' ', cleaned)
    return collapsed.strip()

    
# def extract_text_from_pdf(file_path):
#     text = ""
#     pdf_reader = PdfReader(file_path)
#     for page in pdf_reader.pages:
#         text += page.extract_text() + "\n"
#     return text
  
import fitz  # PyMuPDF

def extract_text_from_pdf(file_path):
    """Return the plain text of every page of the PDF at *file_path*.

    Pages are read in order with ``get_text("text")`` and each page's text
    is followed by a newline.

    Args:
        file_path: Path of the PDF file, passed to ``fitz.open``.

    Returns:
        The concatenated page texts as one string (empty for a document
        with no pages).
    """
    pdf_document = fitz.open(file_path)
    try:
        # Join once instead of growing a string with ``+=`` per page.
        return "".join(
            pdf_document[page_num].get_text("text") + "\n"
            for page_num in range(pdf_document.page_count)
        )
    finally:
        # Close the document even when extracting a page raises.
        pdf_document.close()

def extract_text_from_txt(file_path):
    """Return the full contents of the text file at *file_path*.

    The file is opened in text mode and decoded as UTF-8.

    Args:
        file_path: Path of the file to read.

    Returns:
        The file contents as a single string.
    """
    with open(file_path, "r", encoding='utf-8') as txt_file:
        return txt_file.read()
    
# def extract_text_from_doc(file_path):
#     doc = docx.Document(file_path)
#     fullText = []
#     for para in doc.paragraphs:
#         fullText.append(para.text)
#     return '\n'.join(fullText)
    
def extract_text_from_paragraph(para):
    """Return the ``text`` attribute of the paragraph object *para*."""
    paragraph_text = para.text
    return paragraph_text

def extract_text_from_doc(file_path):
    """Return the text of the .docx document at *file_path*.

    The ``text`` of each entry in ``doc.paragraphs`` is taken in document
    order and the pieces are joined with newlines.

    Args:
        file_path: Path of the document, passed to ``docx.Document``.

    Returns:
        The paragraph texts joined by ``'\\n'`` (empty string when the
        document has no paragraphs).
    """
    doc = docx.Document(file_path)
    # Reading ``.text`` is a per-paragraph attribute access, not blocking
    # I/O, so a thread pool only added start-up and scheduling overhead.
    # A plain generator yields the same result in the same order.
    return '\n'.join(paragraph.text for paragraph in doc.paragraphs)
    

def model(model_name):
    """Build a question-answering pipeline for the checkpoint *model_name*.

    The tokenizer and the question-answering model are both loaded with
    ``from_pretrained(model_name)`` (the model with ``return_dict=False``)
    and wrapped in a ``"question-answering"`` pipeline, which is returned.
    """
    qa_tokenizer = AutoTokenizer.from_pretrained(model_name)
    qa_model = AutoModelForQuestionAnswering.from_pretrained(
        model_name,
        return_dict=False,
    )
    return pipeline(
        "question-answering",
        model=qa_model,
        tokenizer=qa_tokenizer,
    )
    
# Checkpoint identifier handed to model(), which forwards it to the
# tokenizer's and model's ``from_pretrained`` calls.
model_name = "timpal0l/mdeberta-v3-base-squad2"
# Question-answering pipeline built once at import time; qa_result uses it
# as the default value of its ``pipe`` parameter.
pipe = model(model_name)

def qa_result(context, question, file, pipe=pipe):
    """Answer *question* from an uploaded file or from the pasted *context*.

    When *file* is given it takes precedence over *context*: its extension
    must be .pdf, .txt or .docx, and its extracted text becomes the context.
    Otherwise *context* is used directly. If something required is missing
    (unsupported file type, no question, no text) a user-facing message is
    returned instead of an answer.

    Args:
        context: Text supplied by the user; ignored when *file* is given.
            ``None`` and whitespace-only values count as empty.
        question: The question to answer. ``None`` and whitespace-only
            values count as empty.
        file: ``None``, or the uploaded file as either an object exposing
            its path as ``.name`` or a plain path string.
        pipe: Callable invoked as ``pipe(question=..., context=...)`` whose
            result is indexed with ``'answer'``.

    Returns:
        The answer (or message) after post-processing: the first ``'('`` is
        removed, trailing commas are stripped and ``str.capitalize`` is
        applied.
    """
    # ``or ""`` keeps a None coming from the UI from raising.
    question_missing = not (question or "").strip()

    def answer_from(source_text):
        # Run the model and drop citation markers from its answer.
        result = pipe(question=question, context=source_text)
        return remove_references(result['answer'])

    if file is not None:
        extractors = {
            ".pdf": extract_text_from_pdf,
            ".txt": extract_text_from_txt,
            ".docx": extract_text_from_doc,
        }
        # Accept both a file object (path in ``.name``) and a bare path.
        file_path = getattr(file, "name", file)
        extension = "." + file_path.split(".")[-1].lower()

        if extension not in extractors:
            text = "Խնդրում եմ ներբեռնել .pdf, .txt, կամ .docx ֆայլեր"
        elif question_missing:
            text = "Ես չեմ կարողանալ քեզ օգնել եթե ինձ չտաս հարցը"
        else:
            context = extractors[extension](file_path)
            if not context.strip():
                # Nothing extractable (e.g. an empty file): report missing
                # text rather than calling the model with an empty context.
                text = "Ես չեմ կարողանամ քեզ օգնել եթե դու չտրամադրես տեքստը"
            else:
                text = answer_from(context)
    else:
        context_missing = not (context or "").strip()
        if context_missing and question_missing:
            text = "Որպեսզի ես կարողանամ քեզ օգնել, դու պետք է տրամադրես տեքստ կամ ֆայլը, և հարցեր"
        elif context_missing:
            text = "Ես չեմ կարողանամ քեզ օգնել եթե դու չտրամադրես տեքստը"
        elif question_missing:
            text = "Ես չեմ կարողանամ քեզ օգնել եթե դու չտաս հարցը"
        else:
            text = answer_from(context)

    # Tidy fragments cut out of running text: a dangling opening
    # parenthesis and trailing commas.
    text = text.replace('(', '', 1)
    text = text.rstrip(',')

    # NOTE: str.capitalize() upper-cases the first character and lower-cases
    # every other character; kept as in the original behaviour.
    return text.capitalize()

# UI theme: Gradio's Soft theme with three variables overridden. Values that
# start with '*' name another theme variable instead of giving a literal
# colour (Gradio theming convention).
theme = gr.themes.Soft().set(
    body_background_fill='*background_fill_secondary',
    body_text_color_subdued='*body_text_color',
    body_text_color_subdued_dark='*chatbot_code_background_color'
)


# Gradio UI around qa_result. The three inputs map positionally onto its
# (context, question, file) parameters; the answer is shown in a textbox.
# The former ``btn=`` argument was dropped because ``gr.Interface`` has no
# such parameter, and the file input now uses ``gr.File`` rather than the
# deprecated ``gr.inputs`` namespace.
app = gr.Interface(
    fn=qa_result,
    inputs=['textbox', 'text', gr.File()],
    outputs='textbox',
    title='Ողջու՛յն։ Ես քո արհեստական բանականությամբ օգնականն եմ',
    theme=theme,
    description='Տու՛ր ինձ տեքստ, ու տեքստին վերաբերող հարցեր, ու ես կօգնեմ քեզ պատասխանել հարցերին'
)
# Start the web server; inline=False keeps the UI from being embedded in a
# notebook cell.
app.launch(inline=False)