import os
import nltk 
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import torch
import streamlit as st

from src.doc2vec import inference
from src.abstractive_sum import summarize_text_with_model
from src.textrank import custom_textrank_summarizer, get_labels_for_license
from src.clean import clean_license_text
from src.read_data import read_license_text_data
from src.diff import strikethrough_diff
from src.parameters import help_messages, captions, options

# Fetch the NLTK 'punkt' resource when this script starts.
# NOTE(review): presumably required by the sentence tokenization used in the
# imported summarizer modules - confirm against src/textrank and src/clean.
nltk.download('punkt')

if __name__ == "__main__":
    # Streamlit entry point: builds the sidebar controls, reads a license text
    # from the user, summarizes it (extractive, abstractive, or both), and
    # shows similarity scores plus optional cleaned text, diff, properties,
    # definitions and exceptions.

    CUSTOM_MODEL_NAME = "utkarshsaboo45/ClearlyDefinedLicenseSummarizer"
    SIMILARITY_THRESHOLD = 0.8
    # Definitions/exceptions are only offered when their stripped text is
    # longer than this many characters.
    MIN_SECTION_LENGTH = 10

    os.environ["TOKENIZERS_PARALLELISM"] = "false"
    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

    # Streamlit re-executes this script on every widget interaction, so the
    # model must be cached across reruns instead of being reloaded each time.
    # `cache_resource` is the current API; `experimental_singleton` is its
    # predecessor on older Streamlit releases.
    _cache_resource = (
        getattr(st, "cache_resource", None)
        or getattr(st, "experimental_singleton", None)
    )

    def _load_summarizer(model_name, device_name):
        """Load the seq2seq model (moved to `device_name`) and its tokenizer.

        Arguments are plain strings so the cache can hash them.
        Returns a `(model, tokenizer)` tuple.
        """
        loaded_model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(
            torch.device(device_name)
        )
        loaded_tokenizer = AutoTokenizer.from_pretrained(model_name)
        return loaded_model, loaded_tokenizer

    if _cache_resource is not None:
        # The surrounding st.spinner already reports progress.
        _load_summarizer = _cache_resource(show_spinner=False)(_load_summarizer)

    with st.spinner(captions.LOADING):
        model, tokenizer = _load_summarizer(CUSTOM_MODEL_NAME, str(device))

    summarization_type = st.sidebar.selectbox(
        captions.SELECT_SUMMARIZATION_TYPE,
        (options.EXTRACTIVE, options.ABSTRACTIVE, options.BOTH),
        help=help_messages.SUMMARIZATION_TYPE,
    )

    # Defaults for values that only some summarization modes produce.
    cleaned_view = None
    exceptions = ""
    definitions = ""

    if summarization_type == options.ABSTRACTIVE:
        st.sidebar.caption(captions.SUMMARY_BY_T5)
        st.sidebar.caption(captions.WARNING_ABSTRACTIVE)
    elif summarization_type == options.EXTRACTIVE:
        st.sidebar.caption(captions.SUMMARY_BY_TEXTRANK)
        summary_len = st.sidebar.slider(
            captions.SUMMARY_LENGTH_PERCENTAGE,
            1,
            100,
            30,
            help=help_messages.SLIDER,
        )

        summary_view = st.sidebar.selectbox(
            captions.SUMMARY_VIEW,
            (
                options.DISPLAY_SUMMARY_ONLY,
                options.DISPLAY_HIGHLIGHTED_SUMMARY,
            ),
            help=help_messages.SUMMARY_VIEW,
        )

        if summary_view == options.DISPLAY_SUMMARY_ONLY:
            st.sidebar.caption(captions.DISPLAY_SUMMARY_ONLY_DESC)
        elif summary_view == options.DISPLAY_HIGHLIGHTED_SUMMARY:
            st.sidebar.caption(captions.DISPLAY_HIGHLIGHTED_SUMMARY_DESC)

        cleaned_view = st.sidebar.selectbox(
            captions.CLEANED_LICENSE_VIEW,
            (
                options.HIDE_CLEANED_LICENSE,
                options.DISPLAY_CLEANED_LICENSE,
                options.DISPLAY_CLEANED_DIFF,
            ),
            help=help_messages.CLEANED_LICENSE_VIEW,
        )

        if cleaned_view == options.DISPLAY_CLEANED_LICENSE:
            st.sidebar.caption(captions.CLEANED_LICENSE_ONLY)
        elif cleaned_view == options.DISPLAY_CLEANED_DIFF:
            st.sidebar.caption(captions.CLEANED_LICENSE_WITH_DIFF)
        elif cleaned_view == options.HIDE_CLEANED_LICENSE:
            st.sidebar.caption(captions.HIDE_CLEANED_LICENSE)

    elif summarization_type == options.BOTH:
        st.sidebar.caption(captions.SUMMARY_BY_BOTH)
        st.sidebar.caption(captions.WARNING_BOTH)

    st.title(captions.APP_TITLE)
    st.caption(captions.APP_DISCLAIMER)

    license_input = st.text_area(
        captions.LICENSE_TEXT,
        placeholder=captions.ENTER_LICENSE_CONTENT,
    )

    # Treat whitespace-only input as empty so blank text never reaches the
    # cleaning / summarization pipeline.
    if license_input.strip():
        cleaned_modified_license_text = clean_license_text(license_input)[0]
        with st.spinner(captions.LOADING):
            if summarization_type == options.ABSTRACTIVE:
                summary, definitions = summarize_text_with_model(
                    license_input,
                    model,
                    tokenizer,
                )
            elif summarization_type == options.EXTRACTIVE:
                textrank_kwargs = {"summary_len": summary_len / 100}
                if summary_view == options.DISPLAY_HIGHLIGHTED_SUMMARY:
                    # Only the highlighted view overrides the summarizer's
                    # default for `return_summary_only`.
                    textrank_kwargs["return_summary_only"] = False
                summary, definitions, exceptions = custom_textrank_summarizer(
                    license_input,
                    **textrank_kwargs,
                )
            elif summarization_type == options.BOTH:
                # The abstractive summary is fed through the extractive
                # summarizer at full length; definitions/exceptions come from
                # that second pass (the model's own definitions are unused).
                abstractive_summary, _ = summarize_text_with_model(
                    license_input,
                    model,
                    tokenizer,
                )
                summary, definitions, exceptions = custom_textrank_summarizer(
                    abstractive_summary,
                    summary_len=1,
                )

            st.header(captions.SUMMARY)
            st.markdown(summary, unsafe_allow_html=True)

            prediction_scores = inference(license_input)
            # Row with index label 0 is used as the best match.
            top1_result = prediction_scores.loc[0, :]
            has_similar_license = bool(
                top1_result["Similarity Scores"] > SIMILARITY_THRESHOLD
            )

            st.header(captions.SIMILARITY_INDEX)
            st.caption(captions.SIMILARITY_INDEX_DISCLAIMER)
            st.dataframe(prediction_scores)

            if cleaned_view == options.DISPLAY_CLEANED_DIFF:
                st.header(captions.CLEANED_LICENSE_DIFF)
                if has_similar_license:
                    readable_license_name = top1_result["License"].replace("-", " ")
                    st.caption(
                        f"Comparing against the official {readable_license_name} license"
                    )

                    top_license_name = top1_result["License"].lower()
                    original_license_text = read_license_text_data(
                        top_license_name
                    )
                    cleaned_original_license_text = clean_license_text(
                        original_license_text
                    )[0]
                    st.markdown(
                        strikethrough_diff(
                            cleaned_original_license_text,
                            cleaned_modified_license_text,
                        ),
                        unsafe_allow_html=True,
                    )
                else:
                    st.caption(captions.NO_SIMILAR_LICENSE_FOUND)
            elif cleaned_view == options.DISPLAY_CLEANED_LICENSE:
                st.header(captions.CLEANED_LICENSE_TEXT)
                st.write(cleaned_modified_license_text)

            show_properties = st.sidebar.checkbox(
                options.SHOW_LICENSE_PROPERTIES,
                disabled=not has_similar_license,
                value=False,
                help=help_messages.PROPERTIES_CHECKBOX,
            )
            # Re-check the threshold (as the definitions/exceptions sections
            # do for their own condition) so a checkbox that is still ticked
            # while disabled never shows properties of a non-matching license.
            if show_properties and has_similar_license:
                license_properties = get_labels_for_license(
                    top1_result["License"].lower()
                )
                st.header(captions.PROPERTIES)
                st.caption(captions.PROPERTIES_DISCLAIMER)
                st.dataframe(license_properties)

            has_definitions = len(definitions.strip()) > MIN_SECTION_LENGTH
            show_definitions = st.sidebar.checkbox(
                options.SHOW_LICENSE_DEFINITIONS,
                disabled=not has_definitions,
                value=False,
                help=help_messages.DEFINITIONS_CHECKBOX,
            )
            if show_definitions and has_definitions:
                st.header(captions.DEFINITIONS)
                st.write(definitions)

            has_exceptions = len(exceptions.strip()) > MIN_SECTION_LENGTH
            show_exceptions = st.sidebar.checkbox(
                options.SHOW_LICENSE_EXCEPTIONS,
                disabled=not has_exceptions,
                value=False,
                help=help_messages.EXCEPTIONS_CHECKBOX,
            )
            if show_exceptions and has_exceptions:
                st.header(captions.EXCEPTIONS)
                st.write(exceptions)