import dash
from dash import Dash, html, dcc, callback, Output, Input
from dash import dash_table
import plotly.express as px

from app import app
import pandas as pd

import datetime
import requests
from io import StringIO
from datetime import date

import dash_bootstrap_components as dbc
import plotly.express as px


from dateutil.parser import parse
 
def convert_to_datetime(input_str, parserinfo=None):
    """Parse a date/time string with ``dateutil.parser.parse``.

    Args:
        input_str: The text to parse; handed to ``parse`` unchanged.
        parserinfo: Forwarded as ``parse``'s ``parserinfo`` keyword
            (``None`` by default).

    Returns:
        The value produced by ``parse`` for ``input_str``.
    """
    parsed = parse(input_str, parserinfo=parserinfo)
    return parsed
 
# Expose the underlying server object of the Dash app at module level.
server = app.server

# Rewrite the Google Drive "share" link into a direct-download link by
# extracting the file id (second-to-last path segment).
url='https://drive.google.com/file/d/1NaXOYHQFF5UO5rQr4rn8Lr3bkYMSOq4_/view?usp=sharing'
url='https://drive.google.com/uc?id=' + url.split('/')[-2]

# Read the headline dataset.
df = pd.read_csv(url)

# Remove the "Aborto" topic.
df = df[df["Topic"] != "Aborto"]

# Keep only headlines with more than 4 whitespace-separated tokens.
# ``.copy()`` makes ``df`` an independent frame so the column assignments
# below write to it directly instead of to a slice of the raw data.
df = df[df['Headline'].str.split().str.len().gt(4)].copy()

df['date'] = pd.to_datetime(df['date'])

# Values offered by the two dropdowns in the layout.
unique_domains = df['domain_folder_name'].unique()
unique_topics = df['Topic'].unique()

# Duplicate the outlet column under the display name used in the charts.
df["Veículos de notícias"] = df["domain_folder_name"]

# Map the stringified numeric sentiment codes to Portuguese labels.
# The result is assigned back explicitly: a chained
# ``df[col].replace(..., inplace=True)`` is deprecated and does not update
# ``df`` when pandas Copy-on-Write is active.
df['FinBERT_label'] = df['FinBERT_label'].astype(str).replace({
    '3.0': 'positivo',
    '2.0': 'neutro',
    '1.0': 'negativo',
})

# Number of headlines per (date, topic, outlet, sentiment label).
counts = df.groupby(['date', 'Topic', 'domain_folder_name', 'FinBERT_label']).size().reset_index(name='count')
counts['count'] = counts['count'].astype('float64')
# NOTE(review): this window slides over consecutive rows of ``counts``, so a
# single window can mix different topics, outlets and labels. Confirm whether
# a per-group rolling mean was intended.
counts['rolling_mean_counts'] = counts['count'].rolling(window=30, min_periods=2).mean()

# Per-sentiment views of the counts table.
df_pos = counts[counts['FinBERT_label'] == 'positivo']
df_neu = counts[counts['FinBERT_label'] == 'neutro']
df_neg = counts[counts['FinBERT_label'] == 'negativo']


# app.layout 
def _centered_row(component):
    """Return a row whose single, centred column wraps *component*."""
    return dbc.Row([dbc.Col([component], className="text-center mt-3 mb-1")])


def _label_row(text):
    """Return a row holding one bold form label."""
    return dbc.Row([dbc.Label(text, className="fw-bold")])


def _graph_row(graph_id):
    """Return a row with one column containing a graph with the given id."""
    return dbc.Row([dbc.Col(dcc.Graph(id=graph_id))])


def _dropdown_row(dropdown_id, choices, initial_value):
    """Return a row with a half-width dropdown listing *choices*."""
    return dbc.Row([
        dbc.Col(
            dcc.Dropdown(
                id=dropdown_id,
                options=[{"label": choice, "value": choice} for choice in choices],
                value=initial_value,  # initially selected entry
                style={"width": "50%"},
            )
        )
    ])


# Bounds of the dataset, used for both the allowed and the initial date range.
_first_date = df['date'].min().date()
_last_date = df['date'].max().date()

# Paginated table of headlines; the "link" column is rendered as markdown.
_headlines_table = dash_table.DataTable(
    id='headlines-table',
    style_as_list_view=True,
    columns=[
        {"name":"Título", "id":"link", "presentation":"markdown"},
        {"name": "Date", "id": "date", "type":"datetime"},
        {"name": "Etiqueta de sentimento", "id": "FinBERT_label"},
    ],
    style_table={'overflowX': 'auto'},
    style_cell={'textAlign': 'left'},
    page_action="native",
    page_current=0,
    page_size=10,
)

# Page layout, top to bottom: title, project link, date picker, topic
# selector with its three charts, outlet selector with its two charts, and
# the headline table.
app.layout = dbc.Container([
    _centered_row(html.H1('Evolução temporal de sentimento em títulos de notícias')),
    _centered_row(
        dcc.Markdown('## [Sobre o projeto](https://github.com/caiocmello/SentDiario)',link_target="_blank")
    ),

    _label_row("Selecione um período (mm/dd/aaaa):"),
    dbc.Row([
        dcc.DatePickerRange(
            id='date-range',
            min_date_allowed=_first_date,
            max_date_allowed=_last_date,
            initial_visible_month=_first_date,
            start_date=_first_date,
            end_date=_last_date,
        )
    ]),

    _label_row("Escolha um tópico:"),
    _dropdown_row("topic-selector", unique_topics, "Imigrantes"),
    _graph_row('line-graph-1'),
    _graph_row("bar-graph-1"),
    _graph_row('pie-graph-1'),

    _label_row("Escolha um site de notícias:"),
    _dropdown_row("domain-selector", unique_domains, "expresso-pt"),
    _graph_row('line-graph-2'),
    _graph_row('pie-graph-2'),

    _label_row('Lista de notícias encontradas para o tópico e meio de comunicação selecionados'),
    dbc.Row([dbc.Col(_headlines_table)]),
])

# # Create a function to generate pie charts
# def generate_pie_chart(category):
#     labels = data[category]['labels']
#     values = data[category]['values']
#     trace = go.Pie(labels=labels, values=values)
#     layout = go.Layout(title=f'Pie Chart - {category}')
#     return dcc.Graph(
#         figure={
#             'data': [trace],
#             'layout': layout
#         }
#     )
    
# callback decorator
# Fixed colour per sentiment label, shared by the line and pie charts.
_SENTIMENT_COLORS = {'positivo': '#039a4d', 'neutro': '#3c03f4', 'negativo': '#ca3919'}
# Colour used for any label outside the three expected ones, so an
# unexpected value cannot raise ``KeyError`` while building a chart.
_UNKNOWN_LABEL_COLOR = '#7f7f7f'


def _empty_outputs():
    """Return placeholders for the six outputs when no row matches.

    The five figures are empty figure dicts; the table ``data`` is an empty
    list, because the DataTable ``data`` property expects a list of records.
    """
    return {'data': []}, {'data': []}, {'data': []}, {'data': []}, {'data': []}, []


def _sentiment_pie(labels, title):
    """Return a pie chart of the percentage share of each value in *labels*."""
    shares = labels.value_counts()
    shares = (shares / shares.sum()) * 100
    fig = px.pie(
        values=shares,
        names=shares.index,
        title=title,
        color_discrete_sequence=[
            _SENTIMENT_COLORS.get(label, _UNKNOWN_LABEL_COLOR) for label in shares.index
        ],
    )
    fig.update_layout(title_x=0.5)
    return fig


@app.callback(
    Output('line-graph-1', 'figure'),
    Output('bar-graph-1','figure'),
    Output('pie-graph-1', 'figure'),
    Output('line-graph-2', 'figure'),
    Output('pie-graph-2', 'figure'),
    Output('headlines-table', 'data'),
    Input("topic-selector", "value"),
    Input("domain-selector", "value"),
    Input('date-range', 'start_date'),
    Input('date-range', 'end_date')
)
def update_output(selected_topic, selected_domain, start_date, end_date):
    """Rebuild every chart and the headline table for the current selection.

    Args:
        selected_topic: Value of the topic dropdown.
        selected_domain: Value of the news-outlet dropdown.
        start_date: Start of the date picker range; ``None`` is treated as
            "from the first article of the topic".
        end_date: End of the date picker range; ``None`` is treated as
            "until the last article of the topic".

    Returns:
        A 6-tuple ``(line_fig_1, bar_fig_1, pie_chart_1, line_fig_2,
        pie_chart_2, table_records)``. When no article matches the topic and
        date range, the figures are empty and the records list is empty.
    """
    #log
    print("topic:",selected_topic,"domain:",selected_domain,"start:", start_date,"end:", end_date,"\n\n")

    topic_dates = df.loc[df["Topic"] == selected_topic, "date"]
    if topic_dates.empty:
        return _empty_outputs()

    # Clamp the requested range to the dates on which the topic has articles.
    min_topic_date = topic_dates.min()
    max_topic_date = topic_dates.max()
    print("min",min_topic_date,"max",max_topic_date)

    if start_date is None or min_topic_date > convert_to_datetime(start_date):
        start_date = min_topic_date
    if end_date is None or max_topic_date < convert_to_datetime(end_date):
        end_date = max_topic_date
    print("After: Sd",start_date,"Ed",end_date)

    # Articles of the topic inside the (clamped) range. ``.copy()`` lets the
    # helper column below be added without touching a slice of ``df``.
    in_scope = (df["Topic"] == selected_topic) & (df['date'] >= start_date) & (df['date'] <= end_date)
    df_filtered = df.loc[in_scope].copy()
    if df_filtered.empty:
        return _empty_outputs()

    # --- line graph 1: sentiment score over time, one line per outlet ---
    line_fig_1 = px.line(df_filtered, x="date", y="normalised results",
                     color='Veículos de notícias', title="O gráfico mostra a evolução temporal de sentimento dos títulos de notícias <br> Numa escala de -1 (negativo) a 1 (positivo), sendo 0 (neutro)")
    line_fig_1.update_layout(
        xaxis_title='Data',
        yaxis_title='Classificação de Sentimento',
        title_x=0.5,
    )
    line_fig_1.update_xaxes(tickformat="%b %d<br>%Y")

    # --- bar graph: number of articles per month and outlet ---
    # Truncate each date to the first day of its month.
    df_filtered['period'] = pd.to_datetime(df_filtered['date']).to_numpy().astype('datetime64[M]')
    grouped_df = df_filtered.groupby(['period', 'Veículos de notícias']).size().reset_index(name='occurrences')

    all_media = df_filtered['domain_folder_name'].unique()
    # Build the month axis from the truncated periods. Starting the 'MS'
    # range at the raw minimum date would skip the first month whenever the
    # earliest article is not dated on the 1st.
    month_starts = pd.date_range(start=df_filtered['period'].min(),
                                 end=df_filtered['period'].max(), freq='MS')
    idx = pd.MultiIndex.from_product([month_starts, all_media], names=['period', 'Veículos de notícias'])
    # Months without articles for an outlet get an explicit zero.
    grouped_df = grouped_df.set_index(['period', 'Veículos de notícias']).reindex(idx, fill_value=0).reset_index()

    bar_fig_1 = px.bar(grouped_df, x='period', y='occurrences', color='Veículos de notícias',
             labels={'period': 'Período', 'occurrences': 'Número de notícias', 'Veículos de notícias': 'Portal'},
             title='Número de notícias por período de tempo')
    bar_fig_1.update_layout(title_x=0.5)

    # --- line graph 2: daily article counts per label for one outlet ---
    df_filtered_2 = counts[(counts['Topic'] == selected_topic) &
                         (counts['domain_folder_name'] == selected_domain) &
                         (counts['date'] >= start_date) &
                         (counts['date'] <= end_date)]

    # Every (label, day) combination in the period, so days without articles
    # are drawn as zero instead of being skipped.
    date_range = pd.date_range(start=start_date, end=end_date)
    all_combinations = pd.MultiIndex.from_product([['positivo', 'neutro', 'negativo'],
                                                   [selected_topic],
                                                   [selected_domain],
                                                   date_range],
                                                  names=['FinBERT_label', 'Topic', 'domain_folder_name', 'date'])
    df_all_combinations = pd.DataFrame(index=all_combinations).reset_index()
    merged_df = pd.merge(df_all_combinations, df_filtered_2, on=['FinBERT_label', 'Topic', 'domain_folder_name', 'date'], how='left')

    # Assign the filled columns back; a chained ``fillna(inplace=True)`` on a
    # selected column does not reliably modify ``merged_df``.
    merged_df['count'] = merged_df['count'].fillna(0)
    merged_df['rolling_mean_counts'] = merged_df['rolling_mean_counts'].fillna(0)

    line_fig_2 = px.line(merged_df, x="date", y="count", color="FinBERT_label",
                   line_group="FinBERT_label", title="Sentimento ao longo do tempo",
                   labels={"count": "Número de notícias", "date": "Date"},
                   # Bind each label to its colour explicitly rather than
                   # relying on the order of appearance in the data.
                   color_discrete_map=_SENTIMENT_COLORS)
    line_fig_2.update_layout(xaxis_title='Date', yaxis_title='Número de artigos de notícias',
                           xaxis=dict(tickformat="%b %d<br>%Y"), legend_title="Etiqueta de sentimento",title_x=0.5)

    # --- pie charts: label shares overall and for the selected outlet ---
    pie_chart_1 = _sentiment_pie(df_filtered['FinBERT_label'], 'Distribuição Geral')

    media_df = df_filtered[df_filtered['Veículos de notícias'] == selected_domain].copy()
    pie_chart_2 = _sentiment_pie(media_df['FinBERT_label'], f'Distribuição para {selected_domain}')

    # --- headline table for the selected outlet ---
    # Ordered categorical so sorting follows positivo < neutro < negativo.
    media_df['FinBERT_label'] = pd.Categorical(media_df['FinBERT_label'],
                                                  categories=['positivo', 'neutro', 'negativo'],
                                                  ordered=True)
    # Markdown link "[headline](url)" rendered by the table's "link" column.
    media_df["link"] = [
        "[{0}]({1})".format(headline, link_url)
        for headline, link_url in zip(media_df["Headline"], media_df["url"])
    ]

    data_table_1 = media_df.sort_values(by=['date', "FinBERT_label"])
    data_table_1['date'] = pd.to_datetime(data_table_1['date']).dt.strftime('%m-%d-%Y')

    return line_fig_1, bar_fig_1, pie_chart_1, line_fig_2, pie_chart_2, data_table_1.to_dict('records')
    
    # return line_fig_1
    

# df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/gapminder_unfiltered.csv')


# app.layout = html.Div([
#     html.H1(children='Title of Dash App', style={'textAlign':'center'}),
#     dcc.Dropdown(df.country.unique(), 'Canada', id='dropdown-selection'),
#     dcc.Graph(id='graph-content')
# ])

# @callback(
#     Output('graph-content', 'figure'),
#     Input('dropdown-selection', 'value')
# )
# def update_graph(value):
#     dff = df[df.country==value]
#     return px.line(dff, x='year', y='pop')

# # Define callback function for updating the headlines table
# @app.callback(
#     Output('headlines-table', 'data'),
#     Input("topic-selector", "value"),
#     Input("domain-selector", "value"),
#     Input('date-range', 'start_date'),
#     Input('date-range', 'end_date')
# )
# def update_headlines_table(selected_topic, selected_domain, start_date, end_date):
#     # Filtering data...
# tab_content_2 =  dcc.Markdown('''

#     # Sobre o projeto


# ''')

# app.layout = html.Div(
#     [
#         dbc.Card(
#         [
#             dbc.CardHeader(
#                 dbc.Tabs(
#                     [
#                         dbc.Tab(label="SentDiário", tab_id="tab-1"),
#                         dbc.Tab(label="Sobre o projeto", tab_id="tab-2"),
#                     ],
#                     id="tabs",
#                     active_tab="tab-1",
#                 )
#             ),
#             dbc.CardBody(html.Div(id="content", className="card-text")),
#         ]
#         )
#     ]
# )

# @app.callback(Output("content", "children"), [Input("tabs", "active_tab")])
# def switch_tab(at):
#     if at == "tab-1":
#         return tab_content_1
#     elif at == "tab-2":
#         return tab_content_2
#     return html.P("This shouldn't ever be displayed...")
    
if __name__ == '__main__':
    # Start the development server when this file is executed as a script.
    # ``run_server`` is deprecated in favour of ``run`` and is no longer
    # available in Dash 3, so prefer ``run`` and only fall back to
    # ``run_server`` on Dash versions that predate it.
    _run = getattr(app, 'run', None) or app.run_server
    _run(debug=True)