import subprocess

# Replace the legacy ``pdfminer`` distribution with the pinned ``pdfminer.six``
# release. This runs before the project modules below are imported
# (presumably because one of them depends on pdfminer -- confirm).
# ``-y`` is required: without it ``pip uninstall`` waits for an interactive
# confirmation, which a headless start-up can never answer.
# Return codes are deliberately not checked, so a failed pip call does not
# stop the app from starting.
subprocess.run(["pip", "uninstall", "-y", "pdfminer"])
subprocess.run(["pip", "install", "pdfminer.six==20231228"])

import gradio as gr

# Project modules. The wildcard imports are kept as-is because the UI wiring
# further down refers to their callbacks and data by bare name.
from scrape_3gpp import *
from excel_chat import *
from split_files_to_excel import *
from classification import *
from chart_generation import *
from charts_advanced import *
from users_management import *
from code_df_custom import *
# Temp-file paths already seen by list_attributes_and_values().
# Module-level state: the function replaces it on every call.
value = set()


def list_attributes_and_values():
    """Return an HTML page that redirects to a temp file not seen before.

    Reads ``fi_config.temp_files`` (``fi_config`` is the module-level hidden
    file component), compares it with the paths recorded in the module-level
    ``value`` set by earlier calls, and builds a meta-refresh document that
    points at one path that is new. ``value`` is then replaced by the current
    collection of paths. When several paths are new, which one is used is
    arbitrary (set iteration order).

    Returns:
        str: The redirect HTML, or an empty string when there is no new path
        (instead of failing with ``IndexError`` on an empty difference).
    """
    global value
    import html  # local import keeps this fix self-contained

    attr = 'temp_files'
    new_value = getattr(fi_config, attr)
    print(f"value: {value}\nnew value: {new_value}")

    # Normalise to a set first so the difference also works when the
    # attribute is a list or another iterable.
    current = set(new_value)
    unseen = current - value
    value = current

    # Nothing new to offer (e.g. the component was cleared): emit no redirect.
    if not unseen:
        return ""

    # Escape the path because it is interpolated into HTML attribute values.
    tmp = html.escape(str(next(iter(unseen))), quote=True)
    html_script = f"""
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta http-equiv="refresh" content="0; url=https://organizedprogrammers-standard-intelligence-dev.hf.space/file={tmp}">
<title>Redirecting to Google</title>
</head>
<body>
<p>If you are not redirected automatically, please <a href="https://organizedprogrammers-standard-intelligence-dev.hf.space/file={tmp}">click here</a>.</p>
</body>
</html>
"""
    return html_script
def retrieve_checkpoint(user):
    """Return the checkpoint file name stored in a user record.

    Args:
        user: Mapping holding the user's state; it must contain a
            ``"save_name"`` entry.

    Returns:
        The value stored under ``user["save_name"]``.

    Raises:
        KeyError: If ``user`` has no ``"save_name"`` entry.
    """
    return user["save_name"]
# NOTE(review): the container nesting below was restored from a flattened
# listing; component order, arguments and event wiring are unchanged, but the
# exact Row/Column/Accordion membership should be checked against the UI.
with gr.Blocks() as demo:

    # ------------------------------------------------------------------
    # Header: app title, greeting and logout button
    # ------------------------------------------------------------------
    with gr.Row():
        with gr.Column():
            gr.Markdown("## Extraction, Classification and AI tool")
        with gr.Column():
            md_username = gr.Markdown(value='## Hi Guest!')
            btn_logout = gr.Button("Logout")

    # ------------------------------------------------------------------
    # Login form. ``st_user`` holds the current user record and starts as
    # the "Guest" record below.
    # ------------------------------------------------------------------
    with gr.Accordion(label="**Login** to keep user preferences", open=False):
        st_user = gr.State(value={"name":"Guest", "hashed_password":"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", "history": { "keywords": [ "value1", "value3", "value4"], "prompts": [] }, "save_name" : "7783002652599471029.xlsx"})
        with gr.Column():
            tb_user = gr.Textbox(label='Username')
            tb_pwd = gr.Textbox(label='Password', type='password')
        with gr.Row():
            btn_login = gr.Button('Login')

    # ------------------------------------------------------------------
    # Tab: File extraction
    # ------------------------------------------------------------------
    with gr.Tab("File extraction"):
        gr.Markdown("### This part aims to extract the most relevant content and information about every contribution from a 3gpp meeting")
        gr.Markdown(" Put either just a link, or a link and an excel file with an 'Actions' column")
        with gr.Row():
            dd_url = gr.Dropdown(
                label="(e.g. https://www.3gpp.org/ftp/TSG_SA/WG1_Serv/TSGS1_105_Athens/Docs)",
                multiselect=False,
                value="https://www.3gpp.org/ftp/",
                allow_custom_value=True,
                scale=9,
            )
            btn_search = gr.Button("Search")
        with gr.Accordion("Filter by file status", open=False):
            with gr.Row():
                dd_status = gr.Dropdown(label="Status to look for (Optional)", allow_custom_value=False, multiselect=True, scale=7)
                btn_search_status = gr.Button("Search for status", scale=2)
        btn_extract = gr.Button("Extract excel from URL")

    # ------------------------------------------------------------------
    # Tab: Split Files
    # ------------------------------------------------------------------
    with gr.Tab("Split Files"):
        gr.Markdown("### Upload your standard documentation (pdf, doc, docx) to split it into paragraphs in an Excel file")
        radio = gr.Radio(
            ["Intelligent split", "Intelligent split by keywords", "Non intelligent split"], label="Choose your selection", value = "Intelligent split"
        )
        dropdown_split = gr.Dropdown(["introduction", "objective", "conclusion", "summary"], multiselect=True, visible=False, allow_custom_value=True, label="Select or add keywords")
        nb_split = gr.Number(label="First pages to keep (0 for all)", value=2, interactive=True, visible=True)

        fi_input = gr.File(file_count='multiple')
        btn_split = gr.Button("Split")

    # ------------------------------------------------------------------
    # Tab: Ask LLM
    # ------------------------------------------------------------------
    with gr.Tab("Ask LLM"):
        gr.Markdown("### This section utilizes Large Language Models (LLMs) to query rows in an Excel file")
        dd_source_ask = gr.Dropdown(label="Source Column(s)", multiselect=True)
        tb_destcol = gr.Textbox(label="Destination column label (e.g. Summary, ELI5, PAB)")
        dd_prompt = gr.Dropdown(label="Prompt", allow_custom_value=True, multiselect=True, max_choices=1)
        dd_llm = gr.Dropdown(["Mistral Tiny","Mistral Small","Mistral Medium", "Claude Sonnet", "Claude Opus", "GPT 4o" , "Groq Mixtral", "Groq Llama3 70b", "Groq Llama3 8b", "Perplexity Llama3 70b", "Perplexity Llama3 8b", "Perplexity Llama3 Sonar Small", "Perplexity Llama3 Sonar Large"],value="Groq Llama3 70b", label="Choose your LLM")
        with gr.Accordion("Filters", open=False):
            with gr.Row():
                dd_searchcol = gr.Dropdown(label="Column to look into (Optional)", value='[ALL]', multiselect=False, scale=4)
                dd_keywords = gr.Dropdown(label="Words to look for (Optional)", multiselect=True, allow_custom_value=True, scale=5)
        mist_button = gr.Button("Ask AI")
        fi_checkpoint = gr.File()
        btn_checkpoint = gr.Button("Retrieve checkpoint file")

    # ------------------------------------------------------------------
    # Tab: Classification by topic
    # ``df_category_hidden`` is an invisible second DataFrame initialised
    # with the same ``df_cate`` value as the editable one.
    # ------------------------------------------------------------------
    with gr.Tab("Classification by topic"):
        gr.Markdown("### This section will categories each contribution in your own personalized categories")
        with gr.Row():
            dd_source_class = gr.Dropdown(label="Source Column", multiselect=False, scale=7)
            sl_treshold = gr.Slider(minimum=0, maximum=1, value=0.45, step=0.05, label='Similarity Treshold')
        gr.Markdown("### The predefined categories can be modified at any time")

        dd_filter = gr.Dropdown(choices=df_cat_filter, label = "Choose your filters here", multiselect=True, allow_custom_value=True)

        btn_filter = gr.Button("Filter")

        df_category = gr.DataFrame(label='categories', value=df_cate, interactive=True)
        df_category_hidden = gr.DataFrame(value=df_cate, visible=False)

        with gr.Row():
            btn_dl_cate = gr.Button('Download Categories', scale=1)
            fi_categories = gr.File(visible=False, scale=9)
        with gr.Row():
            btn_reset_df = gr.Button("Reset categories")
            btn_classif = gr.Button("Categorize")
            btn_add_categories = gr.Button("Add categories")

    # ------------------------------------------------------------------
    # Tab: Personalised Charts Generation
    # ------------------------------------------------------------------
    with gr.Tab(" Personalised Charts Generation"):
        gr.Markdown("### This section will create a chart using two columns of your choice")
        with gr.Row():
            dd_label1 = gr.Dropdown(label="Label 1", multiselect=False)
            dd_label2 = gr.Dropdown(label="Label 2", value="", multiselect=False)
        btn_chart = gr.Button("Generate Bar Plot")
        plt_figure = gr.Plot()

    # ------------------------------------------------------------------
    # Tab: Meeting Report (charts), with three sub-tabs and three plots
    # ------------------------------------------------------------------
    with gr.Tab("Meeting Report (charts)"):
        gr.Markdown("### This section will create a report using multiple charts with your columns")
        gr.Markdown("Make sure you have an 'Expert', 'Source' and 'Status' column")
        with gr.Tab("Overall"):
            btn_overall = gr.Button("Overall Review")
        with gr.Tab("By Expert"):
            dd_exp=gr.Dropdown(label="Experts", multiselect=False, allow_custom_value=True,)
            btn_expert = gr.Button("Top 10 by expert")
        with gr.Tab("By Company"):
            tb_com=gr.Textbox(label="Company Name",info="You can write 1, 2 or 3 company names at the same time")
            btn_type = gr.Button("Company info")
        with gr.Row():
            plt_chart = gr.Plot(label="Graphique")
            plt_chart2 = gr.Plot(label="Graphique")
            plt_chart3 = gr.Plot(label="Graphique")

    # ------------------------------------------------------------------
    # Tab: Code on your file
    # ------------------------------------------------------------------
    with gr.Tab("Code on your file"):
        gr.Markdown("### This section lets you add your own code to add functions and filters to edit the files")
        with gr.Accordion("Input DataFrame Preview", open=False):
            df_input = gr.DataFrame(interactive=False)
        gr.Markdown("```python\ndf = pd.read_excel(YOUR_FILE)\n```")
        cd_code = gr.Code(value="# Create a copy of the original DataFrame\nnew_df = df.copy()\n\n# Add a new column to the copy\nnew_df['NewColumn'] = 'New Value'", language='python')
        gr.Markdown("```python\nnew_df.to_excel(YOUR_NEW_FILE)\nreturn YOUR_NEW_FILE\n```")

        btn_run_code = gr.Button()
        error_display = gr.Markdown()
        df_output_code = gr.DataFrame(interactive=False)
        btn_export_df = gr.Button('Export df as excel')
        st_filename = gr.State()

    # ------------------------------------------------------------------
    # Components shared by the tabs: preview and the working Excel file
    # ------------------------------------------------------------------
    with gr.Accordion("Excel Preview", open=False):
        df_output = gr.DataFrame()
    fi_excel = gr.File(label="Excel File")

    # ``ht_dl`` receives the redirect HTML built by
    # list_attributes_and_values(); ``fi_config`` is the hidden file component
    # that function reads through the module-level name.
    ht_dl = gr.HTML()
    fi_config = gr.File(type='binary', visible=False)

    # ------------------------------------------------------------------
    # Event wiring: authentication
    # ------------------------------------------------------------------
    btn_login.click(auth_user, inputs=[tb_user, tb_pwd], outputs=[st_user, md_username, dd_prompt, dd_keywords])
    tb_pwd.submit(auth_user, inputs=[tb_user, tb_pwd], outputs=[st_user, md_username, dd_prompt, dd_keywords])
    btn_logout.click(logout, inputs=None, outputs=[st_user, md_username, dd_prompt, dd_keywords])

    # ------------------------------------------------------------------
    # Event wiring: file extraction
    # ------------------------------------------------------------------
    btn_search_status.click(extract_statuses, inputs=dd_url, outputs=dd_status)
    btn_search.click(browse_folder, inputs=dd_url, outputs=dd_url)
    dd_url.change(browse_folder, inputs=dd_url, outputs=dd_url)

    # Every change of the working Excel file refreshes the column pickers,
    # the previews and the stored file name.
    fi_excel.change(
        get_columns,
        inputs=[fi_excel],
        outputs=[dd_source_ask, dd_source_class, dd_label1, dd_label2, dd_searchcol, df_output,st_filename, df_input],
    )

    btn_extract.click(extractionPrincipale, inputs=[dd_url, fi_excel, dd_status], outputs=[fi_excel])

    # ------------------------------------------------------------------
    # Event wiring: split files (runs on upload and on button click)
    # ------------------------------------------------------------------
    fi_input.upload(function_split_call, inputs=[fi_input, dropdown_split, radio, nb_split], outputs=fi_excel)
    btn_split.click(function_split_call, inputs=[fi_input, dropdown_split, radio, nb_split], outputs=fi_excel)
    radio.change(fn=change_textbox, inputs=[dropdown_split,radio], outputs=[dropdown_split, nb_split])

    # ------------------------------------------------------------------
    # Event wiring: Ask LLM
    # ------------------------------------------------------------------
    mist_button.click(
        chat_with_mistral,
        inputs=[dd_source_ask, tb_destcol, dd_prompt, fi_excel, dd_url, dd_searchcol, dd_keywords, dd_llm, st_user],
        outputs=[fi_excel, df_output, dd_prompt, dd_keywords, st_user, fi_config],
    )
    btn_checkpoint.click(retrieve_checkpoint, inputs=st_user, outputs=fi_checkpoint)

    # ------------------------------------------------------------------
    # Event wiring: classification
    # ------------------------------------------------------------------
    btn_classif.click(classification, inputs=[dd_source_class, fi_excel, df_category, sl_treshold], outputs=[fi_excel, df_output])
    btn_reset_df.click(reset_cate, inputs=df_category, outputs=df_category)
    btn_filter.click(filter_by_topics, inputs=[dd_filter, df_category_hidden], outputs=df_category)
    btn_add_categories.click(add_categories, inputs=[df_category, df_category_hidden],outputs=[dd_filter,df_category_hidden])
    btn_dl_cate.click(download_cate, inputs=df_category, outputs=fi_categories)

    # ------------------------------------------------------------------
    # Event wiring: personalised chart
    # ------------------------------------------------------------------
    btn_chart.click(create_bar_plot, inputs=[fi_excel, dd_label1, dd_label2], outputs=[plt_figure])

    # A change of the hidden file component (an output of "Ask AI" above)
    # writes the redirect HTML into ``ht_dl``.
    fi_config.change(list_attributes_and_values, inputs=None, outputs=ht_dl)

    # ------------------------------------------------------------------
    # Event wiring: custom code
    # ------------------------------------------------------------------
    btn_run_code.click(run_code, inputs=[fi_excel, cd_code], outputs=[df_output_code, error_display])
    btn_export_df.click(export_df, inputs=[df_output_code, st_filename], outputs=fi_excel)

    # ------------------------------------------------------------------
    # Event wiring: meeting report. "Overall Review" has three listeners,
    # one per plot; the expert and company buttons only update the first plot.
    # ------------------------------------------------------------------
    btn_overall.click(generate_company_chart,inputs=[fi_excel], outputs=[plt_chart])
    btn_overall.click(status_chart,inputs=[fi_excel], outputs=[plt_chart2])
    btn_overall.click(category_chart,inputs=[fi_excel], outputs=[plt_chart3])
    btn_expert.click(chart_by_expert,inputs=[fi_excel,dd_exp], outputs=[plt_chart])
    btn_type.click(company_document_type,inputs=[fi_excel,tb_com], outputs=[plt_chart])
# Start the Gradio app defined above, with debug mode enabled.
demo.launch(debug=True)