mlo0ollm committed on
Commit 42d4b45
1 Parent(s): b006e19
Files changed (2)
  1. app.py +143 -0
  2. pandasai_tool.py +47 -0
app.py ADDED
@@ -0,0 +1,143 @@
+ import gradio as gr
+ from pandasai_tool import pandas_ai_res
+ import time
+
+ # Load the uploaded CSV or XLSX file as a DataFrame;
+ # the pandas_ai_res function will return the result.
+ with gr.Blocks() as demo:
+     gr.Label("🗣 Conversational Data Analysis")
+     gr.Markdown("(Note) Upload a data file and ask for an analysis")
+     # uploaded_file = gr.File(label="Upload file")
+     # chatbot = gr.Chatbot()
+     # msg = gr.Textbox()
+     # clear = gr.ClearButton([msg, chatbot], label="Clear chat history")
+     #
+     # def respond(file, message, chat_history):
+     #     bot_message = pandas_ai_res(file, message)
+     #     chat_history.append((message, bot_message))
+     #     # time.sleep(2)
+     #     return "", chat_history
+     #
+     # msg.submit(respond, [uploaded_file, msg, chatbot], [msg, chatbot])
+
+     gr.Interface(
+         fn=pandas_ai_res,
+         inputs=["file", "text"],
+         outputs="text"
+     )
+
+ if __name__ == "__main__":
+     demo.launch(share=True)
+
+ #
+ # import gradio as gr
+ # import pandas as pd
+ # from huggingface_hub.hf_api import create_repo, upload_file, HfApi
+ # from huggingface_hub.repository import Repository
+ # import subprocess
+ # import os
+ # import tempfile
+ #
+ #
+ # import sweetviz as sv
+ #
+ #
+ # def analyze_datasets(dataset, dataset_name, token, column=None, pairwise="off"):
+ #     df = pd.read_csv(dataset.name)
+ #     username = HfApi().whoami(token=token)["name"]
+ #     if column is not None:
+ #         analyze_report = sv.analyze(df, target_feat=column, pairwise_analysis=pairwise)
+ #     else:
+ #         analyze_report = sv.analyze(df, pairwise_analysis=pairwise)
+ #     analyze_report.show_html('./index.html', open_browser=False)
+ #     repo_url = create_repo(f"{username}/{dataset_name}", repo_type="space", token=token, space_sdk="static",
+ #                            private=False)
+ #
+ #     upload_file(path_or_fileobj="./index.html", path_in_repo="index.html", repo_id=f"{username}/{dataset_name}",
+ #                 repo_type="space", token=token)
+ #     readme = f"---\ntitle: {dataset_name}\nemoji: ✨\ncolorFrom: green\ncolorTo: red\nsdk: static\npinned: false\ntags:\n- dataset-report\n---"
+ #     with open("README.md", "w+") as f:
+ #         f.write(readme)
+ #     upload_file(path_or_fileobj="./README.md", path_in_repo="README.md", repo_id=f"{username}/{dataset_name}",
+ #                 repo_type="space", token=token)
+ #
+ #     return f"Your dataset report will be ready at {repo_url}"
+ #
+ #
+ #
+ #
+ #
+ #
+ # with gr.Blocks() as demo:
+ #     main_title = gr.Markdown("""# Easy Analysis🪄🌟✨""")
+ #     main_desc = gr.Markdown(
+ #         """This app enables you to run three types of dataset analysis and pushes the interactive reports to your Hugging Face Hub profile as a Space. It uses SweetViz in the back.""")
+ #     with gr.Tabs():
+ #         with gr.TabItem("Analyze") as analyze:
+ #             with gr.Row():
+ #                 with gr.Column():
+ #                     title = gr.Markdown(""" ## Analyze Dataset """)
+ #                     description = gr.Markdown(
+ #                         "Analyze a dataset or predictive variables against a target variable in a dataset (enter a column name to column section if you want to compare against target value). You can also do pairwise analysis, but it has quadratic complexity.")
+ #                     dataset = gr.File(label="Dataset")
+ #                     column = gr.Text(label="Compare dataset against a target variable (Optional)")
+ #                     pairwise = gr.Radio(["off", "on"], label="Enable pairwise analysis")
+ #                     token = gr.Textbox(label="Your Hugging Face Token")
+ #                     dataset_name = gr.Textbox(label="Dataset Name")
+ #                     pushing_desc = gr.Markdown(
+ #                         "This app needs your Hugging Face Hub token and a unique name for your dataset report.")
+ #                     inference_run = gr.Button("Infer")
+ #                     inference_progress = gr.StatusTracker(cover_container=True)
+ #                     outcome = gr.outputs.Textbox()
+ #                     inference_run.click(
+ #                         analyze_datasets,
+ #                         inputs=[dataset, dataset_name, token, column, pairwise],
+ #                         outputs=outcome,
+ #                         status_tracker=inference_progress,
+ #                     )
+ #         with gr.TabItem("Compare Splits") as compare_splits:
+ #             with gr.Row():
+ #                 with gr.Column():
+ #                     title = gr.Markdown(""" ## Compare Splits""")
+ #                     description = gr.Markdown(
+ #                         "Split a dataset and compare splits. You need to give a fraction, e.g. 0.8.")
+ #                     dataset = gr.File(label="Dataset")
+ #                     split_ratio = gr.Number(label="Split Ratios")
+ #                     pushing_desc = gr.Markdown(
+ #                         "This app needs your Hugging Face Hub token and a unique name for your dataset report.")
+ #                     token = gr.Textbox(label="Your Hugging Face Token")
+ #                     dataset_name = gr.Textbox(label="Dataset Name")
+ #                     inference_run = gr.Button("Infer")
+ #                     inference_progress = gr.StatusTracker(cover_container=True)
+ #
+ #                     outcome = gr.outputs.Textbox()
+ #                     inference_run.click(
+ #                         compare_dataset_splits,
+ #                         inputs=[dataset, dataset_name, token, split_ratio],
+ #                         outputs=outcome,
+ #                         status_tracker=inference_progress,
+ #                     )
+ #
+ #         with gr.TabItem("Compare Subsets") as compare_subsets:
+ #             with gr.Row():
+ #                 with gr.Column():
+ #                     title = gr.Markdown(""" ## Compare Subsets""")
+ #                     description = gr.Markdown(
+ #                         "Compare subsets of a dataset, e.g. you can pick Age Group column and compare adult category against young.")
+ #                     dataset = gr.File(label="Dataset")
+ #                     column = gr.Text(label="Enter column:")
+ #                     category = gr.Text(label="Enter category:")
+ #                     pushing_desc = gr.Markdown(
+ #                         "This app needs your Hugging Face Hub token and a unique name for your dataset report.")
+ #                     token = gr.Textbox(label="Your Hugging Face Token")
+ #                     dataset_name = gr.Textbox(label="Dataset Name")
+ #                     inference_run = gr.Button("Run Analysis")
+ #                     inference_progress = gr.StatusTracker(cover_container=True)
+ #
+ #                     outcome = gr.outputs.Textbox()
+ #                     inference_run.click(
+ #                         compare_column_values,
+ #                         inputs=[dataset, dataset_name, token, column, category],
+ #                         outputs=outcome,
+ #                         status_tracker=inference_progress,
+ #                     )
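
As a reference for the commented-out chat flow near the top of app.py, here is a minimal sketch (not part of the commit) of how the Chatbot-based version could be wired up, assuming the same pandas_ai_res helper and the standard Gradio Blocks/Chatbot API; names such as chat_demo are illustrative:

import gradio as gr
from pandasai_tool import pandas_ai_res

with gr.Blocks() as chat_demo:
    uploaded_file = gr.File(label="Upload file")
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    gr.ClearButton([msg, chatbot])

    def respond(file, message, chat_history):
        # Run the PandasAI query against the uploaded file and append the exchange.
        bot_message = pandas_ai_res(file, message)
        chat_history.append((message, str(bot_message)))
        return "", chat_history  # clear the textbox, update the chat history

    # Submitting the textbox sends (file, message, history) in and receives ("", history) back.
    msg.submit(respond, [uploaded_file, msg, chatbot], [msg, chatbot])

chat_demo.launch()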
pandasai_tool.py ADDED
@@ -0,0 +1,47 @@
+ import os
+
+ from pandasai import SmartDataframe
+ import pandas as pd
+ from pandasai.llm.google_gemini import GoogleGemini
+
+ # df = pd.DataFrame({
+ #     "country": [
+ #         "United States",
+ #         "United Kingdom",
+ #         "France",
+ #         "Germany",
+ #         "Italy",
+ #         "Spain",
+ #         "Canada",
+ #         "Australia",
+ #         "Japan",
+ #         "China",
+ #     ],
+ #     "gdp": [
+ #         19294482071552,
+ #         2891615567872,
+ #         2411255037952,
+ #         3435817336832,
+ #         1745433788416,
+ #         1181205135360,
+ #         1607402389504,
+ #         1490967855104,
+ #         4380756541440,
+ #         14631844184064,
+ #     ],
+ #     "happiness_index": [6.94, 7.16, 6.66, 7.07, 6.38, 6.4, 7.23, 7.22, 5.87, 5.12],
+ # })
+
+ # The Gemini API key is read from an environment variable rather than hard-coded in source.
+ llm = GoogleGemini(api_key=os.environ["GOOGLE_API_KEY"])
+
+ def pandas_ai_res(file, input_text, llm=llm):
+     # Check whether the uploaded file is CSV or XLSX and load it into a DataFrame.
+     if file.name.endswith(".csv"):
+         df = pd.read_csv(file.name)
+     elif file.name.endswith(".xlsx"):
+         df = pd.read_excel(file.name)
+     else:
+         raise ValueError("Unsupported file type: upload a .csv or .xlsx file")
+
+     # Wrap the DataFrame with PandasAI and answer the natural-language query.
+     sdf = SmartDataframe(df, config={"llm": llm})
+
+     return sdf.chat(input_text)
+
+
+ # sdf = SmartDataframe(df, config={"llm": llm})
+ # sdf.chat("Return the top 5 countries by GDP")
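
For local testing outside the Gradio UI, pandas_ai_res can also be called directly. The sketch below is illustrative only: it fakes the uploaded-file object with types.SimpleNamespace, since Gradio passes the handler an object whose .name attribute is a path on disk, and the file name and question are made up:

from types import SimpleNamespace

from pandasai_tool import pandas_ai_res

# Stand-in for the Gradio upload object: only the .name path attribute is needed.
fake_upload = SimpleNamespace(name="sales.csv")  # hypothetical local CSV

answer = pandas_ai_res(fake_upload, "Which month had the highest total revenue?")
print(answer)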