so it begins...
- .gitignore +11 -0
- app.py +84 -0
- requirements.txt +5 -0
.gitignore
ADDED
@@ -0,0 +1,11 @@
.Rproj.user
.Rhistory
.RData
.Ruserdata
.ipynb_checkpoints
*.Rproj
*.duckdb
*.wal
*.vrt
.streamlit
__pycache__
app.py
ADDED
@@ -0,0 +1,84 @@
# Streamlit app boilerplate
import streamlit as st
st.title("SQL EFI demo")

'''
A minimal demonstration using chatbots to generate and execute SQL queries against arbitrary parquet data.
'''


# Optional: let the user select which LLM they want to use, including self-hosted open models via Ollama
from langchain_openai import ChatOpenAI
from langchain_community.llms import Ollama

models = {
    "chatgpt3.5": ChatOpenAI(model="gpt-3.5-turbo", temperature=0, api_key=st.secrets["OPENAI_API_KEY"]),
    "chatgpt-4o": ChatOpenAI(model="gpt-4o", temperature=0, api_key=st.secrets["OPENAI_API_KEY"]),
    "duckdb-nsql": Ollama(model="duckdb-nsql", temperature=0),
    "sqlcoder": Ollama(model="mannix/defog-llama3-sqlcoder-8b", temperature=0),
    "codegemma": Ollama(model="codegemma", temperature=0),
    "llama3": Ollama(model="llama3", temperature=0),
}
modes = {
    "stream data": "view",
    "download first": "table",
}

with st.sidebar:
    parquet = st.text_input("parquet url", "https://data.source.coop/cboettig/obis/obis_20240625.parquet")

    choice = st.radio("Select an LLM:", models)
    llm = models[choice]
    mode = modes[st.radio("Set mode", modes)]

#import re
#tablename = re.sub(r'^.*/|\.([^.]*)$', '', parquet)
tablename = "database"


# Initialize a duckdb database connection in langchain:
from langchain_community.utilities import SQLDatabase
db = SQLDatabase.from_uri("duckdb:///tmp.duckdb", view_support=True)

# We now have a standard connection to the duckdb SQL engine.
# We can execute arbitrary commands, like enabling duckdb extensions.
db.run("install spatial; load spatial;")

# Create the connection to the parquet file(s) without downloading:
# Change "view" to "table" to create a (temporary) local copy instead,
# triggering a slow initial download but possibly faster queries.
db.run(f"create or replace {mode} {tablename}_{mode} as select * from read_parquet('{parquet}');")

# Test if the table is available:
# print(db.get_usable_table_names())  # confirm table is available


## A SQL chain: turn a natural-language question into a SQL query for this database
from langchain.chains import create_sql_query_chain
chain = create_sql_query_chain(llm, db)

chatbox = st.container()
# Extra instruction appended to every user question before it is sent to the LLM
additional_advice = ". Do not use LIMIT in the query unless I explicitly ask for a reduced sample."

import pandas as pd
import ast

with chatbox:
    if prompt := st.chat_input(key="chain"):
        st.chat_message("user").write(prompt)
        with st.chat_message("assistant"):
            # Generate the SQL, show it, execute it, and render the result as a dataframe
            response = chain.invoke({"question": prompt + additional_advice})
            st.write(response)
            result = db.run(response, fetch="all", include_columns=True)
            df = pd.DataFrame(ast.literal_eval(result))
            st.dataframe(df)

st.divider()

'''
## Credits

DRAFT. Open Source Software developed at UC Berkeley.
'''
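
A note on the view-creation step above: with the default sidebar choices (mode "view", tablename "database"), the f-string passed to db.run() expands to a single DuckDB statement. The sketch below shows roughly the same operation with the duckdb Python client directly, outside of LangChain. It is illustrative only; the in-memory connection, the explicit httpfs install, and the count query are assumptions for the example, not part of the commit.

# Sketch only: approximately what the app's db.run(f"create or replace view ...") call does,
# written against the duckdb Python client. Assumes the duckdb package is installed and the
# remote parquet URL is reachable; httpfs is loaded explicitly here in case it is not auto-loaded.
import duckdb

con = duckdb.connect()  # in-memory for this sketch; the app itself uses the on-disk file tmp.duckdb
con.execute("install httpfs; load httpfs;")
con.execute(
    "create or replace view database_view as "
    "select * from read_parquet('https://data.source.coop/cboettig/obis/obis_20240625.parquet');"
)
# The view streams from the remote file; nothing is downloaded up front.
print(con.execute("select count(*) from database_view").fetchone())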
requirements.txt
ADDED
@@ -0,0 +1,5 @@
duckdb-engine
langchain
langchain-community
langchain-openai
streamlit
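
One detail of app.py worth spelling out: SQLDatabase.run() returns query results as a string, which is why the app round-trips through ast.literal_eval before building a DataFrame. The snippet below is a minimal sketch of that step; the result string and its column names are made up for illustration and do not come from the OBIS data.

# Minimal sketch of app.py's result handling, using a hypothetical result string.
# db.run(query, fetch="all", include_columns=True) returns a string of roughly this shape.
import ast
import pandas as pd

result = "[{'year': 2023, 'n': 42}, {'year': 2024, 'n': 57}]"  # hypothetical values
rows = ast.literal_eval(result)  # parse the string back into a list of dicts
df = pd.DataFrame(rows)
print(df)

Running the Space also assumes an OPENAI_API_KEY secret (read via st.secrets) for the ChatOpenAI entries, and a reachable Ollama server for the locally hosted models in the models dict.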