cboettig committed on
Commit
b079bc8
1 Parent(s): b4490b6

so it begins...

Browse files
Files changed (3) hide show
  1. .gitignore +11 -0
  2. app.py +84 -0
  3. requirements.txt +5 -0
.gitignore ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .Rproj.user
2
+ .Rhistory
3
+ .RData
4
+ .Ruserdata
5
+ .ipynb_checkpoints
6
+ *.Rproj
7
+ *.duckdb
8
+ *.wal
9
+ *.vrt
10
+ .streamlit
11
+ __pycache__
app.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Streamlit app boilerplate
2
+ import streamlit as st
3
+ st.title("SQL EFI demo")
4
+
5
+ '''
6
+ A minimal demonstration using chatbots to generate & execute SQL queries run against arbitrary parquet data.
7
+
8
+ '''
9
+
10
+
11
+ # Optional: let the user select which LLM they want to use, including self-hosed open models via Ollama
12
+ from langchain_openai import ChatOpenAI
13
+ from langchain_community.llms import Ollama
14
+ models = {
15
+ "chatgpt3.5": ChatOpenAI(model="gpt-3.5-turbo", temperature=0, api_key=st.secrets["OPENAI_API_KEY"]),
16
+ "chatgpt-4o": ChatOpenAI(model="gpt-4o", temperature=0, api_key=st.secrets["OPENAI_API_KEY"]),
17
+ "duckdb-nsql": Ollama(model="duckdb-nsql", temperature=0),
18
+ "sqlcoder": Ollama(model="mannix/defog-llama3-sqlcoder-8b", temperature=0),
19
+ "codegemma": Ollama(model="codegemma", temperature=0),
20
+ "llama3": Ollama(model="llama3", temperature=0),
21
+ }
22
+ modes = {
23
+ "stream data": "view",
24
+ "download first": "table",
25
+ }
26
+
27
+ with st.sidebar:
28
+ parquet = st.text_input("parquet url", "https://data.source.coop/cboettig/obis/obis_20240625.parquet")
29
+
30
+ choice = st.radio("Select an LLM:", models)
31
+ llm = models[choice]
32
+ mode = modes[ st.radio("Set mode", modes) ]
33
+
34
+ #import re
35
+ #tablename = re.sub(r'^.*/|\.([^.]*)$', '', parquet)
36
+ tablename = "database"
37
+
38
+
39
+ # Initialize a duckdb database connection in langchain:
40
+ from langchain_community.utilities import SQLDatabase
41
+ db = SQLDatabase.from_uri("duckdb:///tmp.duckdb", view_support=True)
42
+
43
+ # We now have a standard connection to the duckdb SQL engine.
44
+ # We can execute arbitrary commands, such as enabling duckdb extensions.
45
+ db.run("install spatial; load spatial;")
46
+
47
+ # Create the connection to the parquet file(s) without downloading:
48
+ # Change "view" to "table" to create a (temporary) local copy instead,
49
+ # triggering a slow initial download but maybe faster queries.
50
+ db.run(f"create or replace {mode} {tablename}_{mode} as select * from read_parquet('{parquet}');")
51
+
52
+ # Test if table is available:
53
+ # print(db.get_usable_table_names()) # confirm table is available
54
+
55
+
56
+ ## A SQL Chain
57
+ from langchain.chains import create_sql_query_chain
58
+ chain = create_sql_query_chain(llm, db)
59
+
60
+ chatbox = st.container()
61
+ additional_advice = ". Do not use LIMIT in the query unless I explicitly ask for a reduced sample."
62
+
63
+ import pandas as pd
64
+ import ast
65
+
66
+ with chatbox:
67
+ if prompt := st.chat_input(key="chain"):
68
+ st.chat_message("user").write(prompt)
69
+ with st.chat_message("assistant"):
70
+ response = chain.invoke({"question": prompt + additional_advice})
71
+ st.write(response)
72
+ result = db.run(response, fetch="all", include_columns=True)
73
+ df = pd.DataFrame(ast.literal_eval(result))
74
+ st.dataframe(df)
75
+
76
+ st.divider()
77
+
78
+ '''
79
+
80
+ ## Credits
81
+
82
+ DRAFT. Open Source Software developed at UC Berkeley.
83
+
84
+ '''
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ duckdb-engine
2
+ langchain
3
+ langchain-community
4
+ langchain-openai
5
+ streamlit