Spaces:

boettiger-lab
/

langchain-demos

Sleeping

App Files Files Community

cboettig committed on Jul 24

Commit

b079bc8

•

1 Parent(s): b4490b6

so it begins...

Browse files

Files changed (3) hide show

.gitignore +11 -0
app.py +84 -0
requirements.txt +5 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,11 @@

+.Rproj.user
+.Rhistory
+.RData
+.Ruserdata
+.ipynb_checkpoints
+*.Rproj
+*.duckdb
+*.wal
+*.vrt
+.streamlit
+__pycache__

app.py ADDED Viewed

	@@ -0,0 +1,84 @@

+# Streamlit app boilerplate
+import streamlit as st
+st.title("SQL EFI demo")
+'''
+A minimal demonstration using chatbots to generate & execute SQL queries run against arbitrary parquet data.
+'''
+# Optional: let the user select which LLM they want to use, including self-hosed open models via Ollama
+from langchain_openai import ChatOpenAI
+from langchain_community.llms import Ollama
+models = {
+    "chatgpt3.5": ChatOpenAI(model="gpt-3.5-turbo", temperature=0, api_key=st.secrets["OPENAI_API_KEY"]),
+    "chatgpt-4o": ChatOpenAI(model="gpt-4o", temperature=0, api_key=st.secrets["OPENAI_API_KEY"]),
+    "duckdb-nsql": Ollama(model="duckdb-nsql", temperature=0),
+    "sqlcoder": Ollama(model="mannix/defog-llama3-sqlcoder-8b", temperature=0),
+    "codegemma":  Ollama(model="codegemma", temperature=0),
+    "llama3": Ollama(model="llama3", temperature=0),
+}
+modes = {
+    "stream data": "view",
+    "download first": "table",
+}
+with st.sidebar:
+    parquet = st.text_input("parquet url", "https://data.source.coop/cboettig/obis/obis_20240625.parquet")
+    choice = st.radio("Select an LLM:", models)
+    llm = models[choice]
+    mode = modes[ st.radio("Set mode", modes) ]
+#import re
+#tablename = re.sub(r'^.*/|\.([^.]*)$', '', parquet)
+tablename = "database"
+# Initialize a duckdb database connection in langchain:
+from langchain_community.utilities import SQLDatabase
+db = SQLDatabase.from_uri("duckdb:///tmp.duckdb", view_support=True)
+# We now have a standard connection to the duckdb SQL engine.
+# We can execute arbitrary commands, like enable duckdb extensions.
+db.run("install spatial; load spatial;")
+# Create the connection to the parquet file(s) without downloading:
+# Change "view" to "table" to create a (temporary) local copy instead,
+# triggering a slow initial download but maybe faster queries.
+db.run(f"create or replace {mode} {tablename}_{mode} as select * from read_parquet('{parquet}');")
+# Test if table is available:
+# print(db.get_usable_table_names()) # confirm table is available
+## A SQL Chain
+from langchain.chains import create_sql_query_chain
+chain = create_sql_query_chain(llm, db)
+chatbox = st.container()
+additional_advice = ". Do not use LIMIT in the query unless I explicitly ask for a reduced sample."
+import pandas as pd
+import ast
+with chatbox:
+    if prompt := st.chat_input(key="chain"):
+        st.chat_message("user").write(prompt)
+        with st.chat_message("assistant"):
+            response = chain.invoke({"question": prompt + additional_advice})
+            st.write(response)
+            result = db.run(response, fetch="all", include_columns=True)
+            df = pd.DataFrame(ast.literal_eval(result))
+            st.dataframe(df)
+st.divider()
+'''
+## Credits
+DRAFT.  Open Source Software developed at UC Berkeley.
+'''

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+duckdb-engine
+langchain
+langchain-community
+langchain-openai
+streamlit