{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: mysql-connector-python in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (8.4.0)\n" ] } ], "source": [ "%pip install mysql-connector-python" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Reading package lists... Done\n", "Building dependency tree \n", "Reading state information... Done\n", "The following additional packages will be installed:\n", " libaio1 libcgi-fast-perl libcgi-pm-perl libencode-locale-perl libfcgi-perl\n", " libhtml-parser-perl libhtml-tagset-perl libhtml-template-perl\n", " libhttp-date-perl libhttp-message-perl libio-html-perl\n", " liblwp-mediatypes-perl libmecab2 libtimedate-perl liburi-perl mecab-ipadic\n", " mecab-ipadic-utf8 mecab-utils mysql-client-8.0 mysql-client-core-8.0\n", " mysql-common mysql-server-8.0 mysql-server-core-8.0 psmisc\n", "Suggested packages:\n", " libdata-dump-perl libipc-sharedcache-perl libwww-perl mailx tinyca\n", "The following NEW packages will be installed:\n", " libaio1 libcgi-fast-perl libcgi-pm-perl libencode-locale-perl libfcgi-perl\n", " libhtml-parser-perl libhtml-tagset-perl libhtml-template-perl\n", " libhttp-date-perl libhttp-message-perl libio-html-perl\n", " liblwp-mediatypes-perl libmecab2 libtimedate-perl liburi-perl mecab-ipadic\n", " mecab-ipadic-utf8 mecab-utils mysql-client-8.0 mysql-client-core-8.0\n", " mysql-common mysql-server mysql-server-8.0 mysql-server-core-8.0 psmisc\n", "0 upgraded, 25 newly installed, 0 to remove and 3 not upgraded.\n", "Need to get 36.9 MB of archives.\n", "After this operation, 318 MB of additional disk space will be used.\n", "Get:1 http://archive.ubuntu.com/ubuntu focal/main amd64 mysql-common all 5.8+1.0.5ubuntu2 [7496 B]\n", "Get:2 http://archive.ubuntu.com/ubuntu focal-updates/main amd64 mysql-client-core-8.0 amd64 8.0.37-0ubuntu0.20.04.3 [5085 kB]\n", "Get:3 http://archive.ubuntu.com/ubuntu focal-updates/main amd64 mysql-client-8.0 amd64 8.0.37-0ubuntu0.20.04.3 [22.0 kB]\n", "Get:4 http://archive.ubuntu.com/ubuntu focal/main amd64 libaio1 amd64 0.3.112-5 [7184 B]\n", "Get:5 http://archive.ubuntu.com/ubuntu focal/main amd64 libmecab2 amd64 0.996-10build1 [233 kB]\n", "Get:6 http://archive.ubuntu.com/ubuntu focal-updates/main amd64 mysql-server-core-8.0 amd64 8.0.37-0ubuntu0.20.04.3 [22.8 MB]\n", "Get:7 http://archive.ubuntu.com/ubuntu focal/main amd64 psmisc amd64 23.3-1 [53.3 kB]\n", "Get:8 http://archive.ubuntu.com/ubuntu focal-updates/main amd64 mysql-server-8.0 amd64 8.0.37-0ubuntu0.20.04.3 [1325 kB]\n", "Get:9 http://archive.ubuntu.com/ubuntu focal/main amd64 libhtml-tagset-perl all 3.20-4 [12.5 kB]\n", "Get:10 http://archive.ubuntu.com/ubuntu focal/main amd64 liburi-perl all 1.76-2 [77.5 kB]\n", "Get:11 http://archive.ubuntu.com/ubuntu focal/main amd64 libhtml-parser-perl amd64 3.72-5 [86.3 kB]\n", "Get:12 http://archive.ubuntu.com/ubuntu focal/main amd64 libcgi-pm-perl all 4.46-1 [186 kB]\n", "Get:13 http://archive.ubuntu.com/ubuntu focal/main amd64 libfcgi-perl amd64 0.79-1 [33.1 kB]\n", "Get:14 http://archive.ubuntu.com/ubuntu focal/main amd64 libcgi-fast-perl all 1:2.15-1 [10.5 kB]\n", "Get:15 http://archive.ubuntu.com/ubuntu focal/main amd64 libencode-locale-perl all 1.05-1 [12.3 kB]\n", "Get:16 http://archive.ubuntu.com/ubuntu focal/main amd64 libhtml-template-perl all 2.97-1 [59.0 kB]\n", "Get:17 http://archive.ubuntu.com/ubuntu focal/main amd64 libtimedate-perl all 2.3200-1 [34.0 kB]\n", "Get:18 http://archive.ubuntu.com/ubuntu focal/main amd64 libhttp-date-perl all 6.05-1 [9920 B]\n", "Get:19 http://archive.ubuntu.com/ubuntu focal/main amd64 libio-html-perl all 1.001-1 [14.9 kB]\n", "Get:20 http://archive.ubuntu.com/ubuntu focal/main amd64 liblwp-mediatypes-perl all 6.04-1 [19.5 kB]\n", "Get:21 http://archive.ubuntu.com/ubuntu focal/main amd64 libhttp-message-perl all 6.22-1 [76.1 kB]\n", "Get:22 http://archive.ubuntu.com/ubuntu focal/main amd64 mecab-utils amd64 0.996-10build1 [4912 B]\n", "Get:23 http://archive.ubuntu.com/ubuntu focal/main amd64 mecab-ipadic all 2.7.0-20070801+main-2.1 [6714 kB]\n", "Get:24 http://archive.ubuntu.com/ubuntu focal/main amd64 mecab-ipadic-utf8 all 2.7.0-20070801+main-2.1 [4380 B]\n", "Get:25 http://archive.ubuntu.com/ubuntu focal-updates/main amd64 mysql-server all 8.0.37-0ubuntu0.20.04.3 [9476 B]\n", "Fetched 36.9 MB in 1s (63.5 MB/s) \n", "debconf: delaying package configuration, since apt-utils is not installed\n", "Selecting previously unselected package mysql-common.\n", "(Reading database ... 64485 files and directories currently installed.)\n", "Preparing to unpack .../0-mysql-common_5.8+1.0.5ubuntu2_all.deb ...\n", "Unpacking mysql-common (5.8+1.0.5ubuntu2) ...\n", "Selecting previously unselected package mysql-client-core-8.0.\n", "Preparing to unpack .../1-mysql-client-core-8.0_8.0.37-0ubuntu0.20.04.3_amd64.deb ...\n", "Unpacking mysql-client-core-8.0 (8.0.37-0ubuntu0.20.04.3) ...\n", "Selecting previously unselected package mysql-client-8.0.\n", "Preparing to unpack .../2-mysql-client-8.0_8.0.37-0ubuntu0.20.04.3_amd64.deb ...\n", "Unpacking mysql-client-8.0 (8.0.37-0ubuntu0.20.04.3) ...\n", "Selecting previously unselected package libaio1:amd64.\n", "Preparing to unpack .../3-libaio1_0.3.112-5_amd64.deb ...\n", "Unpacking libaio1:amd64 (0.3.112-5) ...\n", "Selecting previously unselected package libmecab2:amd64.\n", "Preparing to unpack .../4-libmecab2_0.996-10build1_amd64.deb ...\n", "Unpacking libmecab2:amd64 (0.996-10build1) ...\n", "Selecting previously unselected package mysql-server-core-8.0.\n", "Preparing to unpack .../5-mysql-server-core-8.0_8.0.37-0ubuntu0.20.04.3_amd64.deb ...\n", "Unpacking mysql-server-core-8.0 (8.0.37-0ubuntu0.20.04.3) ...\n", "Selecting previously unselected package psmisc.\n", "Preparing to unpack .../6-psmisc_23.3-1_amd64.deb ...\n", "Unpacking psmisc (23.3-1) ...\n", "Setting up mysql-common (5.8+1.0.5ubuntu2) ...\n", "update-alternatives: using /etc/mysql/my.cnf.fallback to provide /etc/mysql/my.cnf (my.cnf) in auto mode\n", "Selecting previously unselected package mysql-server-8.0.\n", "(Reading database ... 64718 files and directories currently installed.)\n", "Preparing to unpack .../00-mysql-server-8.0_8.0.37-0ubuntu0.20.04.3_amd64.deb ...\n", "debconf: unable to initialize frontend: Dialog\n", "debconf: (No usable dialog-like program is installed, so the dialog based frontend cannot be used. at /usr/share/perl5/Debconf/FrontEnd/Dialog.pm line 76.)\n", "debconf: falling back to frontend: Readline\n", "Unpacking mysql-server-8.0 (8.0.37-0ubuntu0.20.04.3) ...\n", "Selecting previously unselected package libhtml-tagset-perl.\n", "Preparing to unpack .../01-libhtml-tagset-perl_3.20-4_all.deb ...\n", "Unpacking libhtml-tagset-perl (3.20-4) ...\n", "Selecting previously unselected package liburi-perl.\n", "Preparing to unpack .../02-liburi-perl_1.76-2_all.deb ...\n", "Unpacking liburi-perl (1.76-2) ...\n", "Selecting previously unselected package libhtml-parser-perl.\n", "Preparing to unpack .../03-libhtml-parser-perl_3.72-5_amd64.deb ...\n", "Unpacking libhtml-parser-perl (3.72-5) ...\n", "Selecting previously unselected package libcgi-pm-perl.\n", "Preparing to unpack .../04-libcgi-pm-perl_4.46-1_all.deb ...\n", "Unpacking libcgi-pm-perl (4.46-1) ...\n", "Selecting previously unselected package libfcgi-perl.\n", "Preparing to unpack .../05-libfcgi-perl_0.79-1_amd64.deb ...\n", "Unpacking libfcgi-perl (0.79-1) ...\n", "Selecting previously unselected package libcgi-fast-perl.\n", "Preparing to unpack .../06-libcgi-fast-perl_1%3a2.15-1_all.deb ...\n", "Unpacking libcgi-fast-perl (1:2.15-1) ...\n", "Selecting previously unselected package libencode-locale-perl.\n", "Preparing to unpack .../07-libencode-locale-perl_1.05-1_all.deb ...\n", "Unpacking libencode-locale-perl (1.05-1) ...\n", "Selecting previously unselected package libhtml-template-perl.\n", "Preparing to unpack .../08-libhtml-template-perl_2.97-1_all.deb ...\n", "Unpacking libhtml-template-perl (2.97-1) ...\n", "Selecting previously unselected package libtimedate-perl.\n", "Preparing to unpack .../09-libtimedate-perl_2.3200-1_all.deb ...\n", "Unpacking libtimedate-perl (2.3200-1) ...\n", "Selecting previously unselected package libhttp-date-perl.\n", "Preparing to unpack .../10-libhttp-date-perl_6.05-1_all.deb ...\n", "Unpacking libhttp-date-perl (6.05-1) ...\n", "Selecting previously unselected package libio-html-perl.\n", "Preparing to unpack .../11-libio-html-perl_1.001-1_all.deb ...\n", "Unpacking libio-html-perl (1.001-1) ...\n", "Selecting previously unselected package liblwp-mediatypes-perl.\n", "Preparing to unpack .../12-liblwp-mediatypes-perl_6.04-1_all.deb ...\n", "Unpacking liblwp-mediatypes-perl (6.04-1) ...\n", "Selecting previously unselected package libhttp-message-perl.\n", "Preparing to unpack .../13-libhttp-message-perl_6.22-1_all.deb ...\n", "Unpacking libhttp-message-perl (6.22-1) ...\n", "Selecting previously unselected package mecab-utils.\n", "Preparing to unpack .../14-mecab-utils_0.996-10build1_amd64.deb ...\n", "Unpacking mecab-utils (0.996-10build1) ...\n", "Selecting previously unselected package mecab-ipadic.\n", "Preparing to unpack .../15-mecab-ipadic_2.7.0-20070801+main-2.1_all.deb ...\n", "Unpacking mecab-ipadic (2.7.0-20070801+main-2.1) ...\n", "Selecting previously unselected package mecab-ipadic-utf8.\n", "Preparing to unpack .../16-mecab-ipadic-utf8_2.7.0-20070801+main-2.1_all.deb ...\n", "Unpacking mecab-ipadic-utf8 (2.7.0-20070801+main-2.1) ...\n", "Selecting previously unselected package mysql-server.\n", "Preparing to unpack .../17-mysql-server_8.0.37-0ubuntu0.20.04.3_all.deb ...\n", "Unpacking mysql-server (8.0.37-0ubuntu0.20.04.3) ...\n", "Setting up libmecab2:amd64 (0.996-10build1) ...\n", "Setting up mysql-client-core-8.0 (8.0.37-0ubuntu0.20.04.3) ...\n", "Setting up psmisc (23.3-1) ...\n", "Setting up libhtml-tagset-perl (3.20-4) ...\n", "Setting up liblwp-mediatypes-perl (6.04-1) ...\n", "Setting up libencode-locale-perl (1.05-1) ...\n", "Setting up mecab-utils (0.996-10build1) ...\n", "Setting up libio-html-perl (1.001-1) ...\n", "Setting up libtimedate-perl (2.3200-1) ...\n", "Setting up mysql-client-8.0 (8.0.37-0ubuntu0.20.04.3) ...\n", "Setting up libfcgi-perl (0.79-1) ...\n", "Setting up libaio1:amd64 (0.3.112-5) ...\n", "Setting up liburi-perl (1.76-2) ...\n", "Setting up libhttp-date-perl (6.05-1) ...\n", "Setting up mecab-ipadic (2.7.0-20070801+main-2.1) ...\n", "Compiling IPA dictionary for Mecab. This takes long time...\n", "reading /usr/share/mecab/dic/ipadic/unk.def ... 40\n", "emitting double-array: 100% |###########################################| \n", "/usr/share/mecab/dic/ipadic/model.def is not found. skipped.\n", "reading /usr/share/mecab/dic/ipadic/Interjection.csv ... 252\n", "reading /usr/share/mecab/dic/ipadic/Postp-col.csv ... 91\n", "reading /usr/share/mecab/dic/ipadic/Noun.number.csv ... 42\n", "reading /usr/share/mecab/dic/ipadic/Noun.others.csv ... 151\n", "reading /usr/share/mecab/dic/ipadic/Noun.demonst.csv ... 120\n", "reading /usr/share/mecab/dic/ipadic/Suffix.csv ... 1393\n", "reading /usr/share/mecab/dic/ipadic/Postp.csv ... 146\n", "reading /usr/share/mecab/dic/ipadic/Adverb.csv ... 3032\n", "reading /usr/share/mecab/dic/ipadic/Prefix.csv ... 221\n", "reading /usr/share/mecab/dic/ipadic/Conjunction.csv ... 171\n", "reading /usr/share/mecab/dic/ipadic/Others.csv ... 2\n", "reading /usr/share/mecab/dic/ipadic/Noun.adverbal.csv ... 795\n", "reading /usr/share/mecab/dic/ipadic/Noun.proper.csv ... 27328\n", "reading /usr/share/mecab/dic/ipadic/Noun.nai.csv ... 42\n", "reading /usr/share/mecab/dic/ipadic/Verb.csv ... 130750\n", "reading /usr/share/mecab/dic/ipadic/Noun.adjv.csv ... 3328\n", "reading /usr/share/mecab/dic/ipadic/Noun.verbal.csv ... 12146\n", "reading /usr/share/mecab/dic/ipadic/Symbol.csv ... 208\n", "reading /usr/share/mecab/dic/ipadic/Auxil.csv ... 199\n", "reading /usr/share/mecab/dic/ipadic/Noun.name.csv ... 34202\n", "reading /usr/share/mecab/dic/ipadic/Adnominal.csv ... 135\n", "reading /usr/share/mecab/dic/ipadic/Noun.place.csv ... 72999\n", "reading /usr/share/mecab/dic/ipadic/Adj.csv ... 27210\n", "reading /usr/share/mecab/dic/ipadic/Filler.csv ... 19\n", "reading /usr/share/mecab/dic/ipadic/Noun.org.csv ... 16668\n", "reading /usr/share/mecab/dic/ipadic/Noun.csv ... 60477\n", "emitting double-array: 100% |###########################################| \n", "reading /usr/share/mecab/dic/ipadic/matrix.def ... 1316x1316\n", "emitting matrix : 100% |###########################################| \n", "\n", "done!\n", "update-alternatives: using /var/lib/mecab/dic/ipadic to provide /var/lib/mecab/dic/debian (mecab-dictionary) in auto mode\n", "Setting up mysql-server-core-8.0 (8.0.37-0ubuntu0.20.04.3) ...\n", "Setting up mecab-ipadic-utf8 (2.7.0-20070801+main-2.1) ...\n", "Compiling IPA dictionary for Mecab. This takes long time...\n", "reading /usr/share/mecab/dic/ipadic/unk.def ... 40\n", "emitting double-array: 100% |###########################################| \n", "/usr/share/mecab/dic/ipadic/model.def is not found. skipped.\n", "reading /usr/share/mecab/dic/ipadic/Interjection.csv ... 252\n", "reading /usr/share/mecab/dic/ipadic/Postp-col.csv ... 91\n", "reading /usr/share/mecab/dic/ipadic/Noun.number.csv ... 42\n", "reading /usr/share/mecab/dic/ipadic/Noun.others.csv ... 151\n", "reading /usr/share/mecab/dic/ipadic/Noun.demonst.csv ... 120\n", "reading /usr/share/mecab/dic/ipadic/Suffix.csv ... 1393\n", "reading /usr/share/mecab/dic/ipadic/Postp.csv ... 146\n", "reading /usr/share/mecab/dic/ipadic/Adverb.csv ... 3032\n", "reading /usr/share/mecab/dic/ipadic/Prefix.csv ... 221\n", "reading /usr/share/mecab/dic/ipadic/Conjunction.csv ... 171\n", "reading /usr/share/mecab/dic/ipadic/Others.csv ... 2\n", "reading /usr/share/mecab/dic/ipadic/Noun.adverbal.csv ... 795\n", "reading /usr/share/mecab/dic/ipadic/Noun.proper.csv ... 27328\n", "reading /usr/share/mecab/dic/ipadic/Noun.nai.csv ... 42\n", "reading /usr/share/mecab/dic/ipadic/Verb.csv ... 130750\n", "reading /usr/share/mecab/dic/ipadic/Noun.adjv.csv ... 3328\n", "reading /usr/share/mecab/dic/ipadic/Noun.verbal.csv ... 12146\n", "reading /usr/share/mecab/dic/ipadic/Symbol.csv ... 208\n", "reading /usr/share/mecab/dic/ipadic/Auxil.csv ... 199\n", "reading /usr/share/mecab/dic/ipadic/Noun.name.csv ... 34202\n", "reading /usr/share/mecab/dic/ipadic/Adnominal.csv ... 135\n", "reading /usr/share/mecab/dic/ipadic/Noun.place.csv ... 72999\n", "reading /usr/share/mecab/dic/ipadic/Adj.csv ... 27210\n", "reading /usr/share/mecab/dic/ipadic/Filler.csv ... 19\n", "reading /usr/share/mecab/dic/ipadic/Noun.org.csv ... 16668\n", "reading /usr/share/mecab/dic/ipadic/Noun.csv ... 60477\n", "emitting double-array: 100% |###########################################| \n", "reading /usr/share/mecab/dic/ipadic/matrix.def ... 1316x1316\n", "emitting matrix : 100% |###########################################| \n", "\n", "done!\n", "update-alternatives: using /var/lib/mecab/dic/ipadic-utf8 to provide /var/lib/mecab/dic/debian (mecab-dictionary) in auto mode\n", "Setting up libhtml-parser-perl (3.72-5) ...\n", "Setting up libhttp-message-perl (6.22-1) ...\n", "Setting up mysql-server-8.0 (8.0.37-0ubuntu0.20.04.3) ...\n", "debconf: unable to initialize frontend: Dialog\n", "debconf: (No usable dialog-like program is installed, so the dialog based frontend cannot be used. at /usr/share/perl5/Debconf/FrontEnd/Dialog.pm line 76.)\n", "debconf: falling back to frontend: Readline\n", "invoke-rc.d: policy-rc.d denied execution of stop.\n", "update-alternatives: using /etc/mysql/mysql.cnf to provide /etc/mysql/my.cnf (my.cnf) in auto mode\n", "Renaming removed key_buffer and myisam-recover options (if present)\n", "mysqld will log errors to /var/log/mysql/error.log\n", "mysqld is running as pid 6709\n", "Created symlink /etc/systemd/system/multi-user.target.wants/mysql.service → /lib/systemd/system/mysql.service.\n", "invoke-rc.d: policy-rc.d denied execution of start.\n", "Setting up libcgi-pm-perl (4.46-1) ...\n", "Setting up libhtml-template-perl (2.97-1) ...\n", "Setting up mysql-server (8.0.37-0ubuntu0.20.04.3) ...\n", "Setting up libcgi-fast-perl (1:2.15-1) ...\n", "Processing triggers for systemd (245.4-4ubuntu3.23) ...\n", "Processing triggers for man-db (2.9.1-1) ...\n", "Processing triggers for libc-bin (2.31-0ubuntu9.16) ...\n" ] } ], "source": [ "%sudo apt-get -y install mysql-server" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Password:\n", "sudo: a password is required\n" ] } ], "source": [ "!sudo service mysql start" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "!sudo mysql -e \"ALTER USER 'root'@'localhost' IDENTIFIED WITH 'mysql_native_password' BY 'root';FLUSH PRIVILEGES;\"" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "import mysql.connector\n", "\n", "# Create a connection to the MySQL server\n", "conn = mysql.connector.connect(user='root', host='localhost')\n", "\n", "# Create a cursor to interact with the MySQL server\n", "cursor = conn.cursor()" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(23,)\n" ] } ], "source": [ "import mysql.connector\n", "\n", "# Create a connection to the MySQL server\n", "conn = mysql.connector.connect(user='admin', password = 'avnishk96' ,host='demo-db.cdm44iseol25.us-east-1.rds.amazonaws.com', database='classicmodels')\n", "\n", "# Create a cursor to interact with the MySQL server\n", "cursor = conn.cursor()\n", "\n", "# Execute the SELECT query\n", "cursor.execute(\"SELECT COUNT(*) AS numberOfEmployees FROM employees;\")\n", "\n", "# Fetch all the results\n", "records = cursor.fetchall()\n", "\n", "# Print the records\n", "for record in records:\n", " print(record)\n", "\n", "# Close the cursor and connection\n", "cursor.close()\n", "conn.close()\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Code for LangChain Model Starts here" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "mysql\n", "['customers', 'employees', 'offices', 'orderdetails', 'orders', 'payments', 'productlines', 'products']\n", "\n", "CREATE TABLE customers (\n", "\t`customerNumber` INTEGER NOT NULL, \n", "\t`customerName` VARCHAR(50) NOT NULL, \n", "\t`contactLastName` VARCHAR(50) NOT NULL, \n", "\t`contactFirstName` VARCHAR(50) NOT NULL, \n", "\tphone VARCHAR(50) NOT NULL, \n", "\t`addressLine1` VARCHAR(50) NOT NULL, \n", "\t`addressLine2` VARCHAR(50), \n", "\tcity VARCHAR(50) NOT NULL, \n", "\tstate VARCHAR(50), \n", "\t`postalCode` VARCHAR(15), \n", "\tcountry VARCHAR(50) NOT NULL, \n", "\t`salesRepEmployeeNumber` INTEGER, \n", "\t`creditLimit` DECIMAL(10, 2), \n", "\tPRIMARY KEY (`customerNumber`), \n", "\tCONSTRAINT customers_ibfk_1 FOREIGN KEY(`salesRepEmployeeNumber`) REFERENCES employees (`employeeNumber`)\n", ")DEFAULT CHARSET=utf8mb4 ENGINE=InnoDB COLLATE utf8mb4_0900_ai_ci\n", "\n", "/*\n", "3 rows from customers table:\n", "customerNumber\tcustomerName\tcontactLastName\tcontactFirstName\tphone\taddressLine1\taddressLine2\tcity\tstate\tpostalCode\tcountry\tsalesRepEmployeeNumber\tcreditLimit\n", "103\tAtelier graphique\tSchmitt\tCarine \t40.32.2555\t54, rue Royale\tNone\tNantes\tNone\t44000\tFrance\t1370\t21000.00\n", "112\tSignal Gift Stores\tKing\tJean\t7025551838\t8489 Strong St.\tNone\tLas Vegas\tNV\t83030\tUSA\t1166\t71800.00\n", "114\tAustralian Collectors, Co.\tFerguson\tPeter\t03 9520 4555\t636 St Kilda Road\tLevel 3\tMelbourne\tVictoria\t3004\tAustralia\t1611\t117300.00\n", "*/\n", "\n", "\n", "CREATE TABLE employees (\n", "\t`employeeNumber` INTEGER NOT NULL, \n", "\t`lastName` VARCHAR(50) NOT NULL, \n", "\t`firstName` VARCHAR(50) NOT NULL, \n", "\textension VARCHAR(10) NOT NULL, \n", "\temail VARCHAR(100) NOT NULL, \n", "\t`officeCode` VARCHAR(10) NOT NULL, \n", "\t`reportsTo` INTEGER, \n", "\t`jobTitle` VARCHAR(50) NOT NULL, \n", "\tPRIMARY KEY (`employeeNumber`), \n", "\tCONSTRAINT employees_ibfk_1 FOREIGN KEY(`reportsTo`) REFERENCES employees (`employeeNumber`), \n", "\tCONSTRAINT employees_ibfk_2 FOREIGN KEY(`officeCode`) REFERENCES offices (`officeCode`)\n", ")DEFAULT CHARSET=utf8mb4 ENGINE=InnoDB COLLATE utf8mb4_0900_ai_ci\n", "\n", "/*\n", "3 rows from employees table:\n", "employeeNumber\tlastName\tfirstName\textension\temail\tofficeCode\treportsTo\tjobTitle\n", "1002\tMurphy\tDiane\tx5800\tdmurphy@classicmodelcars.com\t1\tNone\tPresident\n", "1056\tPatterson\tMary\tx4611\tmpatterso@classicmodelcars.com\t1\t1002\tVP Sales\n", "1076\tFirrelli\tJeff\tx9273\tjfirrelli@classicmodelcars.com\t1\t1002\tVP Marketing\n", "*/\n", "\n", "\n", "CREATE TABLE offices (\n", "\t`officeCode` VARCHAR(10) NOT NULL, \n", "\tcity VARCHAR(50) NOT NULL, \n", "\tphone VARCHAR(50) NOT NULL, \n", "\t`addressLine1` VARCHAR(50) NOT NULL, \n", "\t`addressLine2` VARCHAR(50), \n", "\tstate VARCHAR(50), \n", "\tcountry VARCHAR(50) NOT NULL, \n", "\t`postalCode` VARCHAR(15) NOT NULL, \n", "\tterritory VARCHAR(10) NOT NULL, \n", "\tPRIMARY KEY (`officeCode`)\n", ")DEFAULT CHARSET=utf8mb4 ENGINE=InnoDB COLLATE utf8mb4_0900_ai_ci\n", "\n", "/*\n", "3 rows from offices table:\n", "officeCode\tcity\tphone\taddressLine1\taddressLine2\tstate\tcountry\tpostalCode\tterritory\n", "1\tSan Francisco\t+1 650 219 4782\t100 Market Street\tSuite 300\tCA\tUSA\t94080\tNA\n", "2\tBoston\t+1 215 837 0825\t1550 Court Place\tSuite 102\tMA\tUSA\t02107\tNA\n", "3\tNYC\t+1 212 555 3000\t523 East 53rd Street\tapt. 5A\tNY\tUSA\t10022\tNA\n", "*/\n", "\n", "\n", "CREATE TABLE orderdetails (\n", "\t`orderNumber` INTEGER NOT NULL, \n", "\t`productCode` VARCHAR(15) NOT NULL, \n", "\t`quantityOrdered` INTEGER NOT NULL, \n", "\t`priceEach` DECIMAL(10, 2) NOT NULL, \n", "\t`orderLineNumber` SMALLINT NOT NULL, \n", "\tPRIMARY KEY (`orderNumber`, `productCode`), \n", "\tCONSTRAINT orderdetails_ibfk_1 FOREIGN KEY(`orderNumber`) REFERENCES orders (`orderNumber`), \n", "\tCONSTRAINT orderdetails_ibfk_2 FOREIGN KEY(`productCode`) REFERENCES products (`productCode`)\n", ")DEFAULT CHARSET=utf8mb4 ENGINE=InnoDB COLLATE utf8mb4_0900_ai_ci\n", "\n", "/*\n", "3 rows from orderdetails table:\n", "orderNumber\tproductCode\tquantityOrdered\tpriceEach\torderLineNumber\n", "10100\tS18_1749\t30\t136.00\t3\n", "10100\tS18_2248\t50\t55.09\t2\n", "10100\tS18_4409\t22\t75.46\t4\n", "*/\n", "\n", "\n", "CREATE TABLE orders (\n", "\t`orderNumber` INTEGER NOT NULL, \n", "\t`orderDate` DATE NOT NULL, \n", "\t`requiredDate` DATE NOT NULL, \n", "\t`shippedDate` DATE, \n", "\tstatus VARCHAR(15) NOT NULL, \n", "\tcomments TEXT, \n", "\t`customerNumber` INTEGER NOT NULL, \n", "\tPRIMARY KEY (`orderNumber`), \n", "\tCONSTRAINT orders_ibfk_1 FOREIGN KEY(`customerNumber`) REFERENCES customers (`customerNumber`)\n", ")DEFAULT CHARSET=utf8mb4 ENGINE=InnoDB COLLATE utf8mb4_0900_ai_ci\n", "\n", "/*\n", "3 rows from orders table:\n", "orderNumber\torderDate\trequiredDate\tshippedDate\tstatus\tcomments\tcustomerNumber\n", "10100\t2003-01-06\t2003-01-13\t2003-01-10\tShipped\tNone\t363\n", "10101\t2003-01-09\t2003-01-18\t2003-01-11\tShipped\tCheck on availability.\t128\n", "10102\t2003-01-10\t2003-01-18\t2003-01-14\tShipped\tNone\t181\n", "*/\n", "\n", "\n", "CREATE TABLE payments (\n", "\t`customerNumber` INTEGER NOT NULL, \n", "\t`checkNumber` VARCHAR(50) NOT NULL, \n", "\t`paymentDate` DATE NOT NULL, \n", "\tamount DECIMAL(10, 2) NOT NULL, \n", "\tPRIMARY KEY (`customerNumber`, `checkNumber`), \n", "\tCONSTRAINT payments_ibfk_1 FOREIGN KEY(`customerNumber`) REFERENCES customers (`customerNumber`)\n", ")DEFAULT CHARSET=utf8mb4 ENGINE=InnoDB COLLATE utf8mb4_0900_ai_ci\n", "\n", "/*\n", "3 rows from payments table:\n", "customerNumber\tcheckNumber\tpaymentDate\tamount\n", "103\tHQ336336\t2004-10-19\t6066.78\n", "103\tJM555205\t2003-06-05\t14571.44\n", "103\tOM314933\t2004-12-18\t1676.14\n", "*/\n", "\n", "\n", "CREATE TABLE productlines (\n", "\t`productLine` VARCHAR(50) NOT NULL, \n", "\t`textDescription` VARCHAR(4000), \n", "\t`htmlDescription` MEDIUMTEXT, \n", "\timage MEDIUMBLOB, \n", "\tPRIMARY KEY (`productLine`)\n", ")DEFAULT CHARSET=utf8mb4 ENGINE=InnoDB COLLATE utf8mb4_0900_ai_ci\n", "\n", "/*\n", "3 rows from productlines table:\n", "productLine\ttextDescription\thtmlDescription\timage\n", "Classic Cars\tAttention car enthusiasts: Make your wildest car ownership dreams come true. Whether you are looking\tNone\tNone\n", "Motorcycles\tOur motorcycles are state of the art replicas of classic as well as contemporary motorcycle legends \tNone\tNone\n", "Planes\tUnique, diecast airplane and helicopter replicas suitable for collections, as well as home, office o\tNone\tNone\n", "*/\n", "\n", "\n", "CREATE TABLE products (\n", "\t`productCode` VARCHAR(15) NOT NULL, \n", "\t`productName` VARCHAR(70) NOT NULL, \n", "\t`productLine` VARCHAR(50) NOT NULL, \n", "\t`productScale` VARCHAR(10) NOT NULL, \n", "\t`productVendor` VARCHAR(50) NOT NULL, \n", "\t`productDescription` TEXT NOT NULL, \n", "\t`quantityInStock` SMALLINT NOT NULL, \n", "\t`buyPrice` DECIMAL(10, 2) NOT NULL, \n", "\t`MSRP` DECIMAL(10, 2) NOT NULL, \n", "\tPRIMARY KEY (`productCode`), \n", "\tCONSTRAINT products_ibfk_1 FOREIGN KEY(`productLine`) REFERENCES productlines (`productLine`)\n", ")DEFAULT CHARSET=utf8mb4 ENGINE=InnoDB COLLATE utf8mb4_0900_ai_ci\n", "\n", "/*\n", "3 rows from products table:\n", "productCode\tproductName\tproductLine\tproductScale\tproductVendor\tproductDescription\tquantityInStock\tbuyPrice\tMSRP\n", "S10_1678\t1969 Harley Davidson Ultimate Chopper\tMotorcycles\t1:10\tMin Lin Diecast\tThis replica features working kickstand, front suspension, gear-shift lever, footbrake lever, drive \t7933\t48.81\t95.70\n", "S10_1949\t1952 Alpine Renault 1300\tClassic Cars\t1:10\tClassic Metal Creations\tTurnable front wheels; steering function; detailed interior; detailed engine; opening hood; opening \t7305\t98.58\t214.30\n", "S10_2016\t1996 Moto Guzzi 1100i\tMotorcycles\t1:10\tHighway 66 Mini Classics\tOfficial Moto Guzzi logos and insignias, saddle bags located on side of motorcycle, detailed engine,\t6625\t68.99\t118.94\n", "*/\n" ] } ], "source": [ "import os\n", "\n", "db_user = \"root\"\n", "db_password = \"\"\n", "db_host = \"localhost\"\n", "db_name = \"classicmodels\"\n", "from langchain_community.utilities.sql_database import SQLDatabase\n", " # db = SQLDatabase.from_uri(f\"mysql+pymysql://{db_user}:{db_password}@{db_host}/{db_name}\",sample_rows_in_table_info=1,include_tables=['customers','orders'],custom_table_info={'customers':\"customer\"})\n", "db = SQLDatabase.from_uri(f\"mysql+pymysql://{db_user}:{db_password}@{db_host}/{db_name}\")\n", "print(db.dialect)\n", "print(db.get_usable_table_names())\n", "print(db.table_info)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.\n", "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/transformers/generation/configuration_utils.py:494: UserWarning: `pad_token_id` should be positive but got -1. This will cause errors when batch generating, if there is padding. Please set `pas_token_id` explicitly by `model.generation_config.pad_token_id=PAD_TOKEN_ID` to avoid errors in generation, and ensure your `input_ids` input does not have negative values.\n", " warnings.warn(\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "ce1cd59968d4420fa2ef4235f7a7fde0", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Loading checkpoint shards: 0%| | 0/3 [00:00. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565\n" ] } ], "source": [ "from langchain.chains import create_sql_query_chain\n", "from transformers import AutoModelForCausalLM, AutoTokenizer,pipeline, LlamaTokenizer, LlamaForCausalLM\n", "from langchain_huggingface import HuggingFacePipeline\n", "from langchain_openai import ChatOpenAI\n", "\n", "# model_id = \"mrm8488/t5-base-finetuned-wikiSQL\"\n", "# tokenizer = AutoTokenizer.from_pretrained(model_id)\n", "\n", "# model = AutoModelForCausalLM.from_pretrained(\n", "# model_id,\n", "# load_in_4bit=True,\n", "# #attn_implementation=\"flash_attention_2\", # if you have an ampere GPU\n", "# )\n", "\n", "base_model = LlamaForCausalLM.from_pretrained(\n", " \"chavinlo/alpaca-native\",\n", " load_in_8bit=True,\n", " device_map='auto',\n", ")\n", "\n", "tokenizer = LlamaTokenizer.from_pretrained(\"chavinlo/alpaca-native\")\n", "\n", "pipe = pipeline(\"text-generation\", model=base_model, tokenizer=tokenizer, max_new_tokens=500, top_p=0.95, temperature=0.3, repetition_penalty=1.2)\n", "llm = HuggingFacePipeline(pipeline=pipe)\n", "# llm.invoke(\"what is price of `1968 Ford Mustang\")\n" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "from langchain import PromptTemplate, LLMChain\n", "template = \"\"\"\n", "Write a SQL Query given the table name {Table} and columns as a list {Columns} for the given question : \n", "{question}.\n", "\"\"\"\n", "\n", "prompt = PromptTemplate(template=template, input_variables=[\"Table\",\"question\",\"Columns\"])\n", "\n", "def get_llm_response(tble,question,cols):\n", " llm_chain = LLMChain(prompt=prompt, \n", " llm=llm\n", " )\n", " response= llm_chain.run({\"Table\" : tble,\"question\" :question, \"Columns\" : cols})\n", " print(response)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/langchain_core/_api/deprecation.py:139: LangChainDeprecationWarning: The class `LLMChain` was deprecated in LangChain 0.1.17 and will be removed in 0.3.0. Use RunnableSequence, e.g., `prompt | llm` instead.\n", " warn_deprecated(\n", "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/langchain_core/_api/deprecation.py:139: LangChainDeprecationWarning: The method `Chain.run` was deprecated in langchain 0.1.0 and will be removed in 0.3.0. Use invoke instead.\n", " warn_deprecated(\n", "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/transformers/generation/configuration_utils.py:515: UserWarning: `do_sample` is set to `False`. However, `temperature` is set to `0.3` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `temperature`.\n", " warnings.warn(\n", "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/transformers/generation/configuration_utils.py:520: UserWarning: `do_sample` is set to `False`. However, `top_p` is set to `0.95` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `top_p`.\n", " warnings.warn(\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Write a SQL Query given the table name employee and columns as a list ['id', 'name', 'date_of_birth', 'band', 'manager_id'] for the given question : \n", "Query the count of employees in band L6 with 239045 as the manager ID.\n", "Answer: SELECT COUNT(*) FROM employee WHERE band = ‘L6’ AND manager_ID = 239045;\n" ] } ], "source": [ "tble = \"employee\"\n", "cols = [\"id\",\"name\",\"date_of_birth\",\"band\",\"manager_id\"]\n", "question = \"Query the count of employees in band L6 with 239045 as the manager ID\"\n", "get_llm_response(tble,question,cols)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "SELECT `buyPrice`, `MSRP`\n", "FROM products\n", "WHERE `productName` = '1968 Ford Mustang'\n", "LIMIT 1;\n" ] } ], "source": [ "from langchain.chains import create_sql_query_chain\n", "from transformers import AutoModelForCausalLM, AutoTokenizer,pipeline, LlamaTokenizer, LlamaForCausalLM\n", "from langchain_huggingface import HuggingFacePipeline\n", "from langchain_openai import ChatOpenAI\n", "from langchain_openai import ChatOpenAI\n", "import os\n", "\n", "os.environ[\"OPENAI_API_KEY\"] = \"OPENAI_API_KEY\"\n", "\n", "llm = ChatOpenAI(model=\"gpt-3.5-turbo\", temperature=0)\n", "generate_query = create_sql_query_chain(llm, db)\n", "query = generate_query.invoke({\"question\": \"what is price of `1968 Ford Mustang`\"})\n", "# \"what is price of `1968 Ford Mustang`\"\n", "print(query)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "\"[(Decimal('95.34'), Decimal('194.57'))]\"" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from langchain_community.tools.sql_database.tool import QuerySQLDataBaseTool\n", "execute_query = QuerySQLDataBaseTool(db=db)\n", "execute_query.invoke(query)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'[(326,)]'" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "chain = generate_query | execute_query\n", "chain.invoke({\"question\": \"How many orders are there\"})" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "You are a MySQL expert. Given an input question, first create a syntactically correct MySQL query to run, then look at the results of the query and return the answer to the input question.\n", "Unless the user specifies in the question a specific number of examples to obtain, query for at most 5 results using the LIMIT clause as per MySQL. You can order the results to return the most informative data in the database.\n", "Never query for all columns from a table. You must query only the columns that are needed to answer the question. Wrap each column name in backticks (`) to denote them as delimited identifiers.\n", "Pay attention to use only the column names you can see in the tables below. Be careful to not query for columns that do not exist. Also, pay attention to which column is in which table.\n", "Pay attention to use CURDATE() function to get the current date, if the question involves \"today\".\n", "\n", "Use the following format:\n", "\n", "Question: Question here\n", "SQLQuery: SQL Query to run\n", "SQLResult: Result of the SQLQuery\n", "Answer: Final answer here\n", "\n", "Only use the following tables:\n", "\u001b[33;1m\u001b[1;3m{table_info}\u001b[0m\n", "\n", "Question: \u001b[33;1m\u001b[1;3m{input}\u001b[0m\n" ] } ], "source": [ "chain.get_prompts()[0].pretty_print()" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'There are a total of 326 orders.'" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from operator import itemgetter\n", "\n", "from langchain_core.output_parsers import StrOutputParser\n", "from langchain_core.prompts import PromptTemplate\n", "from langchain_core.runnables import RunnablePassthrough\n", "\n", "answer_prompt = PromptTemplate.from_template(\n", " \"\"\"Given the following user question, corresponding SQL query, and SQL result, answer the user question.\n", "\n", "Question: {question}\n", "SQL Query: {query}\n", "SQL Result: {result}\n", "Answer: \"\"\"\n", ")\n", "\n", "rephrase_answer = answer_prompt | llm | StrOutputParser()\n", "\n", "chain = (\n", " RunnablePassthrough.assign(query=generate_query).assign( result=itemgetter(\"query\") | execute_query ) | rephrase_answer\n", ")\n", "\n", "chain.invoke({\"question\": \"How many orders are there\"})" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "examples = [\n", " {\n", " \"input\": \"List all customers in France with a credit limit over 20,000.\",\n", " \"query\": \"SELECT * FROM customers WHERE country = 'France' AND creditLimit > 20000;\"\n", " },\n", " {\n", " \"input\": \"Get the highest payment amount made by any customer.\",\n", " \"query\": \"SELECT MAX(amount) FROM payments;\"\n", " },\n", " {\n", " \"input\": \"Show product details for products in the 'Motorcycles' product line.\",\n", " \"query\": \"SELECT * FROM products WHERE productLine = 'Motorcycles';\"\n", " },\n", " {\n", " \"input\": \"Retrieve the names of employees who report to employee number 1002.\",\n", " \"query\": \"SELECT firstName, lastName FROM employees WHERE reportsTo = 1002;\"\n", " },\n", " {\n", " \"input\": \"List all products with a stock quantity less than 7000.\",\n", " \"query\": \"SELECT productName, quantityInStock FROM products WHERE quantityInStock < 7000;\"\n", " },\n", " {\n", " 'input':\"what is price of `1968 Ford Mustang`\",\n", " \"query\": \"SELECT `buyPrice`, `MSRP` FROM products WHERE `productName` = '1968 Ford Mustang' LIMIT 1;\"\n", " }\n", "]" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Human: List all customers in France with a credit limit over 20,000.\n", "SQLQuery:\n", "AI: SELECT * FROM customers WHERE country = 'France' AND creditLimit > 20000;\n", "Human: Get the highest payment amount made by any customer.\n", "SQLQuery:\n", "AI: SELECT MAX(amount) FROM payments;\n", "Human: Show product details for products in the 'Motorcycles' product line.\n", "SQLQuery:\n", "AI: SELECT * FROM products WHERE productLine = 'Motorcycles';\n", "Human: Retrieve the names of employees who report to employee number 1002.\n", "SQLQuery:\n", "AI: SELECT firstName, lastName FROM employees WHERE reportsTo = 1002;\n", "Human: List all products with a stock quantity less than 7000.\n", "SQLQuery:\n", "AI: SELECT productName, quantityInStock FROM products WHERE quantityInStock < 7000;\n", "Human: what is price of `1968 Ford Mustang`\n", "SQLQuery:\n", "AI: SELECT `buyPrice`, `MSRP` FROM products WHERE `productName` = '1968 Ford Mustang' LIMIT 1;\n" ] } ], "source": [ "from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder,FewShotChatMessagePromptTemplate,PromptTemplate\n", "import json\n", "\n", "with open('/teamspace/studios/this_studio/few_shot_samples.json', 'r') as file:\n", " data = json.load(file)\n", "\n", "# Store the examples array in a variable\n", "examples = data[\"examples\"]\n", "\n", "# Print the examples to verify\n", "\n", "example_prompt = ChatPromptTemplate.from_messages(\n", " [\n", " (\"human\", \"{input}\\nSQLQuery:\"),\n", " (\"ai\", \"{query}\"),\n", " ]\n", ")\n", "few_shot_prompt = FewShotChatMessagePromptTemplate(\n", " example_prompt=example_prompt,\n", " examples=examples,\n", " # input_variables=[\"input\",\"top_k\"],\n", " input_variables=[\"input\"],\n", ")\n", "print(few_shot_prompt.format(input1=\"How many products are there?\"))" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[{'input': 'Retrieve the names of employees who report to employee number 1002.',\n", " 'query': 'SELECT firstName, lastName FROM employees WHERE reportsTo = 1002;'},\n", " {'input': 'List all customers in France with a credit limit over 20,000.',\n", " 'query': \"SELECT * FROM customers WHERE country = 'France' AND creditLimit > 20000;\"}]" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from langchain_community.vectorstores import Chroma\n", "from langchain_core.example_selectors import SemanticSimilarityExampleSelector\n", "from langchain_openai import OpenAIEmbeddings\n", "\n", "vectorstore = Chroma()\n", "vectorstore.delete_collection()\n", "example_selector = SemanticSimilarityExampleSelector.from_examples(\n", " examples,\n", " OpenAIEmbeddings(),\n", " vectorstore,\n", " k=2,\n", " input_keys=[\"input\"],\n", ")\n", "example_selector.select_examples({\"input\": \"how many employees we have?\"})\n", "# example_selector.select_examples({\"input\": \"How many employees?\"})" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Human: List all products with a stock quantity less than 7000.\n", "SQLQuery:\n", "AI: SELECT productName, quantityInStock FROM products WHERE quantityInStock < 7000;\n", "Human: Show product details for products in the 'Motorcycles' product line.\n", "SQLQuery:\n", "AI: SELECT * FROM products WHERE productLine = 'Motorcycles';\n" ] } ], "source": [ "few_shot_prompt = FewShotChatMessagePromptTemplate(\n", " example_prompt=example_prompt,\n", " example_selector=example_selector,\n", " input_variables=[\"input\",\"top_k\"],\n", ")\n", "print(few_shot_prompt.format(input=\"How many products are there?\"))" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "System: You are a MySQL expert. Given an input question, create a syntactically correct MySQL query to run. Unless otherwise specificed.\n", "\n", "Here is the relevant table info: \n", "CREATE TABLE customers (\n", "\t`customerNumber` INTEGER NOT NULL, \n", "\t`customerName` VARCHAR(50) NOT NULL, \n", "\t`contactLastName` VARCHAR(50) NOT NULL, \n", "\t`contactFirstName` VARCHAR(50) NOT NULL, \n", "\tphone VARCHAR(50) NOT NULL, \n", "\t`addressLine1` VARCHAR(50) NOT NULL, \n", "\t`addressLine2` VARCHAR(50), \n", "\tcity VARCHAR(50) NOT NULL, \n", "\tstate VARCHAR(50), \n", "\t`postalCode` VARCHAR(15), \n", "\tcountry VARCHAR(50) NOT NULL, \n", "\t`salesRepEmployeeNumber` INTEGER, \n", "\t`creditLimit` DECIMAL(10, 2), \n", "\tPRIMARY KEY (`customerNumber`), \n", "\tCONSTRAINT customers_ibfk_1 FOREIGN KEY(`salesRepEmployeeNumber`) REFERENCES employees (`employeeNumber`)\n", ")ENGINE=InnoDB COLLATE utf8mb4_0900_ai_ci DEFAULT CHARSET=utf8mb4\n", "\n", "/*\n", "3 rows from customers table:\n", "customerNumber\tcustomerName\tcontactLastName\tcontactFirstName\tphone\taddressLine1\taddressLine2\tcity\tstate\tpostalCode\tcountry\tsalesRepEmployeeNumber\tcreditLimit\n", "103\tAtelier graphique\tSchmitt\tCarine \t40.32.2555\t54, rue Royale\tNone\tNantes\tNone\t44000\tFrance\t1370\t21000.00\n", "112\tSignal Gift Stores\tKing\tJean\t7025551838\t8489 Strong St.\tNone\tLas Vegas\tNV\t83030\tUSA\t1166\t71800.00\n", "114\tAustralian Collectors, Co.\tFerguson\tPeter\t03 9520 4555\t636 St Kilda Road\tLevel 3\tMelbourne\tVictoria\t3004\tAustralia\t1611\t117300.00\n", "*/\n", "\n", "\n", "CREATE TABLE employees (\n", "\t`employeeNumber` INTEGER NOT NULL, \n", "\t`lastName` VARCHAR(50) NOT NULL, \n", "\t`firstName` VARCHAR(50) NOT NULL, \n", "\textension VARCHAR(10) NOT NULL, \n", "\temail VARCHAR(100) NOT NULL, \n", "\t`officeCode` VARCHAR(10) NOT NULL, \n", "\t`reportsTo` INTEGER, \n", "\t`jobTitle` VARCHAR(50) NOT NULL, \n", "\tPRIMARY KEY (`employeeNumber`), \n", "\tCONSTRAINT employees_ibfk_1 FOREIGN KEY(`reportsTo`) REFERENCES employees (`employeeNumber`), \n", "\tCONSTRAINT employees_ibfk_2 FOREIGN KEY(`officeCode`) REFERENCES offices (`officeCode`)\n", ")ENGINE=InnoDB COLLATE utf8mb4_0900_ai_ci DEFAULT CHARSET=utf8mb4\n", "\n", "/*\n", "3 rows from employees table:\n", "employeeNumber\tlastName\tfirstName\textension\temail\tofficeCode\treportsTo\tjobTitle\n", "1002\tMurphy\tDiane\tx5800\tdmurphy@classicmodelcars.com\t1\tNone\tPresident\n", "1056\tPatterson\tMary\tx4611\tmpatterso@classicmodelcars.com\t1\t1002\tVP Sales\n", "1076\tFirrelli\tJeff\tx9273\tjfirrelli@classicmodelcars.com\t1\t1002\tVP Marketing\n", "*/\n", "\n", "\n", "CREATE TABLE offices (\n", "\t`officeCode` VARCHAR(10) NOT NULL, \n", "\tcity VARCHAR(50) NOT NULL, \n", "\tphone VARCHAR(50) NOT NULL, \n", "\t`addressLine1` VARCHAR(50) NOT NULL, \n", "\t`addressLine2` VARCHAR(50), \n", "\tstate VARCHAR(50), \n", "\tcountry VARCHAR(50) NOT NULL, \n", "\t`postalCode` VARCHAR(15) NOT NULL, \n", "\tterritory VARCHAR(10) NOT NULL, \n", "\tPRIMARY KEY (`officeCode`)\n", ")ENGINE=InnoDB COLLATE utf8mb4_0900_ai_ci DEFAULT CHARSET=utf8mb4\n", "\n", "/*\n", "3 rows from offices table:\n", "officeCode\tcity\tphone\taddressLine1\taddressLine2\tstate\tcountry\tpostalCode\tterritory\n", "1\tSan Francisco\t+1 650 219 4782\t100 Market Street\tSuite 300\tCA\tUSA\t94080\tNA\n", "2\tBoston\t+1 215 837 0825\t1550 Court Place\tSuite 102\tMA\tUSA\t02107\tNA\n", "3\tNYC\t+1 212 555 3000\t523 East 53rd Street\tapt. 5A\tNY\tUSA\t10022\tNA\n", "*/\n", "\n", "\n", "CREATE TABLE orderdetails (\n", "\t`orderNumber` INTEGER NOT NULL, \n", "\t`productCode` VARCHAR(15) NOT NULL, \n", "\t`quantityOrdered` INTEGER NOT NULL, \n", "\t`priceEach` DECIMAL(10, 2) NOT NULL, \n", "\t`orderLineNumber` SMALLINT NOT NULL, \n", "\tPRIMARY KEY (`orderNumber`, `productCode`), \n", "\tCONSTRAINT orderdetails_ibfk_1 FOREIGN KEY(`orderNumber`) REFERENCES orders (`orderNumber`), \n", "\tCONSTRAINT orderdetails_ibfk_2 FOREIGN KEY(`productCode`) REFERENCES products (`productCode`)\n", ")ENGINE=InnoDB COLLATE utf8mb4_0900_ai_ci DEFAULT CHARSET=utf8mb4\n", "\n", "/*\n", "3 rows from orderdetails table:\n", "orderNumber\tproductCode\tquantityOrdered\tpriceEach\torderLineNumber\n", "10100\tS18_1749\t30\t136.00\t3\n", "10100\tS18_2248\t50\t55.09\t2\n", "10100\tS18_4409\t22\t75.46\t4\n", "*/\n", "\n", "\n", "CREATE TABLE orders (\n", "\t`orderNumber` INTEGER NOT NULL, \n", "\t`orderDate` DATE NOT NULL, \n", "\t`requiredDate` DATE NOT NULL, \n", "\t`shippedDate` DATE, \n", "\tstatus VARCHAR(15) NOT NULL, \n", "\tcomments TEXT, \n", "\t`customerNumber` INTEGER NOT NULL, \n", "\tPRIMARY KEY (`orderNumber`), \n", "\tCONSTRAINT orders_ibfk_1 FOREIGN KEY(`customerNumber`) REFERENCES customers (`customerNumber`)\n", ")ENGINE=InnoDB COLLATE utf8mb4_0900_ai_ci DEFAULT CHARSET=utf8mb4\n", "\n", "/*\n", "3 rows from orders table:\n", "orderNumber\torderDate\trequiredDate\tshippedDate\tstatus\tcomments\tcustomerNumber\n", "10100\t2003-01-06\t2003-01-13\t2003-01-10\tShipped\tNone\t363\n", "10101\t2003-01-09\t2003-01-18\t2003-01-11\tShipped\tCheck on availability.\t128\n", "10102\t2003-01-10\t2003-01-18\t2003-01-14\tShipped\tNone\t181\n", "*/\n", "\n", "\n", "CREATE TABLE payments (\n", "\t`customerNumber` INTEGER NOT NULL, \n", "\t`checkNumber` VARCHAR(50) NOT NULL, \n", "\t`paymentDate` DATE NOT NULL, \n", "\tamount DECIMAL(10, 2) NOT NULL, \n", "\tPRIMARY KEY (`customerNumber`, `checkNumber`), \n", "\tCONSTRAINT payments_ibfk_1 FOREIGN KEY(`customerNumber`) REFERENCES customers (`customerNumber`)\n", ")ENGINE=InnoDB COLLATE utf8mb4_0900_ai_ci DEFAULT CHARSET=utf8mb4\n", "\n", "/*\n", "3 rows from payments table:\n", "customerNumber\tcheckNumber\tpaymentDate\tamount\n", "103\tHQ336336\t2004-10-19\t6066.78\n", "103\tJM555205\t2003-06-05\t14571.44\n", "103\tOM314933\t2004-12-18\t1676.14\n", "*/\n", "\n", "\n", "CREATE TABLE productlines (\n", "\t`productLine` VARCHAR(50) NOT NULL, \n", "\t`textDescription` VARCHAR(4000), \n", "\t`htmlDescription` MEDIUMTEXT, \n", "\timage MEDIUMBLOB, \n", "\tPRIMARY KEY (`productLine`)\n", ")ENGINE=InnoDB COLLATE utf8mb4_0900_ai_ci DEFAULT CHARSET=utf8mb4\n", "\n", "/*\n", "3 rows from productlines table:\n", "productLine\ttextDescription\thtmlDescription\timage\n", "Classic Cars\tAttention car enthusiasts: Make your wildest car ownership dreams come true. Whether you are looking\tNone\tNone\n", "Motorcycles\tOur motorcycles are state of the art replicas of classic as well as contemporary motorcycle legends \tNone\tNone\n", "Planes\tUnique, diecast airplane and helicopter replicas suitable for collections, as well as home, office o\tNone\tNone\n", "*/\n", "\n", "\n", "CREATE TABLE products (\n", "\t`productCode` VARCHAR(15) NOT NULL, \n", "\t`productName` VARCHAR(70) NOT NULL, \n", "\t`productLine` VARCHAR(50) NOT NULL, \n", "\t`productScale` VARCHAR(10) NOT NULL, \n", "\t`productVendor` VARCHAR(50) NOT NULL, \n", "\t`productDescription` TEXT NOT NULL, \n", "\t`quantityInStock` SMALLINT NOT NULL, \n", "\t`buyPrice` DECIMAL(10, 2) NOT NULL, \n", "\t`MSRP` DECIMAL(10, 2) NOT NULL, \n", "\tPRIMARY KEY (`productCode`), \n", "\tCONSTRAINT products_ibfk_1 FOREIGN KEY(`productLine`) REFERENCES productlines (`productLine`)\n", ")ENGINE=InnoDB COLLATE utf8mb4_0900_ai_ci DEFAULT CHARSET=utf8mb4\n", "\n", "/*\n", "3 rows from products table:\n", "productCode\tproductName\tproductLine\tproductScale\tproductVendor\tproductDescription\tquantityInStock\tbuyPrice\tMSRP\n", "S10_1678\t1969 Harley Davidson Ultimate Chopper\tMotorcycles\t1:10\tMin Lin Diecast\tThis replica features working kickstand, front suspension, gear-shift lever, footbrake lever, drive \t7933\t48.81\t95.70\n", "S10_1949\t1952 Alpine Renault 1300\tClassic Cars\t1:10\tClassic Metal Creations\tTurnable front wheels; steering function; detailed interior; detailed engine; opening hood; opening \t7305\t98.58\t214.30\n", "S10_2016\t1996 Moto Guzzi 1100i\tMotorcycles\t1:10\tHighway 66 Mini Classics\tOfficial Moto Guzzi logos and insignias, saddle bags located on side of motorcycle, detailed engine,\t6625\t68.99\t118.94\n", "*/\n", "\n", "Below are a number of examples of questions and their corresponding SQL queries.\n", "Human: List all products with a stock quantity less than 7000.\n", "SQLQuery:\n", "AI: SELECT productName, quantityInStock FROM products WHERE quantityInStock < 7000;\n", "Human: Show product details for products in the 'Motorcycles' product line.\n", "SQLQuery:\n", "AI: SELECT * FROM products WHERE productLine = 'Motorcycles';\n", "Human: How many products are there?\n" ] } ], "source": [ "final_prompt = ChatPromptTemplate.from_messages(\n", " [\n", " (\"system\", \"You are a MySQL expert. Given an input question, create a syntactically correct MySQL query to run. Unless otherwise specificed.\\n\\nHere is the relevant table info: {table_info}\\n\\nBelow are a number of examples of questions and their corresponding SQL queries.\"),\n", " few_shot_prompt,\n", " (\"human\", \"{input}\"),\n", " ]\n", ")\n", "print(final_prompt.format(input=\"How many products are there?\",table_info=db.table_info))" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'The office of the employee with the last name Murphy is located in San Francisco.'" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "generate_query = create_sql_query_chain(llm, db,final_prompt)\n", "chain = (\n", "RunnablePassthrough.assign(query=generate_query).assign(\n", " result=itemgetter(\"query\") | execute_query\n", ")\n", "| rephrase_answer\n", ")\n", "chain.invoke({\"question\": \"in what city is the office of the employee with the last name murphy\"})" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Table Name:productlines\n", "Table Description:Stores information about the different product lines offered by the company, including a unique name, textual description, HTML description, and image. Categorizes products into different lines.\n", "\n", "Table Name:products\n", "Table Description:Contains details of each product sold by the company, including code, name, product line, scale, vendor, description, stock quantity, buy price, and MSRP. Linked to the productlines table.\n", "\n", "Table Name:offices\n", "Table Description:Holds data on the company's sales offices, including office code, city, phone number, address, state, country, postal code, and territory. Each office is uniquely identified by its office code.\n", "\n", "Table Name:employees\n", "Table Description:Stores information about employees, including number, last name, first name, job title, contact info, and office code. Links to offices and maps organizational structure through the reportsTo attribute.\n", "\n", "Table Name:customers\n", "Table Description:Captures data on customers, including customer number, name, contact details, address, assigned sales rep, and credit limit. Central to managing customer relationships and sales processes.\n", "\n", "Table Name:payments\n", "Table Description:Records payments made by customers, tracking the customer number, check number, payment date, and amount. Linked to the customers table for financial tracking and account management.\n", "\n", "Table Name:orders\n", "Table Description:Details each sales order placed by customers, including order number, dates, status, comments, and customer number. Linked to the customers table, tracking sales transactions.\n", "\n", "Table Name:orderdetails\n", "Table Description:Describes individual line items for each sales order, including order number, product code, quantity, price, and order line number. Links orders to products, detailing the items sold.\n", "\n", "\n" ] } ], "source": [ "from operator import itemgetter\n", "from langchain.chains.openai_tools import create_extraction_chain_pydantic\n", "from langchain_core.pydantic_v1 import BaseModel, Field\n", "from typing import List\n", "import pandas as pd\n", "\n", "def get_table_details():\n", " # Read the CSV file into a DataFrame\n", " table_description = pd.read_csv(\"/teamspace/studios/this_studio/database_table_descriptions.csv\")\n", " table_docs = []\n", "\n", " # Iterate over the DataFrame rows to create Document objects\n", " table_details = \"\"\n", " for index, row in table_description.iterrows():\n", " table_details = table_details + \"Table Name:\" + row['Table'] + \"\\n\" + \"Table Description:\" + row['Description'] + \"\\n\\n\"\n", "\n", " return table_details\n", "\n", "\n", "class Table(BaseModel):\n", " \"\"\"Table in SQL database.\"\"\"\n", "\n", " name: str = Field(description=\"Name of table in SQL database.\")\n", "\n", "# table_names = \"\\n\".join(db.get_usable_table_names())\n", "table_details = get_table_details()\n", "print(table_details)" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[Table(name='employees'),\n", " Table(name='offices'),\n", " Table(name='orders'),\n", " Table(name='orderdetails')]" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "table_details_prompt = f\"\"\"Return the names of ALL the SQL tables that MIGHT be relevant to the user question. \\\n", "The tables are:\n", "\n", "{table_details}\n", "\n", "Remember to include ALL POTENTIALLY RELEVANT tables, even if you're not sure that they're needed.\"\"\"\n", "\n", "table_chain = create_extraction_chain_pydantic(Table, llm, system_message=table_details_prompt)\n", "tables = table_chain.invoke({\"input\": \"give me details of the employees and their office city and the sales made by them\"})\n", "tables" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['customers', 'orders']" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def get_tables(tables: List[Table]) -> List[str]:\n", " tables = [table.name for table in tables]\n", " return tables\n", "\n", "select_table = {\"input\": itemgetter(\"question\")} | create_extraction_chain_pydantic(Table, llm, system_message=table_details_prompt) | get_tables\n", "select_table.invoke({\"question\": \"give me details of customer and their order count\"})" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'The office of the employee with the last name Murphy is located in San Francisco.'" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "chain = (\n", "RunnablePassthrough.assign(table_names_to_use=select_table) |\n", "RunnablePassthrough.assign(query=generate_query).assign(\n", " result=itemgetter(\"query\") | execute_query\n", ")\n", "| rephrase_answer\n", ")\n", "chain.invoke({\"question\": \"in what city is the office of the employee with the last name murphy\"})" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [], "source": [ "final_prompt = ChatPromptTemplate.from_messages(\n", " [\n", " (\"system\", \"You are a MySQL expert. Given an input question, create a syntactically correct MySQL query to run. Unless otherwise specificed.\\n\\nHere is the relevant table info: {table_info}\\n\\nBelow are a number of examples of questions and their corresponding SQL queries.\"),\n", " few_shot_prompt,\n", " MessagesPlaceholder(variable_name=\"messages\"),\n", " (\"human\", \"{input}\"),\n", " ]\n", ")" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [], "source": [ "from langchain.memory import ChatMessageHistory\n", "\n", "history = ChatMessageHistory()\n", "\n", "generate_query = create_sql_query_chain(llm, db,final_prompt)\n", "\n", "chain = (\n", "RunnablePassthrough.assign(table_names_to_use=select_table) |\n", "RunnablePassthrough.assign(query=generate_query).assign(\n", " result=itemgetter(\"query\") | execute_query\n", ")\n", "| rephrase_answer\n", ")\n" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'There are 6 employees in Boston.'" ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "chain.invoke({\"question\": \"how many employees are there in Boston\", \"messages\":history.messages})" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'The names of the employees in Boston are Diane Murphy, Mary Patterson, Jeff Firrelli, Anthony Bow, Leslie Jennings, and Leslie Thompson.'" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "chain.invoke({\"question\": \"what are the names of those employees in boston\", \"messages\":history.messages})" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[{'input': 'List all customers in France with a credit limit over 20,000.', 'query': \"SELECT * FROM customers WHERE country = 'France' AND creditLimit > 20000;\"}, {'input': 'Get the highest payment amount made by any customer.', 'query': 'SELECT MAX(amount) FROM payments;'}, {'input': \"Show product details for products in the 'Motorcycles' product line.\", 'query': \"SELECT * FROM products WHERE productLine = 'Motorcycles';\"}, {'input': 'Retrieve the names of employees who report to employee number 1002.', 'query': 'SELECT firstName, lastName FROM employees WHERE reportsTo = 1002;'}, {'input': 'List all products with a stock quantity less than 7000.', 'query': 'SELECT productName, quantityInStock FROM products WHERE quantityInStock < 7000;'}, {'input': 'what is price of `1968 Ford Mustang`', 'query': \"SELECT `buyPrice`, `MSRP` FROM products WHERE `productName` = '1968 Ford Mustang' LIMIT 1;\"}]\n" ] } ], "source": [ "import json\n", "\n", "with open('/teamspace/studios/this_studio/few_shot_samples.json', 'r') as file:\n", " data = json.load(file)\n", "\n", "# Store the examples array in a variable\n", "x = data[\"examples\"]\n", "\n", "# Print the examples to verify\n", "print(x)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'/Users/avnishkanungo/Desktop/Purdue/ECE60146/DLStudio/Examples/NLToSQL'" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import os\n", "os.getcwd()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "DLStudioEnv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.13" } }, "nbformat": 4, "nbformat_minor": 2 }