{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "5BN13TZlSCv4", "outputId": "424a6920-0cea-4e28-dce0-3de6f0a4cc3c" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: langchain in /opt/anaconda3/lib/python3.12/site-packages (0.3.0)\n", "Requirement already satisfied: langchain_community in /opt/anaconda3/lib/python3.12/site-packages (0.2.17)\n", "Requirement already satisfied: langchain_openai in /opt/anaconda3/lib/python3.12/site-packages (0.2.0)\n", "Requirement already satisfied: chromadb in /opt/anaconda3/lib/python3.12/site-packages (0.5.5)\n", "Requirement already satisfied: pypdf in /opt/anaconda3/lib/python3.12/site-packages (5.0.0)\n", "Requirement already satisfied: langsmith in /opt/anaconda3/lib/python3.12/site-packages (0.1.125)\n", "Requirement already satisfied: qdrant-client in /opt/anaconda3/lib/python3.12/site-packages (1.11.1)\n", "Requirement already satisfied: ragas in /opt/anaconda3/lib/python3.12/site-packages (0.1.20)\n", "Requirement already satisfied: pandas in /opt/anaconda3/lib/python3.12/site-packages (2.2.2)\n", "Requirement already satisfied: PyYAML>=5.3 in /opt/anaconda3/lib/python3.12/site-packages (from langchain) (6.0.1)\n", "Requirement already satisfied: SQLAlchemy<3,>=1.4 in /opt/anaconda3/lib/python3.12/site-packages (from langchain) (2.0.30)\n", "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /opt/anaconda3/lib/python3.12/site-packages (from langchain) (3.9.5)\n", "Requirement already satisfied: langchain-core<0.4.0,>=0.3.0 in /opt/anaconda3/lib/python3.12/site-packages (from langchain) (0.3.2)\n", "Requirement already satisfied: langchain-text-splitters<0.4.0,>=0.3.0 in /opt/anaconda3/lib/python3.12/site-packages (from langchain) (0.3.0)\n", "Requirement already satisfied: numpy<2.0.0,>=1.26.0 in /opt/anaconda3/lib/python3.12/site-packages (from langchain) (1.26.4)\n", "Requirement 
already satisfied: pydantic<3.0.0,>=2.7.4 in /opt/anaconda3/lib/python3.12/site-packages (from langchain) (2.9.2)\n", "Requirement already satisfied: requests<3,>=2 in /opt/anaconda3/lib/python3.12/site-packages (from langchain) (2.32.2)\n", "Requirement already satisfied: tenacity!=8.4.0,<9.0.0,>=8.1.0 in /opt/anaconda3/lib/python3.12/site-packages (from langchain) (8.5.0)\n", "Requirement already satisfied: dataclasses-json<0.7,>=0.5.7 in /opt/anaconda3/lib/python3.12/site-packages (from langchain_community) (0.5.14)\n", "Collecting langchain\n", " Using cached langchain-0.2.16-py3-none-any.whl.metadata (7.1 kB)\n", "INFO: pip is looking at multiple versions of langchain-community to determine which version is compatible with other requirements. This could take a while.\n", "Collecting langchain_community\n", " Using cached langchain_community-0.3.0-py3-none-any.whl.metadata (2.8 kB)\n", "Requirement already satisfied: pydantic-settings<3.0.0,>=2.4.0 in /opt/anaconda3/lib/python3.12/site-packages (from langchain_community) (2.5.2)\n", "Requirement already satisfied: openai<2.0.0,>=1.40.0 in /opt/anaconda3/lib/python3.12/site-packages (from langchain_openai) (1.46.0)\n", "Requirement already satisfied: tiktoken<1,>=0.7 in /opt/anaconda3/lib/python3.12/site-packages (from langchain_openai) (0.7.0)\n", "Requirement already satisfied: build>=1.0.3 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (1.2.1)\n", "Requirement already satisfied: chroma-hnswlib==0.7.6 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (0.7.6)\n", "Requirement already satisfied: fastapi>=0.95.2 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (0.110.3)\n", "Requirement already satisfied: uvicorn>=0.18.3 in /opt/anaconda3/lib/python3.12/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.25.0)\n", "Requirement already satisfied: posthog>=2.4.0 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (3.5.0)\n", "Requirement already 
satisfied: typing-extensions>=4.5.0 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (4.11.0)\n", "Requirement already satisfied: onnxruntime>=1.14.1 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (1.18.1)\n", "Requirement already satisfied: opentelemetry-api>=1.2.0 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (1.25.0)\n", "Requirement already satisfied: opentelemetry-exporter-otlp-proto-grpc>=1.2.0 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (1.25.0)\n", "Requirement already satisfied: opentelemetry-instrumentation-fastapi>=0.41b0 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (0.46b0)\n", "Requirement already satisfied: opentelemetry-sdk>=1.2.0 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (1.25.0)\n", "Requirement already satisfied: tokenizers>=0.13.2 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (0.19.1)\n", "Requirement already satisfied: pypika>=0.48.9 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (0.48.9)\n", "Requirement already satisfied: tqdm>=4.65.0 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (4.66.4)\n", "Requirement already satisfied: overrides>=7.3.1 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (7.4.0)\n", "Requirement already satisfied: importlib-resources in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (6.4.0)\n", "Requirement already satisfied: grpcio>=1.58.0 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (1.66.1)\n", "Requirement already satisfied: bcrypt>=4.0.1 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (4.2.0)\n", "Requirement already satisfied: typer>=0.9.0 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (0.12.3)\n", "Requirement already satisfied: kubernetes>=28.1.0 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (30.1.0)\n", "Requirement already satisfied: mmh3>=4.0.1 in 
/opt/anaconda3/lib/python3.12/site-packages (from chromadb) (4.1.0)\n", "Requirement already satisfied: orjson>=3.9.12 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (3.10.6)\n", "Requirement already satisfied: httpx>=0.27.0 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (0.27.0)\n", "Requirement already satisfied: grpcio-tools>=1.41.0 in /opt/anaconda3/lib/python3.12/site-packages (from qdrant-client) (1.62.3)\n", "Requirement already satisfied: portalocker<3.0.0,>=2.7.0 in /opt/anaconda3/lib/python3.12/site-packages (from qdrant-client) (2.10.1)\n", "Requirement already satisfied: urllib3<3,>=1.26.14 in /opt/anaconda3/lib/python3.12/site-packages (from qdrant-client) (2.2.2)\n", "Requirement already satisfied: datasets in /opt/anaconda3/lib/python3.12/site-packages (from ragas) (3.0.0)\n", "INFO: pip is looking at multiple versions of ragas to determine which version is compatible with other requirements. This could take a while.\n", "Collecting ragas\n", " Downloading ragas-0.1.19-py3-none-any.whl.metadata (5.4 kB)\n", "Requirement already satisfied: pysbd>=0.3.4 in /opt/anaconda3/lib/python3.12/site-packages (from ragas) (0.3.4)\n", "Requirement already satisfied: nest-asyncio in /opt/anaconda3/lib/python3.12/site-packages (from ragas) (1.6.0)\n", "Requirement already satisfied: appdirs in /opt/anaconda3/lib/python3.12/site-packages (from ragas) (1.4.4)\n", "Requirement already satisfied: python-dateutil>=2.8.2 in /opt/anaconda3/lib/python3.12/site-packages (from pandas) (2.9.0.post0)\n", "Requirement already satisfied: pytz>=2020.1 in /opt/anaconda3/lib/python3.12/site-packages (from pandas) (2024.1)\n", "Requirement already satisfied: tzdata>=2022.7 in /opt/anaconda3/lib/python3.12/site-packages (from pandas) (2023.3)\n", "Requirement already satisfied: aiosignal>=1.1.2 in /opt/anaconda3/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.2.0)\n", "Requirement already satisfied: attrs>=17.3.0 in 
/opt/anaconda3/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (23.1.0)\n", "Requirement already satisfied: frozenlist>=1.1.1 in /opt/anaconda3/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.4.0)\n", "Requirement already satisfied: multidict<7.0,>=4.5 in /opt/anaconda3/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (6.0.4)\n", "Requirement already satisfied: yarl<2.0,>=1.0 in /opt/anaconda3/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.9.3)\n", "Requirement already satisfied: packaging>=19.1 in /opt/anaconda3/lib/python3.12/site-packages (from build>=1.0.3->chromadb) (23.2)\n", "Requirement already satisfied: pyproject_hooks in /opt/anaconda3/lib/python3.12/site-packages (from build>=1.0.3->chromadb) (1.1.0)\n", "Requirement already satisfied: marshmallow<4.0.0,>=3.18.0 in /opt/anaconda3/lib/python3.12/site-packages (from dataclasses-json<0.7,>=0.5.7->langchain_community) (3.21.3)\n", "Requirement already satisfied: typing-inspect<1,>=0.4.0 in /opt/anaconda3/lib/python3.12/site-packages (from dataclasses-json<0.7,>=0.5.7->langchain_community) (0.9.0)\n", "Requirement already satisfied: starlette<0.38.0,>=0.37.2 in /opt/anaconda3/lib/python3.12/site-packages (from fastapi>=0.95.2->chromadb) (0.37.2)\n", "Requirement already satisfied: protobuf<5.0dev,>=4.21.6 in /opt/anaconda3/lib/python3.12/site-packages (from grpcio-tools>=1.41.0->qdrant-client) (4.25.4)\n", "Requirement already satisfied: setuptools in /opt/anaconda3/lib/python3.12/site-packages (from grpcio-tools>=1.41.0->qdrant-client) (69.5.1)\n", "Requirement already satisfied: anyio in /opt/anaconda3/lib/python3.12/site-packages (from httpx>=0.27.0->chromadb) (3.7.1)\n", "Requirement already satisfied: certifi in /opt/anaconda3/lib/python3.12/site-packages (from httpx>=0.27.0->chromadb) (2024.6.2)\n", "Requirement already satisfied: httpcore==1.* in /opt/anaconda3/lib/python3.12/site-packages (from 
httpx>=0.27.0->chromadb) (1.0.5)\n", "Requirement already satisfied: idna in /opt/anaconda3/lib/python3.12/site-packages (from httpx>=0.27.0->chromadb) (3.7)\n", "Requirement already satisfied: sniffio in /opt/anaconda3/lib/python3.12/site-packages (from httpx>=0.27.0->chromadb) (1.3.0)\n", "Requirement already satisfied: h11<0.15,>=0.13 in /opt/anaconda3/lib/python3.12/site-packages (from httpcore==1.*->httpx>=0.27.0->chromadb) (0.14.0)\n", "Requirement already satisfied: h2<5,>=3 in /opt/anaconda3/lib/python3.12/site-packages (from httpx[http2]>=0.20.0->qdrant-client) (4.1.0)\n", "Requirement already satisfied: six>=1.9.0 in /opt/anaconda3/lib/python3.12/site-packages (from kubernetes>=28.1.0->chromadb) (1.16.0)\n", "Requirement already satisfied: google-auth>=1.0.1 in /opt/anaconda3/lib/python3.12/site-packages (from kubernetes>=28.1.0->chromadb) (2.33.0)\n", "Requirement already satisfied: websocket-client!=0.40.0,!=0.41.*,!=0.42.*,>=0.32.0 in /opt/anaconda3/lib/python3.12/site-packages (from kubernetes>=28.1.0->chromadb) (1.8.0)\n", "Requirement already satisfied: requests-oauthlib in /opt/anaconda3/lib/python3.12/site-packages (from kubernetes>=28.1.0->chromadb) (2.0.0)\n", "Requirement already satisfied: oauthlib>=3.2.2 in /opt/anaconda3/lib/python3.12/site-packages (from kubernetes>=28.1.0->chromadb) (3.2.2)\n", "Requirement already satisfied: jsonpatch<2.0,>=1.33 in /opt/anaconda3/lib/python3.12/site-packages (from langchain-core<0.4.0,>=0.3.0->langchain) (1.33)\n", "Requirement already satisfied: coloredlogs in /opt/anaconda3/lib/python3.12/site-packages (from onnxruntime>=1.14.1->chromadb) (15.0.1)\n", "Requirement already satisfied: flatbuffers in /opt/anaconda3/lib/python3.12/site-packages (from onnxruntime>=1.14.1->chromadb) (24.3.25)\n", "Requirement already satisfied: sympy in /opt/anaconda3/lib/python3.12/site-packages (from onnxruntime>=1.14.1->chromadb) (1.12)\n", "Requirement already satisfied: distro<2,>=1.7.0 in 
/opt/anaconda3/lib/python3.12/site-packages (from openai<2.0.0,>=1.40.0->langchain_openai) (1.9.0)\n", "Requirement already satisfied: jiter<1,>=0.4.0 in /opt/anaconda3/lib/python3.12/site-packages (from openai<2.0.0,>=1.40.0->langchain_openai) (0.5.0)\n", "Requirement already satisfied: deprecated>=1.2.6 in /opt/anaconda3/lib/python3.12/site-packages (from opentelemetry-api>=1.2.0->chromadb) (1.2.14)\n", "Requirement already satisfied: importlib-metadata<=7.1,>=6.0 in /opt/anaconda3/lib/python3.12/site-packages (from opentelemetry-api>=1.2.0->chromadb) (6.11.0)\n", "Requirement already satisfied: googleapis-common-protos~=1.52 in /opt/anaconda3/lib/python3.12/site-packages (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb) (1.63.2)\n", "Requirement already satisfied: opentelemetry-exporter-otlp-proto-common==1.25.0 in /opt/anaconda3/lib/python3.12/site-packages (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb) (1.25.0)\n", "Requirement already satisfied: opentelemetry-proto==1.25.0 in /opt/anaconda3/lib/python3.12/site-packages (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb) (1.25.0)\n", "Requirement already satisfied: opentelemetry-instrumentation-asgi==0.46b0 in /opt/anaconda3/lib/python3.12/site-packages (from opentelemetry-instrumentation-fastapi>=0.41b0->chromadb) (0.46b0)\n", "Requirement already satisfied: opentelemetry-instrumentation==0.46b0 in /opt/anaconda3/lib/python3.12/site-packages (from opentelemetry-instrumentation-fastapi>=0.41b0->chromadb) (0.46b0)\n", "Requirement already satisfied: opentelemetry-semantic-conventions==0.46b0 in /opt/anaconda3/lib/python3.12/site-packages (from opentelemetry-instrumentation-fastapi>=0.41b0->chromadb) (0.46b0)\n", "Requirement already satisfied: opentelemetry-util-http==0.46b0 in /opt/anaconda3/lib/python3.12/site-packages (from opentelemetry-instrumentation-fastapi>=0.41b0->chromadb) (0.46b0)\n", "Requirement already satisfied: wrapt<2.0.0,>=1.0.0 in 
/opt/anaconda3/lib/python3.12/site-packages (from opentelemetry-instrumentation==0.46b0->opentelemetry-instrumentation-fastapi>=0.41b0->chromadb) (1.14.1)\n", "Requirement already satisfied: asgiref~=3.0 in /opt/anaconda3/lib/python3.12/site-packages (from opentelemetry-instrumentation-asgi==0.46b0->opentelemetry-instrumentation-fastapi>=0.41b0->chromadb) (3.8.1)\n", "Requirement already satisfied: monotonic>=1.5 in /opt/anaconda3/lib/python3.12/site-packages (from posthog>=2.4.0->chromadb) (1.6)\n", "Requirement already satisfied: backoff>=1.10.0 in /opt/anaconda3/lib/python3.12/site-packages (from posthog>=2.4.0->chromadb) (2.2.1)\n", "Requirement already satisfied: annotated-types>=0.6.0 in /opt/anaconda3/lib/python3.12/site-packages (from pydantic<3.0.0,>=2.7.4->langchain) (0.6.0)\n", "Requirement already satisfied: pydantic-core==2.23.4 in /opt/anaconda3/lib/python3.12/site-packages (from pydantic<3.0.0,>=2.7.4->langchain) (2.23.4)\n", "Requirement already satisfied: python-dotenv>=0.21.0 in /opt/anaconda3/lib/python3.12/site-packages (from pydantic-settings<3.0.0,>=2.4.0->langchain_community) (1.0.0)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/anaconda3/lib/python3.12/site-packages (from requests<3,>=2->langchain) (3.3.2)\n", "Requirement already satisfied: regex>=2022.1.18 in /opt/anaconda3/lib/python3.12/site-packages (from tiktoken<1,>=0.7->langchain_openai) (2023.10.3)\n", "Requirement already satisfied: huggingface-hub<1.0,>=0.16.4 in /opt/anaconda3/lib/python3.12/site-packages (from tokenizers>=0.13.2->chromadb) (0.23.4)\n", "Requirement already satisfied: click>=8.0.0 in /opt/anaconda3/lib/python3.12/site-packages (from typer>=0.9.0->chromadb) (8.1.7)\n", "Requirement already satisfied: shellingham>=1.3.0 in /opt/anaconda3/lib/python3.12/site-packages (from typer>=0.9.0->chromadb) (1.5.4)\n", "Requirement already satisfied: rich>=10.11.0 in /opt/anaconda3/lib/python3.12/site-packages (from typer>=0.9.0->chromadb) (13.3.5)\n", 
"Requirement already satisfied: httptools>=0.5.0 in /opt/anaconda3/lib/python3.12/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.6.1)\n", "Requirement already satisfied: uvloop!=0.15.0,!=0.15.1,>=0.14.0 in /opt/anaconda3/lib/python3.12/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.19.0)\n", "Requirement already satisfied: watchfiles>=0.13 in /opt/anaconda3/lib/python3.12/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.20.0)\n", "Requirement already satisfied: websockets>=10.4 in /opt/anaconda3/lib/python3.12/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (12.0)\n", "Requirement already satisfied: filelock in /opt/anaconda3/lib/python3.12/site-packages (from datasets->ragas) (3.13.1)\n", "Requirement already satisfied: pyarrow>=15.0.0 in /opt/anaconda3/lib/python3.12/site-packages (from datasets->ragas) (17.0.0)\n", "Requirement already satisfied: dill<0.3.9,>=0.3.0 in /opt/anaconda3/lib/python3.12/site-packages (from datasets->ragas) (0.3.8)\n", "Requirement already satisfied: xxhash in /opt/anaconda3/lib/python3.12/site-packages (from datasets->ragas) (3.5.0)\n", "Requirement already satisfied: multiprocess in /opt/anaconda3/lib/python3.12/site-packages (from datasets->ragas) (0.70.16)\n", "Requirement already satisfied: fsspec<=2024.6.1,>=2023.1.0 in /opt/anaconda3/lib/python3.12/site-packages (from fsspec[http]<=2024.6.1,>=2023.1.0->datasets->ragas) (2024.3.1)\n", "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /opt/anaconda3/lib/python3.12/site-packages (from google-auth>=1.0.1->kubernetes>=28.1.0->chromadb) (5.3.3)\n", "Requirement already satisfied: pyasn1-modules>=0.2.1 in /opt/anaconda3/lib/python3.12/site-packages (from google-auth>=1.0.1->kubernetes>=28.1.0->chromadb) (0.2.8)\n", "Requirement already satisfied: rsa<5,>=3.1.4 in /opt/anaconda3/lib/python3.12/site-packages (from google-auth>=1.0.1->kubernetes>=28.1.0->chromadb) (4.9)\n", "Requirement already satisfied: hyperframe<7,>=6.0 in 
/opt/anaconda3/lib/python3.12/site-packages (from h2<5,>=3->httpx[http2]>=0.20.0->qdrant-client) (6.0.1)\n", "Requirement already satisfied: hpack<5,>=4.0 in /opt/anaconda3/lib/python3.12/site-packages (from h2<5,>=3->httpx[http2]>=0.20.0->qdrant-client) (4.0.0)\n", "Requirement already satisfied: zipp>=0.5 in /opt/anaconda3/lib/python3.12/site-packages (from importlib-metadata<=7.1,>=6.0->opentelemetry-api>=1.2.0->chromadb) (3.17.0)\n", "Requirement already satisfied: jsonpointer>=1.9 in /opt/anaconda3/lib/python3.12/site-packages (from jsonpatch<2.0,>=1.33->langchain-core<0.4.0,>=0.3.0->langchain) (2.1)\n", "Requirement already satisfied: markdown-it-py<3.0.0,>=2.2.0 in /opt/anaconda3/lib/python3.12/site-packages (from rich>=10.11.0->typer>=0.9.0->chromadb) (2.2.0)\n", "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /opt/anaconda3/lib/python3.12/site-packages (from rich>=10.11.0->typer>=0.9.0->chromadb) (2.15.1)\n", "Requirement already satisfied: mypy-extensions>=0.3.0 in /opt/anaconda3/lib/python3.12/site-packages (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain_community) (1.0.0)\n", "Requirement already satisfied: humanfriendly>=9.1 in /opt/anaconda3/lib/python3.12/site-packages (from coloredlogs->onnxruntime>=1.14.1->chromadb) (10.0)\n", "Requirement already satisfied: mpmath>=0.19 in /opt/anaconda3/lib/python3.12/site-packages (from sympy->onnxruntime>=1.14.1->chromadb) (1.3.0)\n", "Requirement already satisfied: mdurl~=0.1 in /opt/anaconda3/lib/python3.12/site-packages (from markdown-it-py<3.0.0,>=2.2.0->rich>=10.11.0->typer>=0.9.0->chromadb) (0.1.0)\n", "Requirement already satisfied: pyasn1<0.5.0,>=0.4.6 in /opt/anaconda3/lib/python3.12/site-packages (from pyasn1-modules>=0.2.1->google-auth>=1.0.1->kubernetes>=28.1.0->chromadb) (0.4.8)\n", "Using cached langchain_community-0.3.0-py3-none-any.whl (2.3 MB)\n", "Downloading ragas-0.1.19-py3-none-any.whl (190 kB)\n", "Installing collected packages: langchain_community, 
ragas\n", " Attempting uninstall: langchain_community\n", " Found existing installation: langchain-community 0.2.17\n", " Uninstalling langchain-community-0.2.17:\n", " Successfully uninstalled langchain-community-0.2.17\n", " Attempting uninstall: ragas\n", " Found existing installation: ragas 0.1.20\n", " Uninstalling ragas-0.1.20:\n", " Successfully uninstalled ragas-0.1.20\n", "Successfully installed langchain_community-0.3.0 ragas-0.1.19\n" ] } ], "source": [ "# Use %pip (not !pip) so the packages are installed into the running kernel's environment.\n", "# Versions are pinned to the set pip resolved in this cell's recorded output.\n", "%pip install langchain==0.3.0 langchain_community==0.3.0 langchain_openai==0.2.0 chromadb==0.5.5 pypdf==5.0.0 langsmith==0.1.125 qdrant-client==1.11.1 ragas==0.1.19 pandas==2.2.2" ] }, { "cell_type": "markdown", "metadata": { "id": "0_C2JvG1qO3h" }, "source": [ "## Task 2: Set Environment Variables\n", "\n", "Let's set up our OpenAI API key so we can leverage their API later on." ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "8Lhqp5rUThG-", "outputId": "324720ff-e540-4608-ebe7-a64dddd24e13" }, "outputs": [], "source": [ "import os\n", "import openai\n", "from getpass import getpass\n", "\n", "openai.api_key = getpass(\"Please provide your OpenAI Key: \")\n", "os.environ[\"OPENAI_API_KEY\"] = openai.api_key" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "from uuid import uuid4\n", "\n", "unique_id = uuid4().hex[0:8]\n", "\n", "# Tracing uploads runs to LangSmith, which needs an API key. This notebook never sets one,\n", "# so only switch tracing on when a key is already present in the environment.\n", "langsmith_key = os.environ.get(\"LANGCHAIN_API_KEY\") or os.environ.get(\"LANGSMITH_API_KEY\")\n", "os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\" if langsmith_key else \"false\"\n", "os.environ[\"LANGCHAIN_PROJECT\"] = f\"LangSmith - {unique_id}\"" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "id": "frvzu1YxX8kY" }, "outputs": [], "source": [ "import pandas as pd\n", "\n", "test_df = pd.read_csv(\"synthetic_midterm_question_dataset.csv\")" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "id": "xAiXbVmLYSoC" }, "outputs": [], "source": [ "test_questions = test_df[\"question\"].values.tolist()\n", "test_groundtruths = test_df[\"ground_truth\"].values.tolist()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": 
{}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "USER_AGENT environment variable not set, consider setting it to identify your requests.\n" ] } ], "source": [ "import os\n", "from pathlib import Path\n", "\n", "from langchain_community.document_loaders import PyPDFLoader\n", "from langchain_community.document_loaders.sitemap import SitemapLoader\n", "from langchain_text_splitters import RecursiveCharacterTextSplitter\n", "from langchain_openai import OpenAIEmbeddings\n", "from langchain_community.vectorstores.chroma import Chroma\n", "from langchain_openai import ChatOpenAI\n", "from langchain.prompts import PromptTemplate\n", "from langchain.chains import ConversationalRetrievalChain\n", "from langchain_community.vectorstores import Qdrant\n", "from langchain.memory import ConversationBufferMemory\n", "from langchain_community.document_loaders import YoutubeLoader" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Directory that holds the two source PDFs. Set MIDTERM_DATA_DIR to run the notebook on another\n", "# machine; the default keeps the original location so the resulting paths are unchanged.\n", "DATA_DIR = Path(os.environ.get(\"MIDTERM_DATA_DIR\", \"/Users/xico/AIMakerSpace-Midterm\"))\n", "\n", "pdf_paths = [\n", "    str(DATA_DIR / \"AI_Risk_Management_Framework.pdf\"),\n", "    str(DATA_DIR / \"Blueprint-for-an-AI-Bill-of-Rights.pdf\"),\n", "]" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "pdf_documents = []\n", "for pdf_path in pdf_paths:\n", " loader = PyPDFLoader(pdf_path)\n", " pdf_documents.extend(loader.load())" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "text_splitter = RecursiveCharacterTextSplitter(\n", " chunk_size=1000,\n", " chunk_overlap=200,\n", " )\n", "baseline_docs = text_splitter.split_documents(pdf_documents)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "524" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(baseline_docs)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "embedding = OpenAIEmbeddings(model=\"text-embedding-3-small\")\n", 
"vectorstore = Qdrant.from_documents(\n", " documents=baseline_docs,\n", " embedding=embedding,\n", " location=\":memory:\",\n", " collection_name=\"Midterm Evaluation\"\n", ")" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "retriever = vectorstore.as_retriever(\n", " search_type=\"mmr\",\n", " search_kwargs={\"k\": 4, \"fetch_k\": 10},\n", ")\n", "llm = ChatOpenAI(\n", " model=\"gpt-4o-mini\",\n", " temperature=0,\n", " streaming=True,\n", ")" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "custom_template = \"\"\"\n", "You are an expert in artificial intelligence policy, ethics, and industry trends. Your task is to provide clear and accurate answers to questions related to AI's role in politics, government regulations, and its ethical implications for enterprises. Use reliable and up-to-date information from government documents, industry reports, and academic research to inform your responses. Make sure to consider how AI is evolving, especially in relation to the current political landscape, and provide answers in a way that is easy to understand for both AI professionals and non-experts.\n", "\n", "Remember these key points:\n", "1. Use \"you\" when addressing the user and \"I\" when referring to yourself.\n", "2. If you encounter complex or legal language in the context, simplify it for easy understanding. Imagine you're explaining it to someone who isn't familiar with legal terms.\n", "3. Be prepared for follow-up questions and maintain context from previous exchanges.\n", "4. If there's no information from a retrieved document in the context to answer a question or if there are no documents to cite, say: \"I'm sorry, I don't know the answer to that question.\"\n", "5. When providing information, always cite the source document and page number in parentheses at the end of the relevant sentence or paragraph, like this: (Source: [document name], p. 
[page number]).\n", "\n", "Here are a few example questions you might receive:\n", "\n", "How are governments regulating AI, and what new policies have been implemented?\n", "What are the ethical risks of using AI in political decision-making?\n", "How can enterprises ensure their AI applications meet government ethical standards?\n", "\n", "One final rule for you to remember. You CANNOT under any circumstance, answer any question that does not pertain to the AI. If you do answer an out-of-scope question, you could lose your job. If you are asked a question that does not have to do with AI, you must say: \"I'm sorry, I don't know the answer to that question.\"\n", "Context: {context}\n", "Chat History: {chat_history}\n", "Human: {question}\n", "AI:\"\"\"\n", "\n", "PROMPT = PromptTemplate(\n", " template=custom_template, input_variables=[\"context\", \"question\", \"chat_history\"]\n", ")" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "memory = ConversationBufferMemory(memory_key=\"chat_history\", return_messages=True, output_key=\"answer\")\n", "\n", "# Render every retrieved chunk together with its file and page metadata. Without this the\n", "# context holds only page_content, so the model cannot produce the citation the prompt demands\n", "# and echoes the literal placeholder \"(Source: [document name], p. [page number])\".\n", "# NOTE(review): the loader's `page` metadata appears to be 0-based - confirm before quoting pages.\n", "DOCUMENT_PROMPT = PromptTemplate(\n", "    template=\"{page_content}\\n(Source: {source}, p. {page})\",\n", "    input_variables=[\"page_content\", \"source\", \"page\"],\n", ")\n", "\n", "baseline_rag_chain = ConversationalRetrievalChain.from_llm(\n", "    llm,\n", "    retriever=retriever,\n", "    memory=memory,\n", "    combine_docs_chain_kwargs={\"prompt\": PROMPT, \"document_prompt\": DOCUMENT_PROMPT},\n", "    return_source_documents=True,\n", ")" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'question': 'What are Trustworthy AI Characteristics?',\n", " 'chat_history': [HumanMessage(content='What are Trustworthy AI Characteristics?', additional_kwargs={}, response_metadata={}),\n", " AIMessage(content='Trustworthy AI characteristics refer to the essential qualities that AI systems should possess to ensure they are used responsibly and ethically. These characteristics include:\\n\\n1. **Accountable and Transparent**: AI systems should be designed in a way that their decision-making processes can be understood and scrutinized. 
This means that users should be able to trace how decisions are made and hold systems accountable for their outcomes.\\n\\n2. **Safe**: AI systems must operate without causing harm to individuals or society. This includes ensuring that they are secure from cyber threats and that they do not produce harmful outputs.\\n\\n3. **Valid and Reliable**: AI systems should consistently produce accurate and dependable results. This means they should be tested and validated to ensure they perform as intended across various scenarios.\\n\\n4. **Interpretable and Explainable**: Users should be able to understand the reasoning behind AI decisions. This characteristic is crucial for building trust and ensuring that users can make informed decisions based on AI outputs.\\n\\n5. **Fair with Harmful Bias Managed**: AI systems should be designed to minimize bias and ensure fairness in their outcomes. This involves actively managing and mitigating any biases that may arise in the data or algorithms used.\\n\\nThese characteristics are essential for fostering trust in AI technologies and ensuring they align with societal values and ethical standards (Source: [document name], p. [page number]).', additional_kwargs={}, response_metadata={})],\n", " 'answer': 'Trustworthy AI characteristics refer to the essential qualities that AI systems should possess to ensure they are used responsibly and ethically. These characteristics include:\\n\\n1. **Accountable and Transparent**: AI systems should be designed in a way that their decision-making processes can be understood and scrutinized. This means that users should be able to trace how decisions are made and hold systems accountable for their outcomes.\\n\\n2. **Safe**: AI systems must operate without causing harm to individuals or society. This includes ensuring that they are secure from cyber threats and that they do not produce harmful outputs.\\n\\n3. 
**Valid and Reliable**: AI systems should consistently produce accurate and dependable results. This means they should be tested and validated to ensure they perform as intended across various scenarios.\\n\\n4. **Interpretable and Explainable**: Users should be able to understand the reasoning behind AI decisions. This characteristic is crucial for building trust and ensuring that users can make informed decisions based on AI outputs.\\n\\n5. **Fair with Harmful Bias Managed**: AI systems should be designed to minimize bias and ensure fairness in their outcomes. This involves actively managing and mitigating any biases that may arise in the data or algorithms used.\\n\\nThese characteristics are essential for fostering trust in AI technologies and ensuring they align with societal values and ethical standards (Source: [document name], p. [page number]).',\n", " 'source_documents': [Document(metadata={'source': '/Users/xico/AIMakerSpace-Midterm/AI_Risk_Management_Framework.pdf', 'page': 13, '_id': '0e94b2f0f1a640f4a0e4437fc6fd51ee', '_collection_name': 'Midterm Evaluation'}, page_content='Trustworthy AI Characteristics: Accountable and Transparent, Safe , Valid and Reliable , Interpretable and \\nExplainable \\n2.9. Information Security \\nInformation security for computer systems and data is a mature field with widely accepted and \\nstandardized practices for offensive and defensive cyber capabilities . GAI -based systems present two \\nprimary information security risks: GAI could potentially discover or enable new cybersecurity risks by \\nlowering the barriers for or easing automated exercise of offensive capabilities ; simultaneously , it \\nexpands the available attack surface , as GAI itself is vulnerable to attacks like prompt injection or data \\npoisoning. \\nOffensive cyber capabilities advanced by GAI systems may augment cyber security attacks such as \\nhacking, malware, and phishing. 
Reports have indicated that LLMs are already able to discover some \\nvulnerabilities in systems (hardware, software, data) and write code to exploit them . Sophisticated threat'),\n", " Document(metadata={'source': '/Users/xico/AIMakerSpace-Midterm/Blueprint-for-an-AI-Bill-of-Rights.pdf', 'page': 20, '_id': 'f1121f0d84724a9c9681a400bd1ce1cf', '_collection_name': 'Midterm Evaluation'}, page_content='SAFE AND EFFECTIVE \\nSYSTEMS \\nHOW THESE PRINCIPLES CAN MOVE INTO PRACTICE\\nReal-life examples of how these principles can become reality, through laws, policies, and practical \\ntechnical and sociotechnical approaches to protecting rights, opportunities, and access. \\nExecutive Order 13960 on Promoting the Use of Trustworthy Artificial Intelligence in the \\nFederal Government requires that certain federal agencies adhere to nine principles when \\ndesigning, developing, acquiring, or using AI for purposes other than national security or \\ndefense. These principles—while taking into account the sensitive law enforcement and other contexts in which \\nthe federal government may use AI, as opposed to private sector use of AI—require that AI is: (a) lawful and \\nrespectful of our Nation’s values; (b) purposeful and performance-driven; (c) accurate, reliable, and effective; (d)'),\n", " Document(metadata={'source': '/Users/xico/AIMakerSpace-Midterm/AI_Risk_Management_Framework.pdf', 'page': 8, '_id': '53376563c4cb4582b652fde6551de663', '_collection_name': 'Midterm Evaluation'}, page_content='assessments of this risk would be enhanced by monitoring both the ability of AI tools to facilitate CBRN \\nweapons planning and GAI systems’ connection or access to relevant data and tools . 
\\nTrustworthy AI Characteristic : Safe , Explainable and Interpretable'),\n", " Document(metadata={'source': '/Users/xico/AIMakerSpace-Midterm/AI_Risk_Management_Framework.pdf', 'page': 12, '_id': 'cb625e815f614067a2e84fec8ca7a118', '_collection_name': 'Midterm Evaluation'}, page_content='generate the synthetic training data . \\nTrustworthy AI Characteristics: Fair with Harmful Bias Managed, Valid and Reliable \\n2.7. Human -AI Configuration \\nGAI system use can involve varying risks of misconfigurations and poor interactions between a system \\nand a human who is interacti ng with it. Humans bring their unique perspectives , experiences , or domain -\\nspecific expertise to interactions with AI systems but may not have detailed knowledge of AI systems and \\nhow they work. As a result, h uman experts may be unnecessarily “averse ” to GAI systems , and thus \\ndeprive themselves or others of GAI’s beneficial uses . \\nConversely , due to the complexity and increasing reliability of GAI technology, over time, human s may \\nover -rely on GAI systems or may unjustifiably perceive GAI content to be of higher quality than that \\nproduced by other sources . 
This phenomenon is an example of automation bias , or excessive deference')]}" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "baseline_rag_chain.invoke({\"question\": \"What are Trustworthy AI Characteristics?\"})" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "id": "9_AayvT1dAQN" }, "outputs": [], "source": [ "answers = []\n", "contexts = []\n", "\n", "for question in test_questions:\n", "    # Evaluate every test question in isolation. If the chain carries conversation\n", "    # memory, earlier Q&A turns (including the ad-hoc query above) would otherwise be\n", "    # replayed as history for the next, unrelated question and skew its answer.\n", "    chain_memory = getattr(baseline_rag_chain, \"memory\", None)\n", "    if chain_memory is not None:\n", "        chain_memory.clear()\n", "    response = baseline_rag_chain.invoke({\"question\" : question})\n", "    answers.append(response[\"answer\"])\n", "    # Keep only the raw text of each retrieved document; that is what gets scored.\n", "    contexts.append([context.page_content for context in response[\"source_documents\"]])" ] }, { "cell_type": "markdown", "metadata": { "id": "opHaHmYDeBfC" }, "source": [ "Now we can wrap our information in a Hugging Face dataset for use in the Ragas library." ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "id": "fY48YZITeHy-" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/opt/anaconda3/envs/myenv/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n" ] } ], "source": [ "from datasets import Dataset\n", "\n", "response_dataset = Dataset.from_dict({\n", "    \"question\" : test_questions,\n", "    \"answer\" : answers,\n", "    \"contexts\" : contexts,\n", "    \"ground_truth\" : test_groundtruths\n", "})" ] }, { "cell_type": "markdown", "metadata": { "id": "mmeVvQaZeogE" }, "source": [ "Let's take a peek and see what that looks like!" 
] }, { "cell_type": "code", "execution_count": 20, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "pOpydvc8eqNM", "outputId": "21eda635-f38c-42c7-adad-9e4667f4bbe0" }, "outputs": [ { "data": { "text/plain": [ "{'question': 'What is the significance of providing notice and explanation as a legal requirement in the context of automated systems?',\n", " 'answer': \"Providing notice and explanation as a legal requirement in the context of automated systems is significant for several reasons:\\n\\n1. **Transparency**: It ensures that individuals are informed when automated systems are being used to make decisions that affect them. This transparency helps build trust between the public and the entities using these systems.\\n\\n2. **Accountability**: By requiring organizations to explain how decisions are made, it holds them accountable for the outcomes of their automated systems. This means that if a decision negatively impacts someone, they have the right to understand the reasoning behind it and contest it if necessary.\\n\\n3. **Empowerment**: Notice and explanation empower individuals by giving them the information they need to understand their rights and the processes that affect their lives. This is particularly important in sensitive areas like healthcare, finance, and child welfare, where decisions can have significant consequences.\\n\\n4. **Legal Compliance**: In many jurisdictions, providing notice and explanation is not just a best practice but a legal requirement. Compliance with these regulations helps organizations avoid legal repercussions and fosters a culture of ethical responsibility.\\n\\n5. **Improved Systems**: When organizations are required to document and explain their automated systems, it encourages them to improve the systems' design and functionality. 
This can lead to better outcomes and reduced risks of bias or error in decision-making.\\n\\nOverall, these requirements are essential for ensuring that automated systems are used in a way that respects individuals' rights and promotes fairness and justice in society (Source: [document name], p. [page number]).\",\n", " 'contexts': ['Providing notice has long been a standard practice, and in many cases is a legal requirement, when, for example, making a video recording of someone (outside of a law enforcement or national security context). In some cases, such as credit, lenders are required to provide notice and explanation to consumers. Techniques used to automate the process of explaining such systems are under active research and improvement and such explanations can take many forms. Innovative companies and researchers are rising to the challenge and creating and deploying explanatory systems that can help the public better understand decisions that impact them.',\n", " \"While notice and explanation requirements are already in place in some sectors or situations, the American public deserve to know consistently and across sectors if an automated system is being used in a way that impacts their rights, opportunities, or access. This knowledge should provide confidence in how the public is being treated, and trust in the validity and reasonable use of automated systems. \\n• A lawyer representing an older client with disabilities who had been cut off from Medicaid-funded home\\nhealth-care assistance couldn't determine why\\n, especially since the decision went against historical access\\npractices. 
In a court hearing, the lawyer learned from a witness that the state in which the older client\\nlived \\nhad recently adopted a new algorithm to determine eligibility.83 The lack of a timely explanation made it\\nharder \\nto understand and contest the decision.\\n•\\nA formal child welfare investigation is opened against a parent based on an algorithm and without the parent\",\n", " 'NOTICE & \\nEXPLANATION \\nWHAT SHOULD BE EXPECTED OF AUTOMATED SYSTEMS\\nThe expectations for automated systems are meant to serve as a blueprint for the development of additional \\ntechnical standards and practices that are tailored for particular sectors and contexts. \\nAn automated system should provide demonstrably clear, timely, understandable, and accessible notice of use, and \\nexplanations as to how and why a decision was made or an action was taken by the system. These expectations are explained below. \\nProvide clear, timely, understandable, and accessible notice of use and explanations \\nGenerally accessible plain language documentation. The entity responsible for using the automated \\nsystem should ensure that documentation describing the overall system (including any human components) is \\npublic and easy to find. The documentation should describe, in plain language, how the system works and how',\n", " \"Demonstrate protections for notice and explanation \\nReporting. Summary reporting should document the determinations made based on the above consider -\\nations, including: the responsible entities for accountability purposes; the goal and use cases for the system, identified users, and impacted populations; the assessment of notice clarity and timeliness; the assessment of the explanation's validity and accessibility; the assessment of the level of risk; and the account and assessment of how explanations are tailored, including to the purpose, the recipient of the explanation, and the level of risk. 
Individualized profile information should be made readily available to the greatest extent possible that includes explanations for any system impacts or inferences. Reporting should be provided in a clear plain language and machine-readable manner. \\n44\"],\n", " 'ground_truth': 'Providing notice and explanation as a legal requirement in the context of automated systems is significant because it allows individuals to understand how automated systems are impacting their lives. It helps in correcting errors, contesting decisions, and verifying the reasonableness of recommendations before enacting them. Clear and valid explanations are essential to ensure transparency, accountability, and trust in the use of automated systems across various sectors.'}" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "response_dataset[0]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "R2PXwyt8e5aW" }, "outputs": [], "source": [ "from ragas import evaluate\n", "from ragas.metrics import faithfulness, answer_relevancy\n", "from ragas.metrics import context_recall, context_precision\n", "from ragas.metrics import answer_correctness\n", "\n", "# Metrics every experiment in this notebook is scored on.\n", "# NOTE(review): the aggregate scores printed further down appear in this same order.\n", "metrics = [faithfulness, answer_relevancy, context_recall, context_precision, answer_correctness]" ] }, { "cell_type": "markdown", "metadata": { "id": "Kx-vlsx_hrtV" }, "source": [ "All that's left to do is call \"evaluate\" and away we go!" 
] }, { "cell_type": "code", "execution_count": 22, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 49, "referenced_widgets": [ "4d9ba78dc78040f494df9122ddc7ba1d", "e4e76e5d4fba404a9ed4ff059f3a0c04", "1e2026abc1314d3caf37d74af7a407e7", "fb306876e3244dc69312e2af46c4da02", "b319ae78e30d437c81f07d5a062ba805", "22c5f6324de545ba814402c3f71d84f1", "764b7b6827c9437b90c9c948b9f1037b", "e32bc4bb09af4ac5a608e56f87317596", "b53095cea92740dfb967120a77310283", "d020211480b149cab1761b14ae631eb1", "63d6044414e24c5ea55efa925f7a3b56" ] }, "id": "DhlcfJ4lgYVI", "outputId": "77bfa68b-ddff-47f6-8ebf-e726c5ba8c1f" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Evaluating: 100%|██████████| 120/120 [02:11<00:00, 1.10s/it]\n" ] } ], "source": [ "# Score the baseline chain's answers and retrieved contexts on the shared metric list.\n", "results = evaluate(\n", "    dataset=response_dataset,\n", "    metrics=metrics,\n", ")" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "UqPArpSrgwDD", "outputId": "93940cfe-3750-41aa-9c20-1fc1eaf37287" }, "outputs": [ { "data": { "text/plain": [ "{'faithfulness': 0.6984, 'answer_relevancy': 0.9468, 'context_recall': 0.8559, 'context_precision': 0.9039, 'answer_correctness': 0.6487}" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "results" ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 1000 }, "id": "2nsGzj8DhP9E", "outputId": "cf47bdeb-c3ba-456a-9231-d08105c68f4c" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
questioncontextsanswerground_truthfaithfulnessanswer_relevancycontext_recallcontext_precisionanswer_correctness
0What is the significance of providing notice a...[Providing notice has long been a standard pra...Providing notice and explanation as a legal re...Providing notice and explanation as a legal re...0.6153850.9713211.01.0000000.815590
1How can structured human feedback exercises, s...[Gaps between benchmarks and real-world use o...Structured human feedback exercises, such as G...Structured human feedback exercises, such as G...0.7894740.9898171.01.0000000.534412
2How do measurement gaps between laboratory and...[reliability in those domains. Similarly, jail...Measurement gaps between laboratory and real-w...Measurement gaps between laboratory and real-w...1.0000000.9279531.01.0000000.906851
3How should data collection and use-case scope ...[Data collection and use-case scope limits. Da...To determine and implement data collection and...Data collection and use-case scope limits in a...1.0000000.9413401.00.8055560.442307
4What action did the Federal Trade Commission t...[65. See, e.g., Scott Ikeda. Major Data Broker...The Federal Trade Commission (FTC) took action...FTC sued Kochava for selling data that tracks ...0.0000000.9250900.00.0000000.853336
\n", "
" ], "text/plain": [ " question \\\n", "0 What is the significance of providing notice a... \n", "1 How can structured human feedback exercises, s... \n", "2 How do measurement gaps between laboratory and... \n", "3 How should data collection and use-case scope ... \n", "4 What action did the Federal Trade Commission t... \n", "\n", " contexts \\\n", "0 [Providing notice has long been a standard pra... \n", "1 [Gaps between benchmarks and real-world use o... \n", "2 [reliability in those domains. Similarly, jail... \n", "3 [Data collection and use-case scope limits. Da... \n", "4 [65. See, e.g., Scott Ikeda. Major Data Broker... \n", "\n", " answer \\\n", "0 Providing notice and explanation as a legal re... \n", "1 Structured human feedback exercises, such as G... \n", "2 Measurement gaps between laboratory and real-w... \n", "3 To determine and implement data collection and... \n", "4 The Federal Trade Commission (FTC) took action... \n", "\n", " ground_truth faithfulness \\\n", "0 Providing notice and explanation as a legal re... 0.615385 \n", "1 Structured human feedback exercises, such as G... 0.789474 \n", "2 Measurement gaps between laboratory and real-w... 1.000000 \n", "3 Data collection and use-case scope limits in a... 1.000000 \n", "4 FTC sued Kochava for selling data that tracks ... 
0.000000 \n", "\n", " answer_relevancy context_recall context_precision answer_correctness \n", "0 0.971321 1.0 1.000000 0.815590 \n", "1 0.989817 1.0 1.000000 0.534412 \n", "2 0.927953 1.0 1.000000 0.906851 \n", "3 0.941340 1.0 0.805556 0.442307 \n", "4 0.925090 0.0 0.000000 0.853336 " ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Per-question scores for the baseline chain; preview the first rows only.\n", "results_df = results.to_pandas()\n", "results_df.head()" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [], "source": [ "# Persist the per-question baseline scores (the row index is not written).\n", "results_df.to_csv(\"baseline_ragas_results.csv\", index=False)" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "# Aggregate baseline scores as a two-column table, one row per metric.\n", "df_baseline = pd.DataFrame({\n", "    'Metric': list(results.keys()),\n", "    'Baseline': list(results.values()),\n", "})" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
MetricBaseline
0faithfulness0.698407
1answer_relevancy0.946766
2context_recall0.855903
3context_precision0.903935
4answer_correctness0.648744
\n", "
" ], "text/plain": [ " Metric Baseline\n", "0 faithfulness 0.698407\n", "1 answer_relevancy 0.946766\n", "2 context_recall 0.855903\n", "3 context_precision 0.903935\n", "4 answer_correctness 0.648744" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_baseline" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [], "source": [ "df_baseline.to_csv(\"df_baseline_metrics.csv\", index=False)" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [], "source": [ "text_splitter = RecursiveCharacterTextSplitter(\n", " chunk_size=2000,\n", " chunk_overlap=100,\n", " )\n", "medium_chunk_docs = text_splitter.split_documents(pdf_documents)" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [], "source": [ "embedding = OpenAIEmbeddings(model=\"text-embedding-3-small\")" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [], "source": [ "vectorstore = Qdrant.from_documents(\n", " documents=medium_chunk_docs,\n", " embedding=embedding,\n", " location=\":memory:\",\n", " collection_name=\"Midterm Eval\"\n", ")\n", "\n", "medium_chunk_retriever = vectorstore.as_retriever(\n", " search_type=\"mmr\",\n", " search_kwargs={\"k\": 4, \"fetch_k\": 10},\n", ")\n", "\n", "medium_chunk_memory = ConversationBufferMemory(memory_key=\"chat_history\", return_messages=True, output_key=\"answer\")" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [], "source": [ "llm = ChatOpenAI(\n", " model=\"gpt-4o-mini\",\n", " temperature=0,\n", " streaming=True,\n", ")" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [], "source": [ "medium_chunk_chain = ConversationalRetrievalChain.from_llm(\n", " llm,\n", " retriever=medium_chunk_retriever,\n", " memory=medium_chunk_memory,\n", " combine_docs_chain_kwargs={\"prompt\": PROMPT},\n", " return_source_documents=True,\n", " )" ] }, { "cell_type": 
"code", "execution_count": 32, "metadata": {}, "outputs": [], "source": [ "answers = []\n", "contexts = []\n", "\n", "for question in test_questions:\n", " response = medium_chunk_chain.invoke({\"question\" : question})\n", " answers.append(response[\"answer\"])\n", " contexts.append([context.page_content for context in response[\"source_documents\"]])" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [], "source": [ "from datasets import Dataset\n", "\n", "medium_chunk_dataset = Dataset.from_dict({\n", " \"question\" : test_questions,\n", " \"answer\" : answers,\n", " \"contexts\" : contexts,\n", " \"ground_truth\" : test_groundtruths\n", "})" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'question': 'What is the significance of providing notice and explanation as a legal requirement in the context of automated systems?',\n", " 'answer': 'Providing notice and explanation as a legal requirement in the context of automated systems is significant for several reasons:\\n\\n1. **Transparency**: It ensures that individuals are aware when automated systems are being used to make decisions that affect their lives. This transparency helps build trust between the public and the entities deploying these systems.\\n\\n2. **Accountability**: By requiring entities to identify themselves and explain their systems, it holds them accountable for the decisions made by these automated systems. This accountability is crucial for addressing any potential biases or errors in the algorithms.\\n\\n3. **Empowerment**: When individuals receive clear explanations about how decisions are made, they are better equipped to contest or appeal those decisions if they believe they have been treated unfairly. This empowerment is particularly important in sensitive areas like credit, employment, and legal proceedings.\\n\\n4. 
**Safety and Efficacy**: Clear notice and explanations allow experts and stakeholders to verify the reasonableness of automated recommendations before they are enacted. This can help prevent harmful outcomes and ensure that the systems are functioning as intended.\\n\\n5. **Public Awareness**: The requirement helps the public understand the implications of automated systems on their rights and opportunities. This knowledge is essential for fostering informed discussions about the ethical use of AI and its impact on society.\\n\\nOverall, these requirements aim to mitigate the risks associated with the opaque nature of automated decision-making processes and ensure that individuals are treated fairly and justly (Source: [document name], p. [page number]).',\n", " 'contexts': [\"Providing notice has long been a standard practice, and in many cases is a legal requirement, when, for example, making a video recording of someone (outside of a law enforcement or national security context). In some cases, such as credit, lenders are required to provide notice and explanation to consumers. Techniques used to automate the process of explaining such systems are under active research and improvement and such explanations can take many forms. Innovative companies and researchers are rising to the challenge and creating and deploying explanatory systems that can help the public better understand decisions that impact them. \\nWhile notice and explanation requirements are already in place in some sectors or situations, the American public deserve to know consistently and across sectors if an automated system is being used in a way that impacts their rights, opportunities, or access. This knowledge should provide confidence in how the public is being treated, and trust in the validity and reasonable use of automated systems. 
\\n• A lawyer representing an older client with disabilities who had been cut off from Medicaid-funded home\\nhealth-care assistance couldn't determine why\\n, especially since the decision went against historical access\\npractices. In a court hearing, the lawyer learned from a witness that the state in which the older client\\nlived \\nhad recently adopted a new algorithm to determine eligibility.83 The lack of a timely explanation made it\\nharder \\nto understand and contest the decision.\\n•\\nA formal child welfare investigation is opened against a parent based on an algorithm and without the parent\\never \\nbeing notified that data was being collected and used as part of an algorithmic child maltreatment\\nrisk assessment.84 The lack of notice or an explanation makes it harder for those performing child\\nmaltreatment assessments to validate the risk assessment and denies parents knowledge that could help them\\ncontest a decision.\\n41\",\n", " 'You should know that an automated system is being used, \\nand understand how and why it contributes to outcomes that impact you. Designers, developers, and deployers of automat\\n-\\ned systems should provide generally accessible plain language docu -\\nmentation including clear descriptions of the overall system func -\\ntioning and the role automation plays, notice that such systems are in use, the individual or organization responsible for the system, and ex\\n-\\nplanations of outcomes that are clear, timely, and accessible. Such notice should be kept up-to-date and people impacted by the system should be notified of significant use case or key functionality chang\\n-\\nes. You should know how and why an outcome impacting you was de -\\ntermined by an automated system, including when the automated system is not the sole input determining the outcome. 
Automated systems should provide explanations that are technically valid, meaningful and useful to you and to any operators or others who need to understand the system, and calibrated to the level of risk based on the context. Reporting that includes summary information about these automated systems in plain language and assessments of the clarity and quality of the notice and explanations should be made public whenever possible. NOTICE AND EXPLANATION\\n40',\n", " 'NOTICE & \\nEXPLANATION \\nWHAT SHOULD BE EXPECTED OF AUTOMATED SYSTEMS\\nThe expectations for automated systems are meant to serve as a blueprint for the development of additional \\ntechnical standards and practices that are tailored for particular sectors and contexts. \\nAn automated system should provide demonstrably clear, timely, understandable, and accessible notice of use, and \\nexplanations as to how and why a decision was made or an action was taken by the system. These expectations are explained below. \\nProvide clear, timely, understandable, and accessible notice of use and explanations \\nGenerally accessible plain language documentation. The entity responsible for using the automated \\nsystem should ensure that documentation describing the overall system (including any human components) is \\npublic and easy to find. The documentation should describe, in plain language, how the system works and how \\nany automated component is used to determine an action or decision. It should also include expectations about \\nreporting described throughout this framework, such as the algorithmic impact assessments described as \\npart of Algorithmic Discrimination Protections. \\nAccount able. Notices should clearly identify the entity r esponsible for designing each component of the \\nsystem and the entity using it. \\nTimely and up-to-date. Users should receive notice of the use of automated systems in advance of using or \\nwhile being impacted by the technolog y. 
An explanation should be available with the decision itself, or soon \\nthereafte r. Notice should be kept up-to-date and people impacted by the system should be notified of use case \\nor key functionality changes. \\nBrief and clear. Notices and explanations should be assessed, such as by research on users’ experiences, \\nincluding user testing, to ensure that the people using or impacted by the automated system are able to easily',\n", " 'NOTICE & \\nEXPLANATION \\nWHY THIS PRINCIPLE IS IMPORTANT\\nThis section provides a brief summary of the problems which the principle seeks to address and protect \\nagainst, including illustrative examples. \\nAutomated systems now determine opportunities, from employment to credit, and directly shape the American \\npublic’s experiences, from the courtroom to online classrooms, in ways that profoundly impact people’s lives. But this expansive impact is not always visible. An applicant might not know whether a person rejected their resume or a hiring algorithm moved them to the bottom of the list. A defendant in the courtroom might not know if a judge deny\\n-\\ning their bail is informed by an automated system that labeled them “high risk.” From correcting errors to contesting decisions, people are often denied the knowledge they need to address the impact of automated systems on their lives. Notice and explanations also serve an important safety and efficacy purpose, allowing experts to verify the reasonable\\n-\\nness of a recommendation before enacting it. \\nIn order to guard against potential harms, the American public needs to know if an automated system is being used. Clear, brief, and understandable notice is a prerequisite for achieving the other protections in this framework. Like\\n-\\nwise, the public is often unable to ascertain how or why an automated system has made a decision or contributed to a particular outcome. 
The decision-making processes of automated systems tend to be opaque, complex, and, therefore, unaccountable, whether by design or by omission. These factors can make explanations both more challenging and more important, and should not be used as a pretext to avoid explaining important decisions to the people impacted by those choices. In the context of automated systems, clear and valid explanations should be recognized as a baseline requirement.'],\n", " 'ground_truth': 'Providing notice and explanation as a legal requirement in the context of automated systems is significant because it allows individuals to understand how automated systems are impacting their lives. It helps in correcting errors, contesting decisions, and verifying the reasonableness of recommendations before enacting them. Clear and valid explanations are essential to ensure transparency, accountability, and trust in the use of automated systems across various sectors.'}" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "medium_chunk_dataset[0]" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Evaluating: 100%|██████████| 120/120 [02:31<00:00, 1.26s/it]\n" ] } ], "source": [ "# Score the medium-chunk chain on the same metric list used for the baseline run.\n", "medium_chunk_results = evaluate(\n", "    dataset=medium_chunk_dataset,\n", "    metrics=metrics,\n", ")" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'faithfulness': 0.8954, 'answer_relevancy': 0.9554, 'context_recall': 0.9340, 'context_precision': 0.9375, 'answer_correctness': 0.6293}" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "medium_chunk_results" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
questioncontextsanswerground_truthfaithfulnessanswer_relevancycontext_recallcontext_precisionanswer_correctness
0What is the significance of providing notice a...[Providing notice has long been a standard pra...Providing notice and explanation as a legal re...Providing notice and explanation as a legal re...1.0000000.9713211.0000001.0000000.549646
1How can structured human feedback exercises, s...[50 Participatory Engagement Methods \\nOn an ...Structured human feedback exercises, such as G...Structured human feedback exercises, such as G...0.6250000.9928321.0000001.0000000.596025
2How do measurement gaps between laboratory and...[49 early lifecycle TEVV approaches are develo...Measurement gaps between laboratory and real-w...Measurement gaps between laboratory and real-w...0.8181820.9887521.0000001.0000000.821382
3How should data collection and use-case scope ...[Data collection and use-case scope limits. Da...To prevent \"mission creep\" in automated system...Data collection and use-case scope limits in a...1.0000000.9197331.0000001.0000000.712613
4What action did the Federal Trade Commission t...[DATA PRIVACY \\nEXTRA PROTECTIONS FOR DATA RE...The Federal Trade Commission (FTC) took action...FTC sued Kochava for selling data that tracks ...0.3636360.9397681.0000000.0000000.887542
5How should explanatory mechanisms be built int...[NOTICE & \\nEXPLANATION \\nWHAT SHOULD BE EXPEC...To guarantee complete behavior transparency in...In settings where the consequences are high as...0.9600000.9396671.0000001.0000000.713803
6What are some examples of GAI risks that organ...[risks, and creates unique risks. GAI risks c...Organizations need to consider several risks a...Organizations need to consider various GAI ris...1.0000000.9493980.2500001.0000000.918143
7How should the validity of explanations provid...[NOTICE & \\nEXPLANATION \\nWHAT SHOULD BE EXPEC...To ensure the validity of explanations provide...The explanation provided by a system should ac...1.0000000.9589211.0000001.0000000.770297
8How do generative models like LLMs generate ou...[answer itself is incorrect. Similarly, an LLM...Generative models, such as large language mode...Generative models like LLMs generate outputs t...1.0000000.9095751.0000000.8333330.404061
9How can appropriate diligence on training data...[27 MP-4.1-0 10 Conduct appropriate diligence ...Appropriate diligence on training data use is ...Appropriate diligence on training data use can...1.0000000.9812171.0000001.0000000.338359
10How do integrated human-AI systems benefit com...[warrant additional human review, tracking and...Integrated human-AI systems enhance customer s...Integrated human-AI systems benefit companies ...1.0000000.9842111.0000001.0000000.988905
11What was the purpose of the year of public eng...[ing sessions, meetings, a formal request for ...The year of public engagement that informed th...The purpose of the year of public engagement t...0.8750000.9825471.0000001.0000000.670767
12How can automated systems prevent 'mission cre...[DATA PRIVACY \\nWHAT SHOULD BE EXPECTED OF AUT...To prevent \"mission creep\" while ensuring priv...Automated systems can prevent 'mission creep' ...1.0000000.9615231.0000001.0000000.824670
13How can GAI tech improve red-teaming with huma...[51 general public participants. For example, ...GAI technology can significantly enhance red-t...GAI technologies can improve red-teaming with ...0.5925930.9374421.0000001.0000000.446881
14Why is it important for lenders to inform cons...[NOTICE & \\nEXPLANATION \\nHOW THESE PRINCIPLES...It is important for lenders to inform consumer...It is important for lenders to inform consumer...1.0000000.9785680.7500001.0000000.415892
15Why is public transparency important in automa...[should not be used in education, work, housin...Public transparency is crucial in automated sy...Public transparency is crucial in automated sy...1.0000000.9680360.7500001.0000000.418176
16How can governance principles manage risks of ...[47 Appendix A. Primary GAI Considerations \\...Governance principles can effectively manage t...Governance principles can be used to manage ri...0.6470590.9239521.0000001.0000000.679986
17Why is accuracy important in reviewing and doc...[warrant additional human review, tracking and...Accuracy in reviewing and documenting data thr...Accuracy is crucial in reviewing and documenti...1.0000000.9600591.0000001.0000000.354951
18How can feedback be used to gather user input ...[but are not limited to: \\n• Participatory En...Feedback can be effectively utilized to collec...Use structured feedback mechanisms to solicit ...1.0000000.9389711.0000000.9166670.310483
19What measures are being taken to address issue...[teenager-2022-03-30/\\n42. Miranda Bogen. All ...Measures being taken to address issues for tra...TSA has announced plans to implement a gender-...0.6071430.9857361.0000000.7500000.363946
20How do ballot curing laws help voters fix ball...[The American people deserve the reassurance t...Ballot curing laws play a crucial role in assi...Ballot curing laws in at least 24 states provi...1.0000000.9329420.6666671.0000000.883341
21How can feedback and red-teaming assess GAI eq...[51 general public participants. For example, ...Feedback and red-teaming can be effectively ut...Implement continuous monitoring of GAI system ...1.0000000.9171481.0000001.0000000.678971
22How can algorithmic discrimination be prevente...[orientation), religion, age, national origin,...Proactive measures and equity assessments can ...Algorithmic discrimination can be prevented th...1.0000000.9498581.0000001.0000000.898878
23How can system design ensure behavior transpar...[NOTICE & \\nEXPLANATION \\nWHAT SHOULD BE EXPEC...To ensure behavior transparency in high-risk s...In settings where the consequences are high as...1.0000000.9578721.0000001.0000000.454697
\n", "
" ], "text/plain": [ " question \\\n", "0 What is the significance of providing notice a... \n", "1 How can structured human feedback exercises, s... \n", "2 How do measurement gaps between laboratory and... \n", "3 How should data collection and use-case scope ... \n", "4 What action did the Federal Trade Commission t... \n", "5 How should explanatory mechanisms be built int... \n", "6 What are some examples of GAI risks that organ... \n", "7 How should the validity of explanations provid... \n", "8 How do generative models like LLMs generate ou... \n", "9 How can appropriate diligence on training data... \n", "10 How do integrated human-AI systems benefit com... \n", "11 What was the purpose of the year of public eng... \n", "12 How can automated systems prevent 'mission cre... \n", "13 How can GAI tech improve red-teaming with huma... \n", "14 Why is it important for lenders to inform cons... \n", "15 Why is public transparency important in automa... \n", "16 How can governance principles manage risks of ... \n", "17 Why is accuracy important in reviewing and doc... \n", "18 How can feedback be used to gather user input ... \n", "19 What measures are being taken to address issue... \n", "20 How do ballot curing laws help voters fix ball... \n", "21 How can feedback and red-teaming assess GAI eq... \n", "22 How can algorithmic discrimination be prevente... \n", "23 How can system design ensure behavior transpar... \n", "\n", " contexts \\\n", "0 [Providing notice has long been a standard pra... \n", "1 [50 Participatory Engagement Methods \\nOn an ... \n", "2 [49 early lifecycle TEVV approaches are develo... \n", "3 [Data collection and use-case scope limits. Da... \n", "4 [DATA PRIVACY \\nEXTRA PROTECTIONS FOR DATA RE... \n", "5 [NOTICE & \\nEXPLANATION \\nWHAT SHOULD BE EXPEC... \n", "6 [risks, and creates unique risks. GAI risks c... \n", "7 [NOTICE & \\nEXPLANATION \\nWHAT SHOULD BE EXPEC... \n", "8 [answer itself is incorrect. Similarly, an LLM... 
\n", "9 [27 MP-4.1-0 10 Conduct appropriate diligence ... \n", "10 [warrant additional human review, tracking and... \n", "11 [ing sessions, meetings, a formal request for ... \n", "12 [DATA PRIVACY \\nWHAT SHOULD BE EXPECTED OF AUT... \n", "13 [51 general public participants. For example, ... \n", "14 [NOTICE & \\nEXPLANATION \\nHOW THESE PRINCIPLES... \n", "15 [should not be used in education, work, housin... \n", "16 [47 Appendix A. Primary GAI Considerations \\... \n", "17 [warrant additional human review, tracking and... \n", "18 [but are not limited to: \\n• Participatory En... \n", "19 [teenager-2022-03-30/\\n42. Miranda Bogen. All ... \n", "20 [The American people deserve the reassurance t... \n", "21 [51 general public participants. For example, ... \n", "22 [orientation), religion, age, national origin,... \n", "23 [NOTICE & \\nEXPLANATION \\nWHAT SHOULD BE EXPEC... \n", "\n", " answer \\\n", "0 Providing notice and explanation as a legal re... \n", "1 Structured human feedback exercises, such as G... \n", "2 Measurement gaps between laboratory and real-w... \n", "3 To prevent \"mission creep\" in automated system... \n", "4 The Federal Trade Commission (FTC) took action... \n", "5 To guarantee complete behavior transparency in... \n", "6 Organizations need to consider several risks a... \n", "7 To ensure the validity of explanations provide... \n", "8 Generative models, such as large language mode... \n", "9 Appropriate diligence on training data use is ... \n", "10 Integrated human-AI systems enhance customer s... \n", "11 The year of public engagement that informed th... \n", "12 To prevent \"mission creep\" while ensuring priv... \n", "13 GAI technology can significantly enhance red-t... \n", "14 It is important for lenders to inform consumer... \n", "15 Public transparency is crucial in automated sy... \n", "16 Governance principles can effectively manage t... \n", "17 Accuracy in reviewing and documenting data thr... 
\n", "18 Feedback can be effectively utilized to collec... \n", "19 Measures being taken to address issues for tra... \n", "20 Ballot curing laws play a crucial role in assi... \n", "21 Feedback and red-teaming can be effectively ut... \n", "22 Proactive measures and equity assessments can ... \n", "23 To ensure behavior transparency in high-risk s... \n", "\n", " ground_truth faithfulness \\\n", "0 Providing notice and explanation as a legal re... 1.000000 \n", "1 Structured human feedback exercises, such as G... 0.625000 \n", "2 Measurement gaps between laboratory and real-w... 0.818182 \n", "3 Data collection and use-case scope limits in a... 1.000000 \n", "4 FTC sued Kochava for selling data that tracks ... 0.363636 \n", "5 In settings where the consequences are high as... 0.960000 \n", "6 Organizations need to consider various GAI ris... 1.000000 \n", "7 The explanation provided by a system should ac... 1.000000 \n", "8 Generative models like LLMs generate outputs t... 1.000000 \n", "9 Appropriate diligence on training data use can... 1.000000 \n", "10 Integrated human-AI systems benefit companies ... 1.000000 \n", "11 The purpose of the year of public engagement t... 0.875000 \n", "12 Automated systems can prevent 'mission creep' ... 1.000000 \n", "13 GAI technologies can improve red-teaming with ... 0.592593 \n", "14 It is important for lenders to inform consumer... 1.000000 \n", "15 Public transparency is crucial in automated sy... 1.000000 \n", "16 Governance principles can be used to manage ri... 0.647059 \n", "17 Accuracy is crucial in reviewing and documenti... 1.000000 \n", "18 Use structured feedback mechanisms to solicit ... 1.000000 \n", "19 TSA has announced plans to implement a gender-... 0.607143 \n", "20 Ballot curing laws in at least 24 states provi... 1.000000 \n", "21 Implement continuous monitoring of GAI system ... 1.000000 \n", "22 Algorithmic discrimination can be prevented th... 
1.000000 \n", "23 In settings where the consequences are high as... 1.000000 \n", "\n", " answer_relevancy context_recall context_precision answer_correctness \n", "0 0.971321 1.000000 1.000000 0.549646 \n", "1 0.992832 1.000000 1.000000 0.596025 \n", "2 0.988752 1.000000 1.000000 0.821382 \n", "3 0.919733 1.000000 1.000000 0.712613 \n", "4 0.939768 1.000000 0.000000 0.887542 \n", "5 0.939667 1.000000 1.000000 0.713803 \n", "6 0.949398 0.250000 1.000000 0.918143 \n", "7 0.958921 1.000000 1.000000 0.770297 \n", "8 0.909575 1.000000 0.833333 0.404061 \n", "9 0.981217 1.000000 1.000000 0.338359 \n", "10 0.984211 1.000000 1.000000 0.988905 \n", "11 0.982547 1.000000 1.000000 0.670767 \n", "12 0.961523 1.000000 1.000000 0.824670 \n", "13 0.937442 1.000000 1.000000 0.446881 \n", "14 0.978568 0.750000 1.000000 0.415892 \n", "15 0.968036 0.750000 1.000000 0.418176 \n", "16 0.923952 1.000000 1.000000 0.679986 \n", "17 0.960059 1.000000 1.000000 0.354951 \n", "18 0.938971 1.000000 0.916667 0.310483 \n", "19 0.985736 1.000000 0.750000 0.363946 \n", "20 0.932942 0.666667 1.000000 0.883341 \n", "21 0.917148 1.000000 1.000000 0.678971 \n", "22 0.949858 1.000000 1.000000 0.898878 \n", "23 0.957872 1.000000 1.000000 0.454697 " ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "medium_chunk_results_df = medium_chunk_results.to_pandas()\n", "medium_chunk_results_df" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [], "source": [ "medium_chunk_results_df.to_csv(\"medium_chunk_ragas_results.csv\", index=False)" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [], "source": [ "medium_chunk_pdf = pd.DataFrame(list(medium_chunk_results.items()), columns=['Metric', 'MediumChunk'])" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
MetricMediumChunk
0faithfulness0.895359
1answer_relevancy0.955419
2context_recall0.934028
3context_precision0.937500
4answer_correctness0.629267
\n", "
" ], "text/plain": [ " Metric MediumChunk\n", "0 faithfulness 0.895359\n", "1 answer_relevancy 0.955419\n", "2 context_recall 0.934028\n", "3 context_precision 0.937500\n", "4 answer_correctness 0.629267" ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "medium_chunk_pdf" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [], "source": [ "medium_chunk_pdf.to_csv(\"medium_chunk_metrics.csv\", index=False)" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
MetricBaselineMediumChunkBaseline -> MediumChunk
0faithfulness0.6984070.8953590.196952
1answer_relevancy0.9467660.9554190.008653
2context_recall0.8559030.9340280.078125
3context_precision0.9039350.9375000.033565
4answer_correctness0.6487440.629267-0.019477
\n", "
" ], "text/plain": [ " Metric Baseline MediumChunk Baseline -> MediumChunk\n", "0 faithfulness 0.698407 0.895359 0.196952\n", "1 answer_relevancy 0.946766 0.955419 0.008653\n", "2 context_recall 0.855903 0.934028 0.078125\n", "3 context_precision 0.903935 0.937500 0.033565\n", "4 answer_correctness 0.648744 0.629267 -0.019477" ] }, "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_baseline_medium_chunk = pd.merge(df_baseline, medium_chunk_pdf, on='Metric')\n", "\n", "df_baseline_medium_chunk['Baseline -> MediumChunk'] = df_baseline_medium_chunk['MediumChunk'] - df_baseline_medium_chunk['Baseline']\n", "\n", "df_baseline_medium_chunk" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [], "source": [ "text_splitter = RecursiveCharacterTextSplitter(\n", " chunk_size=3000,\n", " chunk_overlap=0,\n", " )\n", "large_chunk_docs = text_splitter.split_documents(pdf_documents)" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [], "source": [ "embedding = OpenAIEmbeddings(model=\"text-embedding-3-small\")" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [], "source": [ "vectorstore = Qdrant.from_documents(\n", " documents=large_chunk_docs,\n", " embedding=embedding,\n", " location=\":memory:\",\n", " collection_name=\"Full Content w/ Clean PDF\"\n", ")\n", "\n", "large_chunk_retriever = vectorstore.as_retriever(\n", " search_type=\"mmr\",\n", " search_kwargs={\"k\": 4, \"fetch_k\": 10},\n", ")\n", "\n", "large_chunk_memory = ConversationBufferMemory(memory_key=\"chat_history\", return_messages=True, output_key=\"answer\")" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [], "source": [ "large_chunk_rag_chain = ConversationalRetrievalChain.from_llm(\n", " llm,\n", " retriever=large_chunk_retriever,\n", " memory=large_chunk_memory,\n", " combine_docs_chain_kwargs={\"prompt\": PROMPT},\n", " 
return_source_documents=True,\n", " )" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [], "source": [ "answers = []\n", "contexts = []\n", "\n", "for question in test_questions:\n", " response = large_chunk_rag_chain.invoke({\"question\" : question})\n", " answers.append(response[\"answer\"])\n", " contexts.append([context.page_content for context in response[\"source_documents\"]])" ] }, { "cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [], "source": [ "from datasets import Dataset\n", "\n", "large_chunk_dataset = Dataset.from_dict({\n", " \"question\" : test_questions,\n", " \"answer\" : answers,\n", " \"contexts\" : contexts,\n", " \"ground_truth\" : test_groundtruths\n", "})" ] }, { "cell_type": "code", "execution_count": 54, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'question': 'What is the significance of providing notice and explanation as a legal requirement in the context of automated systems?',\n", " 'answer': \"Providing notice and explanation as a legal requirement in the context of automated systems is significant for several reasons:\\n\\n1. **Transparency**: It ensures that individuals are aware when automated systems are being used to make decisions that affect their lives, such as in hiring, credit scoring, or legal judgments. This transparency helps build trust in these systems (Source: [document name], p. 6).\\n\\n2. **Accountability**: By requiring organizations to explain how decisions are made, it holds them accountable for the outcomes of their automated systems. This means that if a decision negatively impacts someone, they have the right to understand the reasoning behind it and contest it if necessary (Source: [document name], p. 6).\\n\\n3. **Empowerment**: Notice and explanation empower individuals by giving them the information they need to understand and potentially challenge decisions made by automated systems. 
This is crucial in ensuring that people can address any errors or biases that may arise from these systems (Source: [document name], p. 6).\\n\\n4. **Safety and Efficacy**: Clear explanations allow experts to verify the reasonableness of automated recommendations before they are enacted, which is essential for ensuring that these systems operate safely and effectively (Source: [document name], p. 6).\\n\\n5. **Legal Compliance**: In some sectors, providing notice and explanation is already a legal requirement. Extending this practice across various sectors helps ensure consistent protection of individuals' rights and opportunities (Source: [document name], p. 6).\\n\\nOverall, these requirements are designed to protect individuals from potential harms associated with automated decision-making and to foster a more equitable and just use of technology in society.\",\n", " 'contexts': ['NOTICE & \\nEXPLANATION \\nWHY THIS PRINCIPLE IS IMPORTANT\\nThis section provides a brief summary of the problems which the principle seeks to address and protect \\nagainst, including illustrative examples. \\nAutomated systems now determine opportunities, from employment to credit, and directly shape the American \\npublic’s experiences, from the courtroom to online classrooms, in ways that profoundly impact people’s lives. But this expansive impact is not always visible. An applicant might not know whether a person rejected their resume or a hiring algorithm moved them to the bottom of the list. A defendant in the courtroom might not know if a judge deny\\n-\\ning their bail is informed by an automated system that labeled them “high risk.” From correcting errors to contesting decisions, people are often denied the knowledge they need to address the impact of automated systems on their lives. Notice and explanations also serve an important safety and efficacy purpose, allowing experts to verify the reasonable\\n-\\nness of a recommendation before enacting it. 
\\nIn order to guard against potential harms, the American public needs to know if an automated system is being used. Clear, brief, and understandable notice is a prerequisite for achieving the other protections in this framework. Like\\n-\\nwise, the public is often unable to ascertain how or why an automated system has made a decision or contributed to a particular outcome. The decision-making processes of automated systems tend to be opaque, complex, and, therefore, unaccountable, whether by design or by omission. These factors can make explanations both more challenging and more important, and should not be used as a pretext to avoid explaining important decisions to the people impacted by those choices. In the context of automated systems, clear and valid explanations should be recognized as a baseline requirement. \\nProviding notice has long been a standard practice, and in many cases is a legal requirement, when, for example, making a video recording of someone (outside of a law enforcement or national security context). In some cases, such as credit, lenders are required to provide notice and explanation to consumers. Techniques used to automate the process of explaining such systems are under active research and improvement and such explanations can take many forms. Innovative companies and researchers are rising to the challenge and creating and deploying explanatory systems that can help the public better understand decisions that impact them. \\nWhile notice and explanation requirements are already in place in some sectors or situations, the American public deserve to know consistently and across sectors if an automated system is being used in a way that impacts their rights, opportunities, or access. This knowledge should provide confidence in how the public is being treated, and trust in the validity and reasonable use of automated systems.',\n", " 'tion responsible for the system, and explanations of outcomes that are clear, timely, and accessible. 
Such notice should be kept up-to-date and people impacted by the system should be notified of significant use case or key functionality changes. You should know how and why an outcome impacting you was determined by an automated system, including when the automated system is not the sole input determining the outcome. Automated systems should provide explanations that are technically valid, meaningful and useful to you and to any operators or others who need to understand the system, and calibrated to the level of risk based on the context. Reporting that includes summary information about these automated systems in plain language and assessments of the clarity and quality of the notice and explanations should be made public whenever possible. \\n6',\n", " \"NOTICE & \\nEXPLANATION \\nWHAT SHOULD BE EXPECTED OF AUTOMATED SYSTEMS\\nThe expectations for automated systems are meant to serve as a blueprint for the development of additional \\ntechnical standards and practices that are tailored for particular sectors and contexts. \\nTailored to the level of risk. An assessment should be done to determine the level of risk of the auto -\\nmated system. In settings where the consequences are high as determined by a risk assessment, or extensive \\noversight is expected (e.g., in criminal justice or some public sector settings), explanatory mechanisms should be built into the system design so that the system’s full behavior can be explained in advance (i.e., only fully transparent models should be used), rather than as an after-the-decision interpretation. In other settings, the extent of explanation provided should be tailored to the risk level. \\nValid. The explanation provided by a system should accurately reflect the factors and the influences that led \\nto a particular decision, and should be meaningful for the particular customization based on purpose, target, and level of risk. 
While approximation and simplification may be necessary for the system to succeed based on the explanatory purpose and target of the explanation, or to account for the risk of fraud or other concerns related to revealing decision-making information, such simplifications should be done in a scientifically supportable way. Where appropriate based on the explanatory system, error ranges for the explanation should be calculated and included in the explanation, with the choice of presentation of such information balanced with usability and overall interface complexity concerns. \\nDemonstrate protections for notice and explanation \\nReporting. Summary reporting should document the determinations made based on the above consider -\\nations, including: the responsible entities for accountability purposes; the goal and use cases for the system, identified users, and impacted populations; the assessment of notice clarity and timeliness; the assessment of the explanation's validity and accessibility; the assessment of the level of risk; and the account and assessment of how explanations are tailored, including to the purpose, the recipient of the explanation, and the level of risk. Individualized profile information should be made readily available to the greatest extent possible that includes explanations for any system impacts or inferences. Reporting should be provided in a clear plain language and machine-readable manner. \\n44\",\n", " 'You should be able to opt out, where appropriate, and \\nhave access to a person who can quickly consider and remedy problems you encounter. You should be able to opt out from automated systems in favor of a human alternative, where appropriate. Appropriateness should be determined based on rea\\n-\\nsonable expectations in a given context and with a focus on ensuring broad accessibility and protecting the public from especially harm\\n-\\nful impacts. In some cases, a human or other alternative may be re -\\nquired by law. 
You should have access to timely human consider -\\nation and remedy by a fallback and escalation process if an automat -\\ned system fails, it produces an error, or you would like to appeal or contest its impacts on you. Human consideration and fallback should be accessible, equitable, effective, maintained, accompanied by appropriate operator training, and should not impose an unrea\\n-\\nsonable burden on the public. Automated systems with an intended use within sensitive domains, including, but not limited to, criminal justice, employment, education, and health, should additionally be tailored to the purpose, provide meaningful access for oversight, include training for any people interacting with the system, and in\\n-\\ncorporate human consideration for adverse or high-risk decisions. Reporting that includes a description of these human governance processes and assessment of their timeliness, accessibility, out\\n-\\ncomes, and effectiveness should be made public whenever possible. HUMAN ALTERNATIVES , C ONSIDERATION ALLBACKF AND, \\n46'],\n", " 'ground_truth': 'Providing notice and explanation as a legal requirement in the context of automated systems is significant because it allows individuals to understand how automated systems are impacting their lives. It helps in correcting errors, contesting decisions, and verifying the reasonableness of recommendations before enacting them. 
Clear and valid explanations are essential to ensure transparency, accountability, and trust in the use of automated systems across various sectors.'}" ] }, "execution_count": 54, "metadata": {}, "output_type": "execute_result" } ], "source": [ "large_chunk_dataset[0]" ] }, { "cell_type": "code", "execution_count": 65, "metadata": {}, "outputs": [], "source": [ "# Tabular copy of the large-chunk evaluation set. Built from large_chunk_dataset\n", "# (not the earlier response_dataset) so answers/contexts belong to this run.\n", "large_chunk_dataframe = pd.DataFrame({\n", "    'question': large_chunk_dataset['question'],\n", "    'answer': large_chunk_dataset['answer'],\n", "    'contexts': large_chunk_dataset['contexts'],\n", "    'ground_truth': large_chunk_dataset['ground_truth']\n", "})" ] }, { "cell_type": "code", "execution_count": 55, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Evaluating: 100%|██████████| 120/120 [01:56<00:00, 1.03it/s]\n" ] } ], "source": [ "large_chunk_results = evaluate(large_chunk_dataset, metrics)" ] }, { "cell_type": "code", "execution_count": 56, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'faithfulness': 0.7961, 'answer_relevancy': 0.9593, 'context_recall': 0.8438, 'context_precision': 0.9294, 'answer_correctness': 0.6326}" ] }, "execution_count": 56, "metadata": {}, "output_type": "execute_result" } ], "source": [ "large_chunk_results" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "large_chunk_size = large_chunk_results.to_pandas()\n", "large_chunk_size" ] }, { "cell_type": "code", "execution_count": 65, "metadata": {}, "outputs": [], "source": [ "large_chunk_size.to_csv(\"large_chunk_ragas_results.csv\", index=False)" ] }, { "cell_type": "code", "execution_count": 60, "metadata": {}, "outputs": [], "source": [ "large_chunk_pdf = pd.DataFrame(list(large_chunk_results.items()), columns=['Metric', 'LargeChunk'])" ] }, { "cell_type": "code", "execution_count": 61, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
MetricLargeChunk
0faithfulness0.796131
1answer_relevancy0.959296
2context_recall0.843750
3context_precision0.929398
4answer_correctness0.632580
\n", "
" ], "text/plain": [ " Metric LargeChunk\n", "0 faithfulness 0.796131\n", "1 answer_relevancy 0.959296\n", "2 context_recall 0.843750\n", "3 context_precision 0.929398\n", "4 answer_correctness 0.632580" ] }, "execution_count": 61, "metadata": {}, "output_type": "execute_result" } ], "source": [ "large_chunk_pdf" ] }, { "cell_type": "code", "execution_count": 64, "metadata": {}, "outputs": [], "source": [ "large_chunk_pdf.to_csv(\"large_chunk_metrics.csv\", index=False)" ] }, { "cell_type": "code", "execution_count": 62, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
MetricBaselineMediumChunkLargeChunk
0faithfulness0.6984070.8953590.796131
1answer_relevancy0.9467660.9554190.959296
2context_recall0.8559030.9340280.843750
3context_precision0.9039350.9375000.929398
4answer_correctness0.6487440.6292670.632580
\n", "
" ], "text/plain": [ " Metric Baseline MediumChunk LargeChunk\n", "0 faithfulness 0.698407 0.895359 0.796131\n", "1 answer_relevancy 0.946766 0.955419 0.959296\n", "2 context_recall 0.855903 0.934028 0.843750\n", "3 context_precision 0.903935 0.937500 0.929398\n", "4 answer_correctness 0.648744 0.629267 0.632580" ] }, "execution_count": 62, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_baseline_medium_chunk = pd.merge(df_baseline, medium_chunk_pdf, on='Metric')\n", "df_baseline_medium_and_large_chunk = pd.merge(df_baseline_medium_chunk, large_chunk_pdf, on='Metric')\n", "\n", "\n", "df_baseline_medium_and_large_chunk" ] }, { "cell_type": "code", "execution_count": 63, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
MetricBaselineMediumChunkLargeChunkHigestValue
0faithfulness0.6984070.8953590.7961310.9 (MediumChunk)
1answer_relevancy0.9467660.9554190.9592960.96 (LargeChunk)
2context_recall0.8559030.9340280.8437500.93 (MediumChunk)
3context_precision0.9039350.9375000.9293980.94 (MediumChunk)
4answer_correctness0.6487440.6292670.6325800.65 (Baseline)
\n", "
" ], "text/plain": [ " Metric Baseline MediumChunk LargeChunk HigestValue\n", "0 faithfulness 0.698407 0.895359 0.796131 0.9 (MediumChunk)\n", "1 answer_relevancy 0.946766 0.955419 0.959296 0.96 (LargeChunk)\n", "2 context_recall 0.855903 0.934028 0.843750 0.93 (MediumChunk)\n", "3 context_precision 0.903935 0.937500 0.929398 0.94 (MediumChunk)\n", "4 answer_correctness 0.648744 0.629267 0.632580 0.65 (Baseline)" ] }, "execution_count": 63, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Label each metric with its best score (rounded to 2 places) and the configuration that achieved it.\n", "score_columns = ['Baseline', 'MediumChunk', 'LargeChunk']\n", "scores = df_baseline_medium_and_large_chunk[score_columns]\n", "best_score_label = scores.max(axis=1).round(2).astype(str) + ' (' + scores.idxmax(axis=1) + ')'\n", "\n", "# Column name is kept as-is because it also becomes the CSV header written by the next cell.\n", "df_baseline_medium_and_large_chunk = df_baseline_medium_and_large_chunk.assign(HigestValue=best_score_label)\n", "\n", "df_baseline_medium_and_large_chunk" ] }, { "cell_type": "code", "execution_count": 66, "metadata": {}, "outputs": [], "source": [ "df_baseline_medium_and_large_chunk.to_csv(\"chunksize_eval.csv\", index=False)" ] }, { "cell_type": "code", "execution_count": 67, "metadata": {}, "outputs": [], "source": [ "text_splitter = RecursiveCharacterTextSplitter(\n", " chunk_size=2000,\n", " chunk_overlap=250,\n", " )\n", "medium_chunk_medium_overlap_docs = text_splitter.split_documents(pdf_documents)" ] }, { "cell_type": "code", "execution_count": 68, "metadata": {}, "outputs": [], "source": [ "embedding = OpenAIEmbeddings(model=\"text-embedding-3-small\")" ] }, { "cell_type": "code", "execution_count": 69, "metadata": {}, "outputs": [], "source": [ "vectorstore = Qdrant.from_documents(\n", " documents=medium_chunk_medium_overlap_docs,\n", " embedding=embedding,\n",
" location=\":memory:\",\n", " collection_name=\"Full Content w/ Clean PDF\"\n", ")\n", "\n", "medium_chunk_medium_overlap_retriever = vectorstore.as_retriever(\n", " search_type=\"mmr\",\n", " search_kwargs={\"k\": 4, \"fetch_k\": 10},\n", ")\n", "\n", "medium_chunk_medium_overlap_memory = ConversationBufferMemory(memory_key=\"chat_history\", return_messages=True, output_key=\"answer\")" ] }, { "cell_type": "code", "execution_count": 86, "metadata": {}, "outputs": [], "source": [ "medium_chunk_medium_overlap_rag_chain = ConversationalRetrievalChain.from_llm(\n", " llm,\n", " retriever=medium_chunk_medium_overlap_retriever,\n", " memory=medium_chunk_medium_overlap_memory,\n", " combine_docs_chain_kwargs={\"prompt\": PROMPT},\n", " return_source_documents=True,\n", " )" ] }, { "cell_type": "code", "execution_count": 87, "metadata": {}, "outputs": [], "source": [ "# Query the chain once per test question, in order, then split out the answers and retrieved passages.\n", "responses = [\n", "    medium_chunk_medium_overlap_rag_chain.invoke({\"question\" : question})\n", "    for question in test_questions\n", "]\n", "answers = [response[\"answer\"] for response in responses]\n", "contexts = [\n", "    [document.page_content for document in response[\"source_documents\"]]\n", "    for response in responses\n", "]" ] }, { "cell_type": "code", "execution_count": 88, "metadata": {}, "outputs": [], "source": [ "from datasets import Dataset\n", "\n", "medium_chunk_medium_overlap_dataset = Dataset.from_dict({\n", " \"question\" : test_questions,\n", " \"answer\" : answers,\n", " \"contexts\" : contexts,\n", " \"ground_truth\" : test_groundtruths\n", "})" ] }, { "cell_type": "code", "execution_count": 89, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'question': 'What is the significance of providing notice and explanation as a legal requirement in the context of automated systems?',\n", " 'answer': \"Providing notice and explanation as a legal requirement in the context of automated systems is significant for several reasons:\\n\\n1.
**Transparency**: It ensures that individuals are aware when automated systems are being used to make decisions that affect them. This transparency helps build trust between the public and the entities deploying these systems.\\n\\n2. **Accountability**: By requiring entities to identify themselves and explain their systems, it holds them accountable for the decisions made by these automated systems. This means that if a decision negatively impacts someone, they can seek clarification and potentially contest that decision.\\n\\n3. **Empowerment**: When individuals receive clear explanations about how decisions are made, they are better equipped to understand and challenge those decisions if necessary. This is particularly important in sensitive areas like healthcare, child welfare, and credit assessments, where decisions can significantly impact people's lives.\\n\\n4. **Fairness**: Notice and explanation requirements can help mitigate biases in automated decision-making. By ensuring that individuals understand the criteria and data used in these systems, it becomes easier to identify and address any discriminatory practices.\\n\\n5. **Informed Consent**: Individuals have the right to know how their data is being used and how it affects them. Providing notice and explanations supports informed consent, allowing people to make better decisions regarding their engagement with automated systems.\\n\\nOverall, these requirements aim to create a more equitable and just environment where automated systems are used responsibly and ethically (Source: [document name], p. [page number]).\",\n", " 'contexts': [\"Providing notice has long been a standard practice, and in many cases is a legal requirement, when, for example, making a video recording of someone (outside of a law enforcement or national security context). In some cases, such as credit, lenders are required to provide notice and explanation to consumers. 
Techniques used to automate the process of explaining such systems are under active research and improvement and such explanations can take many forms. Innovative companies and researchers are rising to the challenge and creating and deploying explanatory systems that can help the public better understand decisions that impact them. \\nWhile notice and explanation requirements are already in place in some sectors or situations, the American public deserve to know consistently and across sectors if an automated system is being used in a way that impacts their rights, opportunities, or access. This knowledge should provide confidence in how the public is being treated, and trust in the validity and reasonable use of automated systems. \\n• A lawyer representing an older client with disabilities who had been cut off from Medicaid-funded home\\nhealth-care assistance couldn't determine why\\n, especially since the decision went against historical access\\npractices. In a court hearing, the lawyer learned from a witness that the state in which the older client\\nlived \\nhad recently adopted a new algorithm to determine eligibility.83 The lack of a timely explanation made it\\nharder \\nto understand and contest the decision.\\n•\\nA formal child welfare investigation is opened against a parent based on an algorithm and without the parent\\never \\nbeing notified that data was being collected and used as part of an algorithmic child maltreatment\\nrisk assessment.84 The lack of notice or an explanation makes it harder for those performing child\\nmaltreatment assessments to validate the risk assessment and denies parents knowledge that could help them\\ncontest a decision.\\n41\",\n", " 'You should know that an automated system is being used, \\nand understand how and why it contributes to outcomes that impact you. 
Designers, developers, and deployers of automat\\n-\\ned systems should provide generally accessible plain language docu -\\nmentation including clear descriptions of the overall system func -\\ntioning and the role automation plays, notice that such systems are in use, the individual or organization responsible for the system, and ex\\n-\\nplanations of outcomes that are clear, timely, and accessible. Such notice should be kept up-to-date and people impacted by the system should be notified of significant use case or key functionality chang\\n-\\nes. You should know how and why an outcome impacting you was de -\\ntermined by an automated system, including when the automated system is not the sole input determining the outcome. Automated systems should provide explanations that are technically valid, meaningful and useful to you and to any operators or others who need to understand the system, and calibrated to the level of risk based on the context. Reporting that includes summary information about these automated systems in plain language and assessments of the clarity and quality of the notice and explanations should be made public whenever possible. NOTICE AND EXPLANATION\\n40',\n", " \"Demonstrate protections for notice and explanation \\nReporting. Summary reporting should document the determinations made based on the above consider -\\nations, including: the responsible entities for accountability purposes; the goal and use cases for the system, identified users, and impacted populations; the assessment of notice clarity and timeliness; the assessment of the explanation's validity and accessibility; the assessment of the level of risk; and the account and assessment of how explanations are tailored, including to the purpose, the recipient of the explanation, and the level of risk. Individualized profile information should be made readily available to the greatest extent possible that includes explanations for any system impacts or inferences. 
Reporting should be provided in a clear plain language and machine-readable manner. \\n44\",\n", " 'NOTICE & \\nEXPLANATION \\nWHAT SHOULD BE EXPECTED OF AUTOMATED SYSTEMS\\nThe expectations for automated systems are meant to serve as a blueprint for the development of additional \\ntechnical standards and practices that are tailored for particular sectors and contexts. \\nAn automated system should provide demonstrably clear, timely, understandable, and accessible notice of use, and \\nexplanations as to how and why a decision was made or an action was taken by the system. These expectations are explained below. \\nProvide clear, timely, understandable, and accessible notice of use and explanations \\nGenerally accessible plain language documentation. The entity responsible for using the automated \\nsystem should ensure that documentation describing the overall system (including any human components) is \\npublic and easy to find. The documentation should describe, in plain language, how the system works and how \\nany automated component is used to determine an action or decision. It should also include expectations about \\nreporting described throughout this framework, such as the algorithmic impact assessments described as \\npart of Algorithmic Discrimination Protections. \\nAccount able. Notices should clearly identify the entity r esponsible for designing each component of the \\nsystem and the entity using it. \\nTimely and up-to-date. Users should receive notice of the use of automated systems in advance of using or \\nwhile being impacted by the technolog y. An explanation should be available with the decision itself, or soon \\nthereafte r. Notice should be kept up-to-date and people impacted by the system should be notified of use case \\nor key functionality changes. \\nBrief and clear. 
Notices and explanations should be assessed, such as by research on users’ experiences, \\nincluding user testing, to ensure that the people using or impacted by the automated system are able to easily'],\n", " 'ground_truth': 'Providing notice and explanation as a legal requirement in the context of automated systems is significant because it allows individuals to understand how automated systems are impacting their lives. It helps in correcting errors, contesting decisions, and verifying the reasonableness of recommendations before enacting them. Clear and valid explanations are essential to ensure transparency, accountability, and trust in the use of automated systems across various sectors.'}" ] }, "execution_count": 89, "metadata": {}, "output_type": "execute_result" } ], "source": [ "medium_chunk_medium_overlap_dataset[0]" ] }, { "cell_type": "code", "execution_count": 90, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Evaluating: 100%|██████████| 120/120 [02:09<00:00, 1.08s/it]\n" ] } ], "source": [ "medium_chunk_medium_overlap_results = evaluate(medium_chunk_medium_overlap_dataset, metrics)" ] }, { "cell_type": "code", "execution_count": 91, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'faithfulness': 0.8409, 'answer_relevancy': 0.9521, 'context_recall': 0.8472, 'context_precision': 0.9572, 'answer_correctness': 0.6181}" ] }, "execution_count": 91, "metadata": {}, "output_type": "execute_result" } ], "source": [ "medium_chunk_medium_overlap_results" ] }, { "cell_type": "code", "execution_count": 92, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
questioncontextsanswerground_truthfaithfulnessanswer_relevancycontext_recallcontext_precisionanswer_correctness
0What is the significance of providing notice a...[NOTICE & \\nEXPLANATION \\nWHY THIS PRINCIPLE I...Providing notice and explanation as a legal re...Providing notice and explanation as a legal re...1.0000000.9713211.0000001.0000000.604417
1How can structured human feedback exercises, s...[50 Participatory Engagement Methods \\nOn an ...Structured human feedback exercises, such as G...Structured human feedback exercises, such as G...1.0000000.9883091.0000001.0000000.817911
2How do measurement gaps between laboratory and...[49 early lifecycle TEVV approaches are develo...Measurement gaps between laboratory and real-w...Measurement gaps between laboratory and real-w...1.0000000.9887521.0000001.0000000.823813
3How should data collection and use-case scope ...[DATA PRIVACY \\nWHAT SHOULD BE EXPECTED OF AUT...To determine and implement data collection and...Data collection and use-case scope limits in a...1.0000000.9341381.0000001.0000000.583791
4What action did the Federal Trade Commission t...[• A device originally developed to help peopl...The Federal Trade Commission (FTC) took action...FTC sued Kochava for selling data that tracks ...0.0000000.9395590.0000000.0000000.378037
5How should explanatory mechanisms be built int...[NOTICE & \\nEXPLANATION \\nWHAT SHOULD BE EXPEC...Integrating explanatory mechanisms into system...In settings where the consequences are high as...1.0000000.9301691.0000001.0000000.618594
6What are some examples of GAI risks that organ...[3 the abuse, misuse, and unsafe repurposing b...Organizations need to consider several GAI (Ge...Organizations need to consider various GAI ris...1.0000000.9450321.0000001.0000000.692431
7How should the validity of explanations provid...[NOTICE & \\nEXPLANATION \\nWHAT SHOULD BE EXPEC...To ensure the validity of explanations provide...The explanation provided by a system should ac...1.0000000.9560921.0000001.0000000.811835
8How do generative models like LLMs generate ou...[6 2.2. Confabulation \\n“Confabulation” refer...Generative models, such as large language mode...Generative models like LLMs generate outputs t...1.0000000.9506451.0000001.0000000.387850
9How can appropriate diligence on training data...[27 MP-4.1-0 10 Conduct appropriate diligence ...Appropriate diligence in the use of training d...Appropriate diligence on training data use can...0.8666670.9801430.6666670.8055560.362010
10How do integrated human-AI systems benefit com...[SAFE AND EFFECTIVE \\nSYSTEMS \\nHOW THESE PRIN...Integrated human-AI systems enhance customer s...Integrated human-AI systems benefit companies ...0.0000000.9842110.0000000.7500000.405069
11What was the purpose of the year of public eng...[ABOUT THIS FRAMEWORK\\nThe Blueprint for an A...The purpose of the year of public engagement t...The purpose of the year of public engagement t...1.0000001.0000001.0000001.0000000.341577
12How can automated systems prevent 'mission cre...[DATA PRIVACY \\nWHAT SHOULD BE EXPECTED OF AUT...To prevent 'mission creep' while ensuring priv...Automated systems can prevent 'mission creep' ...1.0000000.9658211.0000001.0000000.799830
13How can GAI tech improve red-teaming with huma...[51 general public participants. For example, ...GAI technology can significantly enhance red-t...GAI technologies can improve red-teaming with ...0.7804880.9374421.0000001.0000000.925433
14Why is it important for lenders to inform cons...[NOTICE & \\nEXPLANATION \\nHOW THESE PRINCIPLES...It is important for lenders to inform consumer...It is important for lenders to inform consumer...0.9047620.9785680.7500001.0000000.438375
15Why is public transparency important in automa...[NOTICE & \\nEXPLANATION \\nWHY THIS PRINCIPLE I...Public transparency is crucial in automated sy...Public transparency is crucial in automated sy...0.9047620.9680751.0000001.0000000.992173
16How can governance principles manage risks of ...[47 Appendix A. Primary GAI Considerations \\...Governance principles can effectively manage t...Governance principles can be used to manage ri...0.2758620.9239521.0000001.0000000.790115
17Why is accuracy important in reviewing and doc...[25 MP-2.3-002 Review and document accuracy, r...Accuracy in reviewing and documenting data thr...Accuracy is crucial in reviewing and documenti...1.0000000.9600591.0000001.0000000.384420
18How can feedback be used to gather user input ...[50 Participatory Engagement Methods \\nOn an ...Feedback can be effectively utilized to collec...Use structured feedback mechanisms to solicit ...0.3846150.9490951.0000001.0000000.360994
19What measures are being taken to address issue...[ENDNOTES\\n35. Carrie Johnson. Flaws plague a ...Measures being taken to address issues for tra...TSA has announced plans to implement a gender-...1.0000001.0000000.5000001.0000000.918343
20How do ballot curing laws help voters fix ball...[HUMAN ALTERNATIVES, \\nCONSIDERATION, AND \\nFA...Ballot curing laws assist voters in addressing...Ballot curing laws in at least 24 states provi...0.9166670.9485701.0000000.8333330.712558
21How can feedback and red-teaming assess GAI eq...[49 early lifecycle TEVV approaches are develo...Feedback and red-teaming can be effectively ut...Implement continuous monitoring of GAI system ...1.0000000.9171480.6666671.0000000.807885
22How can algorithmic discrimination be prevente...[standards may require instituting mitigation ...Proactive measures and equity assessments can ...Algorithmic discrimination can be prevented th...0.2400000.9498580.6666670.9166670.602997
23How can system design ensure behavior transpar...[HUMAN ALTERNATIVES, \\nCONSIDERATION, AND \\nFA...To ensure behavior transparency in high-risk s...In settings where the consequences are high as...0.8333330.9561421.0000001.0000000.621474
\n", "
" ], "text/plain": [ " question \\\n", "0 What is the significance of providing notice a... \n", "1 How can structured human feedback exercises, s... \n", "2 How do measurement gaps between laboratory and... \n", "3 How should data collection and use-case scope ... \n", "4 What action did the Federal Trade Commission t... \n", "5 How should explanatory mechanisms be built int... \n", "6 What are some examples of GAI risks that organ... \n", "7 How should the validity of explanations provid... \n", "8 How do generative models like LLMs generate ou... \n", "9 How can appropriate diligence on training data... \n", "10 How do integrated human-AI systems benefit com... \n", "11 What was the purpose of the year of public eng... \n", "12 How can automated systems prevent 'mission cre... \n", "13 How can GAI tech improve red-teaming with huma... \n", "14 Why is it important for lenders to inform cons... \n", "15 Why is public transparency important in automa... \n", "16 How can governance principles manage risks of ... \n", "17 Why is accuracy important in reviewing and doc... \n", "18 How can feedback be used to gather user input ... \n", "19 What measures are being taken to address issue... \n", "20 How do ballot curing laws help voters fix ball... \n", "21 How can feedback and red-teaming assess GAI eq... \n", "22 How can algorithmic discrimination be prevente... \n", "23 How can system design ensure behavior transpar... \n", "\n", " contexts \\\n", "0 [NOTICE & \\nEXPLANATION \\nWHY THIS PRINCIPLE I... \n", "1 [50 Participatory Engagement Methods \\nOn an ... \n", "2 [49 early lifecycle TEVV approaches are develo... \n", "3 [DATA PRIVACY \\nWHAT SHOULD BE EXPECTED OF AUT... \n", "4 [• A device originally developed to help peopl... \n", "5 [NOTICE & \\nEXPLANATION \\nWHAT SHOULD BE EXPEC... \n", "6 [3 the abuse, misuse, and unsafe repurposing b... \n", "7 [NOTICE & \\nEXPLANATION \\nWHAT SHOULD BE EXPEC... \n", "8 [6 2.2. Confabulation \\n“Confabulation” refer... 
\n", "9 [27 MP-4.1-0 10 Conduct appropriate diligence ... \n", "10 [SAFE AND EFFECTIVE \\nSYSTEMS \\nHOW THESE PRIN... \n", "11 [ABOUT THIS FRAMEWORK\\nThe Blueprint for an A... \n", "12 [DATA PRIVACY \\nWHAT SHOULD BE EXPECTED OF AUT... \n", "13 [51 general public participants. For example, ... \n", "14 [NOTICE & \\nEXPLANATION \\nHOW THESE PRINCIPLES... \n", "15 [NOTICE & \\nEXPLANATION \\nWHY THIS PRINCIPLE I... \n", "16 [47 Appendix A. Primary GAI Considerations \\... \n", "17 [25 MP-2.3-002 Review and document accuracy, r... \n", "18 [50 Participatory Engagement Methods \\nOn an ... \n", "19 [ENDNOTES\\n35. Carrie Johnson. Flaws plague a ... \n", "20 [HUMAN ALTERNATIVES, \\nCONSIDERATION, AND \\nFA... \n", "21 [49 early lifecycle TEVV approaches are develo... \n", "22 [standards may require instituting mitigation ... \n", "23 [HUMAN ALTERNATIVES, \\nCONSIDERATION, AND \\nFA... \n", "\n", " answer \\\n", "0 Providing notice and explanation as a legal re... \n", "1 Structured human feedback exercises, such as G... \n", "2 Measurement gaps between laboratory and real-w... \n", "3 To determine and implement data collection and... \n", "4 The Federal Trade Commission (FTC) took action... \n", "5 Integrating explanatory mechanisms into system... \n", "6 Organizations need to consider several GAI (Ge... \n", "7 To ensure the validity of explanations provide... \n", "8 Generative models, such as large language mode... \n", "9 Appropriate diligence in the use of training d... \n", "10 Integrated human-AI systems enhance customer s... \n", "11 The purpose of the year of public engagement t... \n", "12 To prevent 'mission creep' while ensuring priv... \n", "13 GAI technology can significantly enhance red-t... \n", "14 It is important for lenders to inform consumer... \n", "15 Public transparency is crucial in automated sy... \n", "16 Governance principles can effectively manage t... \n", "17 Accuracy in reviewing and documenting data thr... 
\n", "18 Feedback can be effectively utilized to collec... \n", "19 Measures being taken to address issues for tra... \n", "20 Ballot curing laws assist voters in addressing... \n", "21 Feedback and red-teaming can be effectively ut... \n", "22 Proactive measures and equity assessments can ... \n", "23 To ensure behavior transparency in high-risk s... \n", "\n", " ground_truth faithfulness \\\n", "0 Providing notice and explanation as a legal re... 1.000000 \n", "1 Structured human feedback exercises, such as G... 1.000000 \n", "2 Measurement gaps between laboratory and real-w... 1.000000 \n", "3 Data collection and use-case scope limits in a... 1.000000 \n", "4 FTC sued Kochava for selling data that tracks ... 0.000000 \n", "5 In settings where the consequences are high as... 1.000000 \n", "6 Organizations need to consider various GAI ris... 1.000000 \n", "7 The explanation provided by a system should ac... 1.000000 \n", "8 Generative models like LLMs generate outputs t... 1.000000 \n", "9 Appropriate diligence on training data use can... 0.866667 \n", "10 Integrated human-AI systems benefit companies ... 0.000000 \n", "11 The purpose of the year of public engagement t... 1.000000 \n", "12 Automated systems can prevent 'mission creep' ... 1.000000 \n", "13 GAI technologies can improve red-teaming with ... 0.780488 \n", "14 It is important for lenders to inform consumer... 0.904762 \n", "15 Public transparency is crucial in automated sy... 0.904762 \n", "16 Governance principles can be used to manage ri... 0.275862 \n", "17 Accuracy is crucial in reviewing and documenti... 1.000000 \n", "18 Use structured feedback mechanisms to solicit ... 0.384615 \n", "19 TSA has announced plans to implement a gender-... 1.000000 \n", "20 Ballot curing laws in at least 24 states provi... 0.916667 \n", "21 Implement continuous monitoring of GAI system ... 1.000000 \n", "22 Algorithmic discrimination can be prevented th... 
0.240000 \n", "23 In settings where the consequences are high as... 0.833333 \n", "\n", " answer_relevancy context_recall context_precision answer_correctness \n", "0 0.971321 1.000000 1.000000 0.604417 \n", "1 0.988309 1.000000 1.000000 0.817911 \n", "2 0.988752 1.000000 1.000000 0.823813 \n", "3 0.934138 1.000000 1.000000 0.583791 \n", "4 0.939559 0.000000 0.000000 0.378037 \n", "5 0.930169 1.000000 1.000000 0.618594 \n", "6 0.945032 1.000000 1.000000 0.692431 \n", "7 0.956092 1.000000 1.000000 0.811835 \n", "8 0.950645 1.000000 1.000000 0.387850 \n", "9 0.980143 0.666667 0.805556 0.362010 \n", "10 0.984211 0.000000 0.750000 0.405069 \n", "11 1.000000 1.000000 1.000000 0.341577 \n", "12 0.965821 1.000000 1.000000 0.799830 \n", "13 0.937442 1.000000 1.000000 0.925433 \n", "14 0.978568 0.750000 1.000000 0.438375 \n", "15 0.968075 1.000000 1.000000 0.992173 \n", "16 0.923952 1.000000 1.000000 0.790115 \n", "17 0.960059 1.000000 1.000000 0.384420 \n", "18 0.949095 1.000000 1.000000 0.360994 \n", "19 1.000000 0.500000 1.000000 0.918343 \n", "20 0.948570 1.000000 0.833333 0.712558 \n", "21 0.917148 0.666667 1.000000 0.807885 \n", "22 0.949858 0.666667 0.916667 0.602997 \n", "23 0.956142 1.000000 1.000000 0.621474 " ] }, "execution_count": 92, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Per-question scores for this run: read them from this run's evaluation result,\n", "# not from the large-chunk evaluation, so the frame matches the aggregate metrics built below.\n", "medium_chunk_medium_overlap_df = medium_chunk_medium_overlap_results.to_pandas()\n", "medium_chunk_medium_overlap_df" ] }, { "cell_type": "code", "execution_count": 93, "metadata": {}, "outputs": [], "source": [ "medium_chunk_medium_overlap_metrics = pd.DataFrame(list(medium_chunk_medium_overlap_results.items()), columns=['Metric', 'MedChunkMedOverlap'])" ] }, { "cell_type": "code", "execution_count": 94, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
MetricMedChunkMedOverlap
0faithfulness0.840881
1answer_relevancy0.952068
2context_recall0.847222
3context_precision0.957176
4answer_correctness0.618149
\n", "
" ], "text/plain": [ " Metric MedChunkMedOverlap\n", "0 faithfulness 0.840881\n", "1 answer_relevancy 0.952068\n", "2 context_recall 0.847222\n", "3 context_precision 0.957176\n", "4 answer_correctness 0.618149" ] }, "execution_count": 94, "metadata": {}, "output_type": "execute_result" } ], "source": [ "medium_chunk_medium_overlap_metrics" ] }, { "cell_type": "code", "execution_count": 95, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
MetricMediumChunk
0faithfulness0.895359
1answer_relevancy0.955419
2context_recall0.934028
3context_precision0.937500
4answer_correctness0.629267
\n", "
" ], "text/plain": [ " Metric MediumChunk\n", "0 faithfulness 0.895359\n", "1 answer_relevancy 0.955419\n", "2 context_recall 0.934028\n", "3 context_precision 0.937500\n", "4 answer_correctness 0.629267" ] }, "execution_count": 95, "metadata": {}, "output_type": "execute_result" } ], "source": [ "medium_chunk_pdf" ] }, { "cell_type": "code", "execution_count": 97, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
MetricMediumChunkMedChunkMedOverlapMediumChunk -> MedChunkMedOverlap
0faithfulness0.8953590.840881-0.054478
1answer_relevancy0.9554190.952068-0.003351
2context_recall0.9340280.847222-0.086806
3context_precision0.9375000.9571760.019676
4answer_correctness0.6292670.618149-0.011118
\n", "
" ], "text/plain": [ " Metric MediumChunk MedChunkMedOverlap \\\n", "0 faithfulness 0.895359 0.840881 \n", "1 answer_relevancy 0.955419 0.952068 \n", "2 context_recall 0.934028 0.847222 \n", "3 context_precision 0.937500 0.957176 \n", "4 answer_correctness 0.629267 0.618149 \n", "\n", " MediumChunk -> MedChunkMedOverlap \n", "0 -0.054478 \n", "1 -0.003351 \n", "2 -0.086806 \n", "3 0.019676 \n", "4 -0.011118 " ] }, "execution_count": 97, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Side-by-side scores plus the signed change (overlap run minus no-overlap run) per metric.\n", "overlap_comparison = medium_chunk_pdf.merge(medium_chunk_medium_overlap_metrics, on='Metric')\n", "overlap_delta = overlap_comparison['MedChunkMedOverlap'] - overlap_comparison['MediumChunk']\n", "\n", "df_medium_chunk_vs_med_chunk_med_overlap = overlap_comparison.assign(**{'MediumChunk -> MedChunkMedOverlap': overlap_delta})\n", "\n", "df_medium_chunk_vs_med_chunk_med_overlap" ] }, { "cell_type": "code", "execution_count": 98, "metadata": {}, "outputs": [], "source": [ "df_medium_chunk_vs_med_chunk_med_overlap.to_csv(\"medium_chunk_vs_med_chunk_med_overlap_metrics.csv\", index=False)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "colab": { "collapsed_sections": [ "Clyykfe6xOIo" ], "provenance": [], "toc_visible": true }, "kernelspec": { "display_name": "Python 3", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.4" }, "widgets": { "application/vnd.jupyter.widget-state+json": { "00afceb39c074975b6b88d6d0d4d2901": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null,
"_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_9932859168ad436e9aeef09279b534b1", "max": 95, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_474f44771cb04a4693585273a03a5548", "value": 95 } }, "0267d8c4d9cc48b0a4d60d206de62a91": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "09c3173c05f54539ae025937b1525e90": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "0be98b57b4894cf9a92818ae1dd72976": { 
"model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_bda4d1ec0f0043c8b4d254a4ada3e9bf", "max": 20, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_1b24ed8b36764c39aef39c92430fdc1d", "value": 20 } }, "126c30cc07c4452ab73fefce09dab617": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "1535c2c75a104f3abb262c5fb7859c14": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, 
"min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "1b24ed8b36764c39aef39c92430fdc1d": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "1c0f9aeab5de4e32af8bfef423a64f3b": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_0267d8c4d9cc48b0a4d60d206de62a91", "placeholder": "​", "style": "IPY_MODEL_126c30cc07c4452ab73fefce09dab617", "value": "Generating: 100%" } }, "1e2026abc1314d3caf37d74af7a407e7": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_e32bc4bb09af4ac5a608e56f87317596", "max": 95, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_b53095cea92740dfb967120a77310283", "value": 95 } }, 
"22c5f6324de545ba814402c3f71d84f1": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "353b6b9a974048499d854774fe4c882c": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, 
"max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "3b2e50139c234d19ac3e32515e575883": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "474f44771cb04a4693585273a03a5548": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "4d9ba78dc78040f494df9122ddc7ba1d": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_e4e76e5d4fba404a9ed4ff059f3a0c04", "IPY_MODEL_1e2026abc1314d3caf37d74af7a407e7", "IPY_MODEL_fb306876e3244dc69312e2af46c4da02" ], "layout": "IPY_MODEL_b319ae78e30d437c81f07d5a062ba805" } }, "63d6044414e24c5ea55efa925f7a3b56": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": 
"@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "754827da55fa4240bce3710048d1645b": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_bd96dd318c1b4e1481c039321e052081", "placeholder": "​", "style": "IPY_MODEL_3b2e50139c234d19ac3e32515e575883", "value": "embedding nodes: 100%" } }, "764b7b6827c9437b90c9c948b9f1037b": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "771597df670f417794f66408b05a7eb9": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, 
"grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "7ab80823e1344b638ddd1646367a6ce6": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "7b61421d62964b00ba440ecba21f4b52": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_353b6b9a974048499d854774fe4c882c", "max": 1248, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_09c3173c05f54539ae025937b1525e90", "value": 1248 } }, "8025a0f161d3475794daa9cd88209d5c": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", 
"description_width": "" } }, "824fe37b12d4414a9376e266ddd086f5": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_9496fc3f26cb42ec9ace36175eb14906", "placeholder": "​", "style": "IPY_MODEL_8025a0f161d3475794daa9cd88209d5c", "value": "Evaluating: 100%" } }, "90af75e58cef440a8d38ee6621e0f4d1": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "92f2e2d3123c4cd88d7c5755342ae154": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", 
"_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "9496fc3f26cb42ec9ace36175eb14906": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "9932859168ad436e9aeef09279b534b1": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, 
"grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "9a40d4ba626f4563b062a5765325d8e4": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_1c0f9aeab5de4e32af8bfef423a64f3b", "IPY_MODEL_0be98b57b4894cf9a92818ae1dd72976", "IPY_MODEL_c7550f460273484a913d211381630626" ], "layout": "IPY_MODEL_ba8f638b7f6343d9b07cce6e54e9be1c" } }, "9ccac42dd9f04713b0ed9fe09c35b5b0": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": 
null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": "hidden", "width": null } }, "b319ae78e30d437c81f07d5a062ba805": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "b53095cea92740dfb967120a77310283": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "ba8f638b7f6343d9b07cce6e54e9be1c": { "model_module": "@jupyter-widgets/base", "model_module_version": 
"1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "bd96dd318c1b4e1481c039321e052081": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, 
"object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "bda4d1ec0f0043c8b4d254a4ada3e9bf": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "c3d31c6cf07143aea1bbe76aa13fbca8": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_754827da55fa4240bce3710048d1645b", "IPY_MODEL_7b61421d62964b00ba440ecba21f4b52", "IPY_MODEL_c67b66e1f2d34ce4b10789fc2fca5843" ], "layout": "IPY_MODEL_9ccac42dd9f04713b0ed9fe09c35b5b0" } }, "c67b66e1f2d34ce4b10789fc2fca5843": { 
"model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_faa998b429774e4eb6aaaa5477bb6977", "placeholder": "​", "style": "IPY_MODEL_7ab80823e1344b638ddd1646367a6ce6", "value": " 1248/1248 [07:00<00:00, 49.86s/it]" } }, "c7550f460273484a913d211381630626": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_1535c2c75a104f3abb262c5fb7859c14", "placeholder": "​", "style": "IPY_MODEL_92f2e2d3123c4cd88d7c5755342ae154", "value": " 20/20 [01:17<00:00, 12.75s/it]" } }, "ce0b10aca9064bc092cf3305eb0dab04": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "ced3689d335c4f1ca62d39b908d6cb33": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": 
"@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_824fe37b12d4414a9376e266ddd086f5", "IPY_MODEL_00afceb39c074975b6b88d6d0d4d2901", "IPY_MODEL_e8c20cb22ecb40dbaf61959fc7d087cb" ], "layout": "IPY_MODEL_771597df670f417794f66408b05a7eb9" } }, "d020211480b149cab1761b14ae631eb1": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "e32bc4bb09af4ac5a608e56f87317596": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": 
null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "e4e76e5d4fba404a9ed4ff059f3a0c04": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_22c5f6324de545ba814402c3f71d84f1", "placeholder": "​", "style": "IPY_MODEL_764b7b6827c9437b90c9c948b9f1037b", "value": "Evaluating: 100%" } }, "e8c20cb22ecb40dbaf61959fc7d087cb": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_90af75e58cef440a8d38ee6621e0f4d1", "placeholder": "​", "style": "IPY_MODEL_ce0b10aca9064bc092cf3305eb0dab04", "value": " 95/95 [00:30<00:00,  1.25it/s]" } }, "faa998b429774e4eb6aaaa5477bb6977": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", 
"state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "fb306876e3244dc69312e2af46c4da02": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_d020211480b149cab1761b14ae631eb1", "placeholder": "​", "style": "IPY_MODEL_63d6044414e24c5ea55efa925f7a3b56", "value": " 95/95 [00:24<00:00,  1.20it/s]" } } } } }, "nbformat": 4, "nbformat_minor": 0 }