{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# NIST Questions\n", "This Notebook utilises the points under Govern, Map, Measure, and Manage in the NIST AI RMF to query a policy and obtain responses of how well the policy aligns." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Import Packages" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import openai\n", "from llama_index import SimpleDirectoryReader, ServiceContext, VectorStoreIndex\n", "import nest_asyncio\n", "import pandas as pd\n", "nest_asyncio.apply()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## OpenAI API Key" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "openai.api_key = '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'" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Import the GenAI Policy\n", "The policies used here was created by AI after being asked to create a policy aligned with the NIST document which was uploaded" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "gold_policy = SimpleDirectoryReader(input_files=['data/Badguys AI Ethics and Responsible AI Policy.pdf']).load_data()\n", "mock_policy = SimpleDirectoryReader(input_files=['data/Mock Policy.pdf']).load_data()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Set chunk information" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "chunk_size = 128\n", "chunk_overlap = 20\n", "similarity_top_k = 6" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Read text files of NIST AI RMF statements" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "Govern = open(\"./Statements/Govern.txt\", \"r\").readlines()\n", "Govern = [Govern[i].replace(\"\\n\", \"\") for i in range(len(Govern))]\n", "\n", "Map = open(\"./Statements/Map.txt\", \"r\").readlines()\n", "Map = [Map[i].replace(\"\\n\", \"\") for i in range(len(Map))]\n", "\n", "Measure = open(\"./Statements/Measure.txt\", \"r\").readlines()\n", "Measure = [Measure[i].replace(\"\\n\", \"\") for i in range(len(Measure))]\n", "\n", "Manage = open(\"./Statements/Manage.txt\", \"r\").readlines()\n", "Manage = [Manage[i].replace(\"\\n\", \"\") for i in range(len(Manage))]\n", "\n", "eval_questions = np.concatenate((Govern, Map, Measure, Manage))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Define the question-asking function" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def ask_questions(docs):\n", " service_context = ServiceContext.from_defaults(chunk_size=chunk_size, chunk_overlap=20)\n", " index = VectorStoreIndex.from_documents(docs, service_context=service_context)\n", " query_engine = index.as_query_engine(similarity_top_k=similarity_top_k)\n", " responses = []\n", " sources = []\n", " for question in eval_questions:\n", " response = query_engine.query(\"Give evidence of where the policy aligns with the following point: \" + question)\n", " source = \"\"\n", " for i in range(similarity_top_k):\n", " source += response.source_nodes[i].node.get_content(metadata_mode=\"all\") + \"\\n\\n-----\\n\"\n", " responses.append(response.response)\n", " sources.append(source)\n", " return responses, sources" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Query the document" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "Data = pd.DataFrame(index=eval_questions)\n", "Data[\"Gold\"], Data[\"Gold Sources\"] = ask_questions(gold_policy)\n", "Data[\"Company\"], Data[\"Company Sources\"] = ask_questions(mock_policy)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Write to .csv file" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "Data.to_csv(\"./Results/Compare.csv\")" ] } ], "metadata": { "kernelspec": { "display_name": "docu_compare", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.5" } }, "nbformat": 4, "nbformat_minor": 2 }