{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# NIST Questions\n",
    "This notebook uses the points under Govern, Map, Measure, and Manage in the NIST AI RMF to query a policy and obtain responses describing how well the policy aligns with each point."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Import Packages"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "import nest_asyncio\n",
    "import numpy as np\n",
    "import openai\n",
    "import pandas as pd\n",
    "from llama_index import SimpleDirectoryReader, ServiceContext, VectorStoreIndex\n",
    "# Patch asyncio so that nested event loops are allowed; presumably needed because the\n",
    "# notebook kernel already runs an event loop -- TODO confirm this is still required.\n",
    "nest_asyncio.apply()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## OpenAI API Key"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the key from the environment so the secret is never stored in the notebook.\n",
    "# Raises KeyError if OPENAI_API_KEY is not set.\n",
    "openai.api_key = os.environ[\"OPENAI_API_KEY\"]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Import the GenAI Policy\n",
    "The policies used here were created by AI after it was asked to create a policy aligned with the uploaded NIST document."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Parse the \"gold\" policy PDF into documents.\n",
    "gold_reader = SimpleDirectoryReader(\n",
    "    input_files=['data/Badguys AI Ethics and Responsible AI Policy.pdf']\n",
    ")\n",
    "gold_policy = gold_reader.load_data()\n",
    "\n",
    "# Parse the mock policy PDF into documents.\n",
    "mock_reader = SimpleDirectoryReader(\n",
    "    input_files=['data/Mock Policy.pdf']\n",
    ")\n",
    "mock_policy = mock_reader.load_data()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Set chunk information"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Passed to ServiceContext.from_defaults when an index is built; presumably measured in tokens -- TODO confirm.\n",
    "chunk_size = 128\n",
    "# Overlap between neighbouring chunks; presumably in the same unit as chunk_size -- TODO confirm.\n",
    "chunk_overlap = 20\n",
    "# Number of source nodes requested from the query engine for each question.\n",
    "similarity_top_k = 6"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Read text files of NIST AI RMF statements"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "def read_statements(path):\n",
    "    \"\"\"Return the lines of the text file at `path` with newline characters removed.\n",
    "\n",
    "    The file is opened in a `with` block so the handle is closed as soon as\n",
    "    the lines have been read.\n",
    "    \"\"\"\n",
    "    with open(path, \"r\") as statements_file:\n",
    "        return [line.replace(\"\\n\", \"\") for line in statements_file]\n",
    "\n",
    "\n",
    "# One list per statements file, one entry per line of that file.\n",
    "Govern = read_statements(\"./Statements/Govern.txt\")\n",
    "Map = read_statements(\"./Statements/Map.txt\")\n",
    "Measure = read_statements(\"./Statements/Measure.txt\")\n",
    "Manage = read_statements(\"./Statements/Manage.txt\")\n",
    "\n",
    "# Single array holding every statement, in Govern, Map, Measure, Manage order.\n",
    "eval_questions = np.concatenate((Govern, Map, Measure, Manage))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Define the question-asking function"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def ask_questions(docs):\n",
    "    \"\"\"Query an index built from `docs` with every entry of `eval_questions`.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    docs : documents to index, e.g. the result of `SimpleDirectoryReader(...).load_data()`.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    responses : list with the response text for each question.\n",
    "    sources : list of str, one per question, holding the content of each\n",
    "        retrieved source node followed by a `-----` separator line.\n",
    "\n",
    "    Reads the notebook-level settings `chunk_size`, `chunk_overlap`,\n",
    "    `similarity_top_k` and `eval_questions`.\n",
    "    \"\"\"\n",
    "    # Use the configured overlap instead of a literal so that changing the setting takes effect.\n",
    "    service_context = ServiceContext.from_defaults(chunk_size=chunk_size, chunk_overlap=chunk_overlap)\n",
    "    index = VectorStoreIndex.from_documents(docs, service_context=service_context)\n",
    "    query_engine = index.as_query_engine(similarity_top_k=similarity_top_k)\n",
    "    node_separator = \"\\n\\n-----\\n\"\n",
    "    responses = []\n",
    "    sources = []\n",
    "    for question in eval_questions:\n",
    "        response = query_engine.query(\"Give evidence of where the policy aligns with the following point: \" + question)\n",
    "        # Slice instead of indexing by range(): a short document can return fewer than\n",
    "        # similarity_top_k nodes, and response.source_nodes[i] would then raise IndexError.\n",
    "        source = \"\".join(\n",
    "            source_node.node.get_content(metadata_mode=\"all\") + node_separator\n",
    "            for source_node in response.source_nodes[:similarity_top_k]\n",
    "        )\n",
    "        responses.append(response.response)\n",
    "        sources.append(source)\n",
    "    return responses, sources"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Query the document"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Ask every question against each policy; each call returns (responses, sources).\n",
    "gold_responses, gold_sources = ask_questions(gold_policy)\n",
    "company_responses, company_sources = ask_questions(mock_policy)\n",
    "\n",
    "# One row per question, holding the response and its sources for each policy.\n",
    "Data = pd.DataFrame(\n",
    "    {\n",
    "        \"Gold\": gold_responses,\n",
    "        \"Gold Sources\": gold_sources,\n",
    "        \"Company\": company_responses,\n",
    "        \"Company Sources\": company_sources,\n",
    "    },\n",
    "    index=eval_questions,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Write to .csv file"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# to_csv does not create missing directories, so make sure the output folder exists first.\n",
    "os.makedirs(\"./Results\", exist_ok=True)\n",
    "Data.to_csv(\"./Results/Compare.csv\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "docu_compare",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}