{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Caption metric comparison\n",
    "\n",
    "Scores the captions in `csv/predictions.csv` against their five reference captions (`caption1` ... `caption5`) using `aac_metrics` and the METEOR, BLEU and ROUGE metrics loaded through `evaluate.load`.\n",
    "\n",
    "The saved outputs below were recorded in earlier sessions; execution counts have been cleared, so run the notebook top to bottom on a fresh kernel to regenerate them."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import evaluate\n",
    "import pandas as pd\n",
    "\n",
    "# `aac_metrics.evaluate` is a function. Import it under an alias so it does not\n",
    "# rebind the name `evaluate`, which must keep referring to the module that\n",
    "# provides `evaluate.load` (otherwise: 'function' object has no attribute 'load').\n",
    "from aac_metrics import evaluate as aac_evaluate\n",
    "from transformers import AutoTokenizer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Configuration: input file and the columns holding the reference captions.\n",
    "PREDICTIONS_CSV = \"csv/predictions.csv\"\n",
    "PREDICTION_COLUMN = \"prediction\"\n",
    "CAPTION_COLUMNS = [\"caption1\", \"caption2\", \"caption3\", \"caption4\", \"caption5\"]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load predictions and references"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv(PREDICTIONS_CSV)\n",
    "\n",
    "# Fail early with a readable message if the CSV does not have the expected layout.\n",
    "missing_columns = [c for c in [PREDICTION_COLUMN, *CAPTION_COLUMNS] if c not in df.columns]\n",
    "assert not missing_columns, f\"missing expected columns: {missing_columns}\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One prediction per row, and one list of five reference captions per row.\n",
    "# Column-wise extraction replaces the per-row `df.loc[idx][...]` lookups and does\n",
    "# not depend on the frame having a default 0..n-1 index.\n",
    "predictions = df[PREDICTION_COLUMN].tolist()\n",
    "references = df[CAPTION_COLUMNS].to_numpy().tolist()\n",
    "\n",
    "assert len(predictions) == len(references) == len(df)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## aac_metrics scores\n",
    "\n",
    "`corpus_score[0]` is a dict of scalar tensors keyed by metric name."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "corpus_score = aac_evaluate(predictions, references)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'bleu_1': tensor(0.3913, dtype=torch.float64),\n",
       " 'bleu_2': tensor(0.1931, dtype=torch.float64),\n",
       " 'bleu_3': tensor(0.1065, dtype=torch.float64),\n",
       " 'bleu_4': tensor(0.0569, dtype=torch.float64),\n",
       " 'meteor': tensor(0.1197, dtype=torch.float64),\n",
       " 'rouge_l': tensor(0.2745, dtype=torch.float64),\n",
       " 'cider_d': tensor(0.1235, dtype=torch.float64),\n",
       " 'spice': tensor(0.0670, dtype=torch.float64),\n",
       " 'spider': tensor(0.0953, dtype=torch.float64)}"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "corpus_score[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'bleu_1': 0.3912776883574468, 'bleu_2': 0.19312066269135236, 'bleu_3': 0.10651188216812753, 'bleu_4': 0.05690269475018141, 'meteor': 0.11968742992878356, 'rouge_l': 0.2744644068893943, 'cider_d': 0.12347016800968286, 'spice': 0.06704068138550699, 'spider': 0.09525542469759493}\n"
     ]
    }
   ],
   "source": [
    "# Same scores as plain Python floats (full precision instead of the tensor repr).\n",
    "print({k: v.item() for k, v in corpus_score[0].items()})"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## METEOR (`evaluate`)\n",
    "\n",
    "The recorded value below (0.267) differs from the `meteor` entry reported by `aac_metrics` above (0.120). Presumably the two libraries use different METEOR implementations or tokenization - **TODO** confirm before comparing the numbers."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[nltk_data] Downloading package wordnet to /root/nltk_data...\n",
      "[nltk_data]   Package wordnet is already up-to-date!\n",
      "[nltk_data] Downloading package punkt to /root/nltk_data...\n",
      "[nltk_data]   Package punkt is already up-to-date!\n",
      "[nltk_data] Downloading package omw-1.4 to /root/nltk_data...\n",
      "[nltk_data]   Package omw-1.4 is already up-to-date!\n"
     ]
    }
   ],
   "source": [
    "metric = evaluate.load(\"meteor\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "results = metric.compute(predictions=predictions, references=references)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'meteor': 0.26686702985116983}\n"
     ]
    }
   ],
   "source": [
    "print(results)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## BLEU (`evaluate`)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "bleu = evaluate.load(\"bleu\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): `tokenizer` is not referenced by any later cell in this notebook;\n",
    "# the BLEU call below does not receive it. Wire it in or drop this cell.\n",
    "tokenizer = AutoTokenizer.from_pretrained(\"facebook/bart-large\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "bleu_result = bleu.compute(predictions=predictions, references=references, max_order=4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'bleu': 0.06128958043343902, 'precisions': [0.42544588056899413, 0.09036238675413934, 0.031210136916404455, 0.01176031360836289], 'brevity_penalty': 1.0, 'length_ratio': 1.3508583690987124, 'translation_length': 13849, 'reference_length': 10252}\n"
     ]
    }
   ],
   "source": [
    "print(bleu_result)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## ROUGE (`evaluate`)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "rouge_metric = evaluate.load(\"rouge\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "rouge_result = rouge_metric.compute(predictions=predictions, references=references)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'rouge1': 0.30500784605917763,\n",
       " 'rouge2': 0.08778194034686765,\n",
       " 'rougeL': 0.2707178803695874,\n",
       " 'rougeLsum': 0.27045227295118685}"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rouge_result"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}