xicocdi committed on
Commit
2182f80
1 Parent(s): a942057

final push

Browse files
Files changed (2) hide show
  1. Embedding_Model_Eval.ipynb +206 -0
  2. app.py +1 -5
Embedding_Model_Eval.ipynb CHANGED
@@ -576,6 +576,212 @@
576
  "multiquery_ft_embedding_metrics_df.to_csv(\"multiquery_ft_embedding_metrics.csv\", index=False)"
577
  ]
578
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
579
  {
580
  "cell_type": "code",
581
  "execution_count": 41,
 
576
  "multiquery_ft_embedding_metrics_df.to_csv(\"multiquery_ft_embedding_metrics.csv\", index=False)"
577
  ]
578
  },
579
+ {
580
+ "cell_type": "code",
581
+ "execution_count": 1,
582
+ "metadata": {},
583
+ "outputs": [],
584
+ "source": [
585
+ "import pandas as pd"
586
+ ]
587
+ },
588
+ {
589
+ "cell_type": "code",
590
+ "execution_count": 2,
591
+ "metadata": {},
592
+ "outputs": [],
593
+ "source": [
594
+ "multiquery_metrics_df = pd.read_csv(\"multiquery_metrics.csv\")"
595
+ ]
596
+ },
597
+ {
598
+ "cell_type": "code",
599
+ "execution_count": 3,
600
+ "metadata": {},
601
+ "outputs": [],
602
+ "source": [
603
+ "multiquery_ft_embedding_metrics_df = pd.read_csv(\"multiquery_ft_embedding_metrics.csv\")"
604
+ ]
605
+ },
606
+ {
607
+ "cell_type": "code",
608
+ "execution_count": 5,
609
+ "metadata": {},
610
+ "outputs": [
611
+ {
612
+ "data": {
613
+ "text/html": [
614
+ "<div>\n",
615
+ "<style scoped>\n",
616
+ " .dataframe tbody tr th:only-of-type {\n",
617
+ " vertical-align: middle;\n",
618
+ " }\n",
619
+ "\n",
620
+ " .dataframe tbody tr th {\n",
621
+ " vertical-align: top;\n",
622
+ " }\n",
623
+ "\n",
624
+ " .dataframe thead th {\n",
625
+ " text-align: right;\n",
626
+ " }\n",
627
+ "</style>\n",
628
+ "<table border=\"1\" class=\"dataframe\">\n",
629
+ " <thead>\n",
630
+ " <tr style=\"text-align: right;\">\n",
631
+ " <th></th>\n",
632
+ " <th>Metric</th>\n",
633
+ " <th>MultiQuery</th>\n",
634
+ " </tr>\n",
635
+ " </thead>\n",
636
+ " <tbody>\n",
637
+ " <tr>\n",
638
+ " <th>0</th>\n",
639
+ " <td>faithfulness</td>\n",
640
+ " <td>0.896804</td>\n",
641
+ " </tr>\n",
642
+ " <tr>\n",
643
+ " <th>1</th>\n",
644
+ " <td>answer_relevancy</td>\n",
645
+ " <td>0.953211</td>\n",
646
+ " </tr>\n",
647
+ " <tr>\n",
648
+ " <th>2</th>\n",
649
+ " <td>context_recall</td>\n",
650
+ " <td>0.890625</td>\n",
651
+ " </tr>\n",
652
+ " <tr>\n",
653
+ " <th>3</th>\n",
654
+ " <td>context_precision</td>\n",
655
+ " <td>0.920732</td>\n",
656
+ " </tr>\n",
657
+ " <tr>\n",
658
+ " <th>4</th>\n",
659
+ " <td>answer_correctness</td>\n",
660
+ " <td>0.690058</td>\n",
661
+ " </tr>\n",
662
+ " </tbody>\n",
663
+ "</table>\n",
664
+ "</div>"
665
+ ],
666
+ "text/plain": [
667
+ " Metric MultiQuery\n",
668
+ "0 faithfulness 0.896804\n",
669
+ "1 answer_relevancy 0.953211\n",
670
+ "2 context_recall 0.890625\n",
671
+ "3 context_precision 0.920732\n",
672
+ "4 answer_correctness 0.690058"
673
+ ]
674
+ },
675
+ "execution_count": 5,
676
+ "metadata": {},
677
+ "output_type": "execute_result"
678
+ }
679
+ ],
680
+ "source": [
681
+ "multiquery_metrics_df"
682
+ ]
683
+ },
684
+ {
685
+ "cell_type": "code",
686
+ "execution_count": 6,
687
+ "metadata": {},
688
+ "outputs": [
689
+ {
690
+ "data": {
691
+ "text/html": [
692
+ "<div>\n",
693
+ "<style scoped>\n",
694
+ " .dataframe tbody tr th:only-of-type {\n",
695
+ " vertical-align: middle;\n",
696
+ " }\n",
697
+ "\n",
698
+ " .dataframe tbody tr th {\n",
699
+ " vertical-align: top;\n",
700
+ " }\n",
701
+ "\n",
702
+ " .dataframe thead th {\n",
703
+ " text-align: right;\n",
704
+ " }\n",
705
+ "</style>\n",
706
+ "<table border=\"1\" class=\"dataframe\">\n",
707
+ " <thead>\n",
708
+ " <tr style=\"text-align: right;\">\n",
709
+ " <th></th>\n",
710
+ " <th>Metric</th>\n",
711
+ " <th>MultiQuery</th>\n",
712
+ " <th>Fine-Tune Embedding</th>\n",
713
+ " <th>Baseline -&gt; Fine-Tune Embedding</th>\n",
714
+ " </tr>\n",
715
+ " </thead>\n",
716
+ " <tbody>\n",
717
+ " <tr>\n",
718
+ " <th>0</th>\n",
719
+ " <td>faithfulness</td>\n",
720
+ " <td>0.896804</td>\n",
721
+ " <td>0.868351</td>\n",
722
+ " <td>-0.028452</td>\n",
723
+ " </tr>\n",
724
+ " <tr>\n",
725
+ " <th>1</th>\n",
726
+ " <td>answer_relevancy</td>\n",
727
+ " <td>0.953211</td>\n",
728
+ " <td>0.955777</td>\n",
729
+ " <td>0.002566</td>\n",
730
+ " </tr>\n",
731
+ " <tr>\n",
732
+ " <th>2</th>\n",
733
+ " <td>context_recall</td>\n",
734
+ " <td>0.890625</td>\n",
735
+ " <td>0.944444</td>\n",
736
+ " <td>0.053819</td>\n",
737
+ " </tr>\n",
738
+ " <tr>\n",
739
+ " <th>3</th>\n",
740
+ " <td>context_precision</td>\n",
741
+ " <td>0.920732</td>\n",
742
+ " <td>0.953668</td>\n",
743
+ " <td>0.032936</td>\n",
744
+ " </tr>\n",
745
+ " <tr>\n",
746
+ " <th>4</th>\n",
747
+ " <td>answer_correctness</td>\n",
748
+ " <td>0.690058</td>\n",
749
+ " <td>0.603407</td>\n",
750
+ " <td>-0.086651</td>\n",
751
+ " </tr>\n",
752
+ " </tbody>\n",
753
+ "</table>\n",
754
+ "</div>"
755
+ ],
756
+ "text/plain": [
757
+ " Metric MultiQuery Fine-Tune Embedding \\\n",
758
+ "0 faithfulness 0.896804 0.868351 \n",
759
+ "1 answer_relevancy 0.953211 0.955777 \n",
760
+ "2 context_recall 0.890625 0.944444 \n",
761
+ "3 context_precision 0.920732 0.953668 \n",
762
+ "4 answer_correctness 0.690058 0.603407 \n",
763
+ "\n",
764
+ " Baseline -> Fine-Tune Embedding \n",
765
+ "0 -0.028452 \n",
766
+ "1 0.002566 \n",
767
+ "2 0.053819 \n",
768
+ "3 0.032936 \n",
769
+ "4 -0.086651 "
770
+ ]
771
+ },
772
+ "execution_count": 6,
773
+ "metadata": {},
774
+ "output_type": "execute_result"
775
+ }
776
+ ],
777
+ "source": [
778
+ "df_baseline_ft_embeddings = pd.merge(multiquery_metrics_df, multiquery_ft_embedding_metrics_df, on='Metric')\n",
779
+ "\n",
780
+ "df_baseline_ft_embeddings['Baseline -> Fine-Tune Embedding'] = df_baseline_ft_embeddings['Fine-Tune Embedding'] - df_baseline_ft_embeddings['MultiQuery']\n",
781
+ "\n",
782
+ "df_baseline_ft_embeddings"
783
+ ]
784
+ },
785
  {
786
  "cell_type": "code",
787
  "execution_count": 41,
app.py CHANGED
@@ -76,13 +76,9 @@ retriever = vectorstore.as_retriever(
76
  llm = ChatOpenAI(
77
  model="gpt-4o-mini",
78
  temperature=0,
79
- streaming=True,
80
  )
81
 
82
- retriever_llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
83
- multiquery_retriever = MultiQueryRetriever.from_llm(
84
- retriever=retriever, llm=retriever_llm
85
- )
86
 
87
 
88
  @cl.on_chat_start
 
76
  llm = ChatOpenAI(
77
  model="gpt-4o-mini",
78
  temperature=0,
 
79
  )
80
 
81
+ multiquery_retriever = MultiQueryRetriever.from_llm(retriever=retriever, llm=llm)
 
 
 
82
 
83
 
84
  @cl.on_chat_start