alvinhenrick committed on
Commit
e91a13e
1 Parent(s): db478bb

switch to biobert

Browse files
medirag/index/local.py CHANGED
@@ -6,7 +6,7 @@ from llama_index.vector_stores.faiss import FaissVectorStore
6
 
7
  class DailyMedIndexer:
8
  def __init__(self,
9
- model_name="sentence-transformers/all-mpnet-base-v2",
10
  dimension=768,
11
  persist_dir="./storage"):
12
 
 
6
 
7
  class DailyMedIndexer:
8
  def __init__(self,
9
+ model_name="dmis-lab/biobert-base-cased-v1.2",
10
  dimension=768,
11
  persist_dir="./storage"):
12
 
tests/core/test_document_processor.py CHANGED
@@ -25,7 +25,7 @@ def test_document_processor(data_dir):
25
  # Index and query documents
26
  indexer = DailyMedIndexer()
27
  indexer.load_index(documents=documents)
28
- # indexer.save_index()
29
 
30
  query = "What are the key things about the drug's usage?"
31
  results = indexer.retrieve(query)
 
25
  # Index and query documents
26
  indexer = DailyMedIndexer()
27
  indexer.load_index(documents=documents)
28
+ # indexer.save_index(persist_dir="../data/daily_bio_bert_indexed")
29
 
30
  query = "What are the key things about the drug's usage?"
31
  results = indexer.retrieve(query)
tests/data/daily_bio_bert_indexed/default__vector_store.json ADDED
Binary file (399 kB). View file
 
tests/data/daily_bio_bert_indexed/docstore.json ADDED
The diff for this file is too large to render. See raw diff
 
tests/data/daily_bio_bert_indexed/graph_store.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"graph_dict": {}}
tests/data/daily_bio_bert_indexed/image__vector_store.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"embedding_dict": {}, "text_id_to_ref_doc_id": {}, "metadata_dict": {}}
tests/data/daily_bio_bert_indexed/index_store.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"index_store/data": {"d62d52d7-6cfa-482f-9213-17f807073522": {"__type__": "vector_store", "__data__": "{\"index_id\": \"d62d52d7-6cfa-482f-9213-17f807073522\", \"summary\": null, \"nodes_dict\": {\"0\": \"2f8628c2-b725-43df-9650-518b49b1ea97\", \"1\": \"b30e06b9-7415-4a29-a0f5-8d2ad913f27f\", \"2\": \"8fc0f6b1-9bad-4b41-9848-6f7e3782548a\", \"3\": \"f09cc63c-a6f1-41d9-a2ca-da6264526c99\", \"4\": \"b4d687b5-88b5-40d5-b7cd-75efac3f0578\", \"5\": \"e53bf321-c93c-4cdc-acb2-ca75473cea71\", \"6\": \"75134b5e-4def-4540-a00f-a14e29643c65\", \"7\": \"e90118dd-1d06-4a5c-ac4f-f5a1a8afd5b3\", \"8\": \"7699cfbc-aad3-484d-a5f7-79892575a273\", \"9\": \"bf2e2eb8-31fd-4c5b-aee5-02576e882cfb\", \"10\": \"c78a4884-f4e9-4c73-bc57-0df7db00600d\", \"11\": \"de81b18c-e880-4006-9864-f09a3a5eef92\", \"12\": \"6f30e20a-c021-45f2-b6e6-2d4183fcced0\", \"13\": \"4a0a810d-a36a-4b40-8f64-e4cb7066b8a4\", \"14\": \"846d389d-6329-4922-956c-6e24bf4de6c4\", \"15\": \"f1fabfa6-1a97-4b57-b902-070c6294345d\", \"16\": \"1b603c07-aa02-4292-933c-40498b68f94b\", \"17\": \"9d461565-600c-4683-86f8-94d3368171e4\", \"18\": \"076663f4-35a8-4d68-81f3-6219feaec27f\", \"19\": \"047f1a1b-256b-47f4-a2f7-a2239e491073\", \"20\": \"3278807e-23a2-42cd-abf1-4458541a61ca\", \"21\": \"6f7a3c34-0781-4dac-b863-07adf8e10de8\", \"22\": \"01ba95e8-ceca-46fa-9af3-cf27a64a20bd\", \"23\": \"32a905b6-7e62-4bcf-a044-324085089a79\", \"24\": \"619058dc-fee9-4402-886f-235f669e7bd0\", \"25\": \"31ac8010-5b22-467e-9f54-cb4bed31c072\", \"26\": \"9e56872a-1cdb-41a3-bd21-a7d0f199e57f\", \"27\": \"be6f0f43-35da-4907-b6e7-f795847570b7\", \"28\": \"04bd8c3c-ef0e-4e68-91d8-4dc026574fa6\", \"29\": \"34a759f6-95e5-4b0e-b557-f909907cb203\", \"30\": \"ef16122f-4ea3-48c0-8aa1-4a298a09ca2c\", \"31\": \"395ed926-a51d-4145-acaa-caebafdcc5cb\", \"32\": \"1884baf9-8749-4255-b5d4-7102dce3120f\", \"33\": \"ad4ec267-b440-407f-bd9d-4aff85fb30b4\", \"34\": \"5aee1612-de39-4d07-bfb8-2a3c2f4cc3f1\", \"35\": \"986c393c-46d3-47be-ab68-b5486a9ad84b\", \"36\": 
\"bab72927-561f-40b2-8921-1685923080bb\", \"37\": \"b0f3e46c-149d-44e7-8994-19d8c4d7c5fb\", \"38\": \"4f3a8da3-cb6c-49a0-9b8e-ef612863d203\", \"39\": \"8fc1bcc8-391f-4373-b4d3-473b8e8205cd\", \"40\": \"f8c907db-f311-4d92-a31c-63aedbcc0bce\", \"41\": \"605fd6ae-5973-414d-bd28-ff054cbcb348\", \"42\": \"01ea925f-3b04-447f-93e0-c0ee56b5ea41\", \"43\": \"231a8a8e-9807-49b7-9aad-1b0cf6e0a60b\", \"44\": \"21de36d7-a538-447c-99d0-aaaf9d1276e2\", \"45\": \"03488aa8-1846-4a3e-8623-82a34ffc1759\", \"46\": \"c6cd32fc-95d2-49e1-8087-a556f3912d02\", \"47\": \"c37f68c0-c9d1-4618-af60-7db0402818e0\", \"48\": \"002f6f72-cb81-4d43-b1a5-7334b4e0a3b2\", \"49\": \"626200ce-79a7-471e-8080-d08cb6004b8a\", \"50\": \"a4a4ef05-3371-486c-8868-5b661a5d41b0\", \"51\": \"487111b6-0e09-4114-8117-20238fa0e2c8\", \"52\": \"c8f5a33b-3073-43ef-8c5b-cf43c1c40095\", \"53\": \"e8a9016e-87ff-4f49-ab01-295823d39723\", \"54\": \"64d621a4-2743-4629-92aa-1b977aa3ca41\", \"55\": \"b7a2f47f-312d-4ebe-b93b-ed44e2464131\", \"56\": \"e5433772-2a18-4412-9621-212bc6321cf5\", \"57\": \"3a802601-f3fd-48f1-9303-0c457d54dfdf\", \"58\": \"0af02d91-b0e9-4eaf-8eaa-fe973d7288ed\", \"59\": \"1a3ba147-9bed-43bd-8e48-fe50dd9de3c3\", \"60\": \"e4898cc8-3755-4d30-8a4c-fc732f688b41\", \"61\": \"b9486676-3728-43de-8e25-4f5f471482b8\", \"62\": \"779c7e41-d155-40ff-b24c-bda8f026cd8b\", \"63\": \"e1856475-7385-40a2-a4db-4c0d44405430\", \"64\": \"1ef71a06-1624-48a6-b85a-106002abe8e3\", \"65\": \"d792d6c4-4b49-4509-88bb-96e507cdf0f6\", \"66\": \"f8376c97-b87f-456b-a781-ae64a7213a0a\", \"67\": \"4e7aff44-0ef7-4a50-9cf7-248be010cf18\", \"68\": \"ccda30df-1864-4dc1-a0a3-dac8e9793d6a\", \"69\": \"c1d727aa-b05c-4507-ab07-daed0951a7a3\", \"70\": \"b0bfae1b-4247-4dd0-9adc-19d9d7a0008d\", \"71\": \"198634d5-a3a2-4048-8324-ef9ddadac802\", \"72\": \"241382c7-c1ee-41e7-9eb8-52f2d4381db4\", \"73\": \"8f33218b-b0a6-4e72-9dcc-9c8e58ac10c2\", \"74\": \"f6cd786a-6875-49e1-8715-5d315941526c\", \"75\": \"3d06ca8e-5a3e-4b61-ada3-e07e4963f361\", \"76\": 
\"80e17797-2d1c-4e41-a046-8a9f7c443ee3\", \"77\": \"85a01493-6a69-464e-aa2d-b2d944b536fb\", \"78\": \"502d4111-d62e-46bf-aeb8-6e9c3bffdf4d\", \"79\": \"7bad5348-2e8b-4877-a73f-0b97fa237267\", \"80\": \"6e6f9fc6-737e-43dd-ac29-822f466df2e2\", \"81\": \"d28454b6-7bb0-41cf-bca5-7e89f29b1146\", \"82\": \"5d9d4431-737f-4e08-a041-021dd05fdf23\", \"83\": \"e387651c-882f-405f-8b67-5ac483feeddc\", \"84\": \"43dd6e9e-4e59-49a9-bf33-492e50be2c35\", \"85\": \"db31cfe5-a36d-41ec-9237-9b992d196e5c\", \"86\": \"bc0dce03-3747-4a12-a2f2-c8e7e100b047\", \"87\": \"781eb426-0626-4efe-8f96-b751ca661998\", \"88\": \"43ad57c1-b7b1-4fc8-9c65-d38aedd85648\", \"89\": \"e9cb3d4d-a4c4-4799-b3dc-b07d6302163a\", \"90\": \"ba8d05c9-ca8e-4d91-a564-c8c41114c9e0\", \"91\": \"b25dfb58-7df4-45cd-b257-94aa18799067\", \"92\": \"901d9988-5ce6-4415-a151-4f188d6f19d7\", \"93\": \"6be7709b-d1f3-4fc0-b917-bf58cb1580a2\", \"94\": \"1aa03969-fbaf-4288-bee1-104b5221fd82\", \"95\": \"73749dde-6f8c-482e-909e-83e7d8ff0840\", \"96\": \"cd2cc95b-a570-404c-bf31-0aab023bf806\", \"97\": \"3409670f-54f0-4141-9e8b-07e36c1cd955\", \"98\": \"b31116e6-b34a-41f7-aadb-d41a32c62b66\", \"99\": \"75b8fbb8-8d1d-41df-a8b1-1e5b5d698a97\", \"100\": \"92ee522f-d370-4b5e-ba58-07bca056a901\", \"101\": \"7e036939-76b9-4bb2-87dd-d9b689d5d26f\", \"102\": \"8ae34c33-2c29-4529-979c-bd1007fa8504\", \"103\": \"7a44b537-01ad-4847-93ae-fe9db9da2471\", \"104\": \"1479922d-b952-408e-a98f-3e2cc996a052\", \"105\": \"0a5a64ed-e92a-4180-9c18-eb0efdff6fa6\", \"106\": \"4cb43227-f400-4378-845e-386108f9de31\", \"107\": \"a0bbb4c1-badb-4c55-808a-17b61bafcff7\", \"108\": \"d720d30c-4f52-4fd3-ade3-c64a1e76cd06\", \"109\": \"eb2e4ea3-2f79-4272-aced-48556afec86e\", \"110\": \"fa3b8384-6347-4978-87a8-1bdd23417375\", \"111\": \"2a4fed89-eb91-4bdd-8b1e-1a617b5a4e3f\", \"112\": \"4871df5a-112e-4f49-b0e3-2626276299a0\", \"113\": \"6a24f2e2-d005-48cc-9e0b-3d23a0a072d8\", \"114\": \"b89c2bdc-6831-4b14-8801-17afbe8496ab\", \"115\": 
\"48b63c32-89e8-4fd1-8400-51aa1874e3e7\", \"116\": \"fb0f0fa4-ead4-4c16-a157-0c0a452ac1a8\", \"117\": \"3a287995-f7cb-4f9c-80fb-5f0cd9942dff\", \"118\": \"55ffe13d-9c44-48a3-a505-fa4872875270\", \"119\": \"d80cf990-431b-49d4-a0d1-63f668654b0e\", \"120\": \"59db1663-98fe-47f1-9aed-654868eddc98\", \"121\": \"fc4ac23c-b0c1-4385-b4fd-915879026f83\", \"122\": \"d43170c2-eab1-44cd-9d4d-378fbdb60c0c\", \"123\": \"ec898f3c-260b-4d1b-99fc-004b80154ff7\", \"124\": \"2226360c-46d9-46d3-b978-3b7d59584fb0\", \"125\": \"b4741aed-e2f8-4a63-8a88-b0865e8cbfdc\", \"126\": \"6bd02a9c-32b3-4645-9f69-de0cccdc3fb4\", \"127\": \"3ba01e3e-4420-408e-866a-8bc132d42074\", \"128\": \"482dfaff-f40f-45f8-ba2a-277dc8528579\", \"129\": \"ed80b68b-33d3-4532-ad8c-2938113e326d\"}, \"doc_id_dict\": {}, \"embeddings_dict\": {}}"}}}
tests/rag/test_rag.py CHANGED
@@ -10,7 +10,7 @@ load_dotenv() # take environment variables from .env.
10
 
11
  def test_rag_with_example(data_dir):
12
  # Example usage:
13
- index_path = data_dir.joinpath("daily_med_indexed")
14
  # Ensure the path is correct and the directory exists
15
  assert index_path.exists(), f"Directory not found: {index_path}"
16
 
@@ -19,7 +19,7 @@ def test_rag_with_example(data_dir):
19
  indexer.load_index()
20
  rm = DailyMedRetrieve(daily_med_indexer=indexer)
21
 
22
- query = "What are the key things about the drug's usage?"
23
  turbo = dspy.OpenAI(model='gpt-3.5-turbo')
24
 
25
  dspy.settings.configure(lm=turbo, rm=rm)
 
10
 
11
  def test_rag_with_example(data_dir):
12
  # Example usage:
13
+ index_path = data_dir.joinpath("daily_bio_bert_indexed")
14
  # Ensure the path is correct and the directory exists
15
  assert index_path.exists(), f"Directory not found: {index_path}"
16
 
 
19
  indexer.load_index()
20
  rm = DailyMedRetrieve(daily_med_indexer=indexer)
21
 
22
+ query = "What information do you have about Clopidogrel? "
23
  turbo = dspy.OpenAI(model='gpt-3.5-turbo')
24
 
25
  dspy.settings.configure(lm=turbo, rm=rm)