Spaces:
Sleeping
Sleeping
Tanguyvans
committed on
Commit
•
7833461
1
Parent(s):
f26b169
augment from similar
Browse files
utils.py
CHANGED
@@ -123,6 +123,28 @@ def get_similarities_among_diseases_uris(
|
|
123 |
return data
|
124 |
|
125 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
126 |
def get_embedding(string: str, encoder) -> List[float]:
|
127 |
# Embed the string using sentence-transformers
|
128 |
vector = encoder.encode(string, show_progress_bar=False)
|
|
|
123 |
return data
|
124 |
|
125 |
|
126 |
+
def augment_the_set_of_diseaces(
    engine, diseases: List[str], target_size: int = 15
) -> List[str]:
    """Extend ``diseases`` with similar entries until it holds ``target_size`` ids.

    On each round the database is asked for the single entry of
    ``Test.EntityEmbeddings`` that is not yet in ``diseases`` and has the
    highest score, where the score is the sum of ``VECTOR_COSINE`` between
    its embedding and the embeddings of the current ``diseases``, divided by
    the number of current ``diseases``. The last path segment of the returned
    URI is appended to the list, so every round also takes the previously
    added ids into account.

    Args:
        engine: Object exposing ``connect()`` that yields a connection with
            ``begin()`` and ``execute()`` (presumably a SQLAlchemy engine --
            confirm against the caller).
        diseases: Ids that are combined with the
            ``http://identifiers.org/medgen/`` prefix to form the URIs to
            look up. The list is extended in place.
        target_size: Number of ids the list should contain when done.
            Defaults to 15, the value that used to be hard-coded.

    Returns:
        The same ``diseases`` list object. It is returned unchanged when it
        is empty (there is nothing to compare against) or already holds at
        least ``target_size`` ids, and may stay shorter than ``target_size``
        if the database runs out of candidates.
    """
    missing = target_size - len(diseases)
    # An empty seed list would produce "IN ()" and a division by zero in SQL.
    if not diseases or missing <= 0:
        return diseases

    uri_prefix = "http://identifiers.org/medgen/"

    # One connection is enough for all rounds; no need to reconnect each time.
    with engine.connect() as conn:
        for _ in range(missing):
            # Bind the URIs as parameters instead of splicing caller-supplied
            # ids into the SQL text, so a crafted id cannot alter the query.
            params = {
                f"uri_{idx}": f"{uri_prefix}{disease}"
                for idx, disease in enumerate(diseases)
            }
            placeholders = ", ".join(f":{name}" for name in params)

            # NOTE(review): the query groups by e2.label while selecting
            # e2.uri; kept as in the original -- confirm this is intended.
            # len(diseases) is an int computed here, so inlining it is safe.
            sql = f"""
                SELECT TOP 1 e2.uri AS new_disease, (SUM(VECTOR_COSINE(e1.embedding, e2.embedding))/ {len(diseases)}) AS score
                FROM Test.EntityEmbeddings e1, Test.EntityEmbeddings e2
                WHERE e1.uri IN ({placeholders})
                AND e2.uri NOT IN ({placeholders})
                AND e2.label != 'nan'
                GROUP BY e2.label
                ORDER BY score DESC
            """

            with conn.begin():
                rows = conn.execute(text(sql), params).fetchall()

            # No candidate left: stop instead of failing on rows[0].
            if not rows:
                break

            # Keep only the id, i.e. the last path segment of the URI.
            diseases.append(rows[0][0].split("/")[-1])

    return diseases
|
147 |
+
|
148 |
def get_embedding(string: str, encoder) -> List[float]:
|
149 |
# Embed the string using sentence-transformers
|
150 |
vector = encoder.encode(string, show_progress_bar=False)
|