Spaces:

jharrison27
/

connections-solver

Runtime error

App Files Community

jharrison27 committed on May 20

Commit

66cf393

•

1 Parent(s): e2775c7

fix looping

Browse files

Files changed (1) hide show

app.py +18 -33

app.py CHANGED Viewed

@@ -1,15 +1,10 @@
 import streamlit as st
-import logging
 from transformers import pipeline
 from sklearn.metrics.pairwise import cosine_similarity
 from scipy.cluster.hierarchy import dendrogram, linkage, fcluster
 from sklearn.cluster import KMeans
 import numpy as np
-# Setting up logging
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
 # Mock data
 mock_words = [
     "apple", "banana", "cherry", "date",  # Fruits
@@ -27,7 +22,6 @@ models = {
 @st.cache_resource
 def load_models():
-    logger.info("Loading models...")
     pipelines = {}
     for name, model_name in models.items():
         pipelines[name] = pipeline('feature-extraction', model=model_name)
@@ -39,37 +33,28 @@ def embed_words(words, model_name):
     """
     Embed the given words using the specified model and return the averaged embeddings.
     """
-    logger.info(f"Embedding words using model {model_name}...")
     embedder = pipelines[model_name]
     embeddings = embedder(words)
     return np.array([np.mean(embedding[0], axis=0) for embedding in embeddings])
-def iterative_clustering(words, model_name, method):
-    logger.info(f"Starting iterative clustering using {method}...")
-    remaining_words = words[:]
-    grouped_words = []
-    while len(remaining_words) >= 4:
-        embeddings = embed_words(remaining_words, model_name)
-        if method == 'Cosine Similarity':
-            logger.info("Clustering using Cosine Similarity...")
-            sim_matrix = cosine_similarity(embeddings)
-            Z = linkage(sim_matrix, 'average', metric='cosine')
-            labels = fcluster(Z, t=4, criterion='maxclust')
-        elif method == 'K-means':
-            logger.info("Clustering using K-means...")
-            kmeans = KMeans(n_clusters=4, random_state=0).fit(embeddings)
-            labels = kmeans.labels_ + 1
-        for i in range(1, 5):
-            cluster = [word for idx, word in enumerate(remaining_words) if labels[idx] == i]
-            if len(cluster) == 4:
-                grouped_words.append(cluster)
-                remaining_words = [word for word in remaining_words if word not in cluster]
-                break
-    return grouped_words
 def display_clusters(clusters):
-    logger.info("Displaying clusters...")
-    for i, words in enumerate(clusters):
         st.markdown(f"### Group {i+1}")
         st.write(", ".join(words))
@@ -83,8 +68,8 @@ def main():
     if st.button("Generate Clusters"):
         with st.spinner("Generating clusters..."):
-            clusters = iterative_clustering(mock_words, model_name, clustering_method)
-            display_clusters(clusters)
 if __name__ == "__main__":
     main()

 import streamlit as st
 from transformers import pipeline
 from sklearn.metrics.pairwise import cosine_similarity
 from scipy.cluster.hierarchy import dendrogram, linkage, fcluster
 from sklearn.cluster import KMeans
 import numpy as np
 # Mock data
 mock_words = [
     "apple", "banana", "cherry", "date",  # Fruits
 @st.cache_resource
 def load_models():
     pipelines = {}
     for name, model_name in models.items():
         pipelines[name] = pipeline('feature-extraction', model=model_name)
     """
     Embed the given words using the specified model and return the averaged embeddings.
     """
     embedder = pipelines[model_name]
     embeddings = embedder(words)
     return np.array([np.mean(embedding[0], axis=0) for embedding in embeddings])
+def cluster_words(words, model_name, method):
+    embeddings = embed_words(words, model_name)
+    if method == 'Cosine Similarity':
+        # Use cosine similarity and hierarchical clustering
+        sim_matrix = cosine_similarity(embeddings)
+        Z = linkage(sim_matrix, 'average', metric='cosine')
+        labels = fcluster(Z, t=4, criterion='maxclust')
+    elif method == 'K-means':
+        # Use K-means clustering
+        kmeans = KMeans(n_clusters=4, random_state=0).fit(embeddings)
+        labels = kmeans.labels_ + 1
+    clusters = {i: [] for i in range(1, 5)}
+    for word, label in zip(words, labels):
+        clusters[label].append(word)
+    return clusters
 def display_clusters(clusters):
+    for i, words in clusters.items():
         st.markdown(f"### Group {i+1}")
         st.write(", ".join(words))
     if st.button("Generate Clusters"):
         with st.spinner("Generating clusters..."):
+            clusters = cluster_words(mock_words, model_name, clustering_method)
+        display_clusters(clusters)
 if __name__ == "__main__":
     main()