jharrison27's picture
Initial commit
fce83cf
raw
history blame
1.15 kB
import streamlit as st
from transformers import pipeline
from sklearn.cluster import KMeans
import numpy as np
# Mock data: sixteen sample words, four from each of four categories.
mock_words = [
    # Fruits
    "apple",
    "banana",
    "cherry",
    "date",
    # Vehicles
    "car",
    "truck",
    "bus",
    "bicycle",
    # Colors
    "red",
    "blue",
    "green",
    "yellow",
    # Pets
    "cat",
    "dog",
    "rabbit",
    "hamster",
]
# Embedding model
@st.cache_resource
def _load_embedder():
    """Build the DistilBERT feature-extraction pipeline.

    Streamlit re-executes this script on every widget interaction. Caching
    the pipeline as a resource means the model is constructed once and then
    reused across reruns instead of being rebuilt each time.
    """
    return pipeline('feature-extraction', model='distilbert-base-uncased')


# Module-level handle used by embed_words(); same name as before.
embedder = _load_embedder()
def embed_words(words):
    """Return one embedding vector per word as a NumPy array.

    Each word is passed through the module-level ``embedder``; the first
    element of each per-word output is averaged along axis 0 to give a single
    vector for that word.

    Args:
        words: The words to embed, passed to ``embedder`` as-is.

    Returns:
        A NumPy array built from the per-word averaged vectors, in input
        order.
    """
    raw_outputs = embedder(words)
    pooled_vectors = []
    for output in raw_outputs:
        # NOTE(review): assumes output[0] is the (tokens, hidden) matrix for
        # this word, so the mean over axis 0 pools across tokens - confirm
        # against the pipeline's output format.
        token_matrix = output[0]
        pooled_vectors.append(np.mean(token_matrix, axis=0))
    return np.array(pooled_vectors)
def cluster_words(words, n_clusters=4):
    """Group words into clusters by running k-means on their embeddings.

    Args:
        words: Iterable of words to cluster.
        n_clusters: Number of groups to form. Defaults to 4, the value that
            was previously hard-coded.

    Returns:
        A dict mapping each cluster index ``0 .. n_clusters - 1`` to the list
        of words assigned to that cluster, in input order. Every index is
        present, even if its list is empty.

    Raises:
        ValueError: If ``n_clusters`` is less than 1, or if there are fewer
            words than requested clusters.
    """
    # Materialise once so a one-shot iterable survives both embedding and the
    # zip below.
    words = list(words)
    if n_clusters < 1:
        raise ValueError(f"n_clusters must be at least 1, got {n_clusters}")
    if len(words) < n_clusters:
        raise ValueError(
            f"need at least {n_clusters} words to form {n_clusters} "
            f"clusters, got {len(words)}"
        )

    embeddings = embed_words(words)
    # Fixed random_state keeps the grouping reproducible between runs.
    kmeans = KMeans(n_clusters=n_clusters, random_state=0).fit(embeddings)

    clusters = {i: [] for i in range(n_clusters)}
    for word, label in zip(words, kmeans.labels_):
        # labels_ holds NumPy integers; normalise to plain int keys.
        clusters[int(label)].append(word)
    return clusters
def main():
    """Render the Streamlit page.

    Shows the title and a "Generate Clusters" button. When the button is
    pressed, the mock words are clustered and each group is written to the
    page on its own line.
    """
    st.title("NYT Connections Solver")

    # Nothing else to draw until the user asks for clusters.
    if not st.button("Generate Clusters"):
        return

    clusters = cluster_words(mock_words)
    for i, words in clusters.items():
        # Cluster indices are zero-based; display them starting at 1.
        st.write(f"Group {i+1}: {', '.join(words)}")
# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()