text-matching / topics.py
Keane Moraes
clustering works
d87b50e
raw
history blame contribute delete
924 Bytes
import openai
from utils import *
class TopicModelling:
    """Extract keyword and concept phrases from a single text corpus.

    The extraction itself is delegated to the object returned by
    ``load_keyword_model()``; this class only needs it to expose an
    ``extract_keywords`` method (presumably a KeyBERT-style model --
    confirm against ``utils.load_keyword_model``).
    """

    # Only referenced by the commented-out chunking line in ``__init__``.
    EMBEDDING_MAX_TOKENS = 1023

    def __init__(self, text: str) -> None:
        """Store the corpus and load the keyword-extraction model.

        Args:
            text: The full text to analyse; stored unchanged as
                ``self.corpus``.
        """
        # Initialised empty; ``generate_topics`` returns its results and does
        # not write to this attribute.
        self.keywords = []
        self.corpus = text
        # Sentence chunking is currently disabled:
        # self.text = create_nest_sentences(self.corpus, self.EMBEDDING_MAX_TOKENS)
        self.model = load_keyword_model()

    def generate_topics(self) -> tuple:
        """Run three extraction passes over the corpus and collect phrases.

        Each item returned by ``self.model.extract_keywords`` is indexed with
        ``[0]`` to obtain the phrase (presumably items are ``(phrase, score)``
        pairs -- confirm against the model's API).

        Returns:
            A 2-tuple ``(keywords, concepts)``:

            * ``keywords`` -- phrases from the ``(1, 1)`` n-gram pass followed
              by phrases from the ``(1, 2)`` pass, both with
              ``stop_words=None``. The two lists are simply concatenated, so
              a phrase found by both passes appears twice.
            * ``concepts`` -- phrases from the ``(3, 3)`` n-gram pass with
              ``stop_words='english'`` and ``top_n=5``.
        """
        single_word_hits = self.model.extract_keywords(
            self.corpus, keyphrase_ngram_range=(1, 1), stop_words=None
        )
        short_phrase_hits = self.model.extract_keywords(
            self.corpus, keyphrase_ngram_range=(1, 2), stop_words=None
        )
        keywords = [hit[0] for hit in single_word_hits]
        keywords += [hit[0] for hit in short_phrase_hits]

        trigram_hits = self.model.extract_keywords(
            self.corpus,
            keyphrase_ngram_range=(3, 3),
            stop_words='english',
            top_n=5,
        )
        concepts = [hit[0] for hit in trigram_hits]

        return keywords, concepts