File size: 924 Bytes
28e14c5
 
 
d87b50e
28e14c5
 
 
 
 
d87b50e
 
28e14c5
 
 
d87b50e
 
 
 
 
 
 
 
 
 
 
28e14c5
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import openai 
from utils import *

class TopicModelling:
    """Extract keywords and three-word concepts from a text corpus.

    The heavy lifting is delegated to the object returned by
    ``load_keyword_model()`` (brought in through ``from utils import *``);
    this class only calls its ``extract_keywords`` method with different
    n-gram ranges and collects the resulting phrases.
    """

    # Token budget that was meant for splitting the corpus into chunks.
    # NOTE: nothing in this class reads it at the moment (see __init__).
    EMBEDDING_MAX_TOKENS = 1023

    def __init__(self, text: str) -> None:
        """Store the corpus and load the keyword-extraction model.

        Args:
            text: The raw text to analyse.
        """
        self.keywords = []
        self.corpus = text
        # NOTE: chunking the corpus with
        # create_nest_sentences(self.corpus, self.EMBEDDING_MAX_TOKENS)
        # is currently disabled; the whole corpus is handed to the model.
        self.model = load_keyword_model()

    def generate_topics(self) -> tuple:
        """Return the keywords and concepts found in the corpus.

        Three extraction passes are run over ``self.corpus``:

        * single words (n-gram range ``(1, 1)``), no stop-word filtering;
        * one- or two-word phrases (range ``(1, 2)``), no stop-word
          filtering;
        * three-word phrases (range ``(3, 3)``) with English stop words
          removed, limited to the top 5.

        Returns:
            A ``(keywords, concepts)`` tuple of two lists of strings.
            ``keywords`` holds the phrases of the first two passes in the
            order they were produced, without repeats; ``concepts`` holds
            the three-word phrases.
        """
        corpus = self.corpus
        extract = self.model.extract_keywords

        single_words = extract(corpus, keyphrase_ngram_range=(1, 1), stop_words=None)
        short_phrases = extract(corpus, keyphrase_ngram_range=(1, 2), stop_words=None)
        trigrams = extract(
            corpus,
            keyphrase_ngram_range=(3, 3),
            stop_words='english',
            top_n=5,
        )

        # Each result item is indexed with [0] to get the phrase itself
        # (presumably the items are (phrase, score) pairs -- confirm against
        # the model returned by load_keyword_model).
        #
        # The (1, 1) and (1, 2) ranges overlap, so a single word can be
        # reported by both passes. dict.fromkeys drops those repeats while
        # keeping the first-seen order.
        candidates = [item[0] for item in single_words]
        candidates.extend(item[0] for item in short_phrases)
        keywords = list(dict.fromkeys(candidates))

        concepts = [item[0] for item in trigrams]

        return keywords, concepts