pszemraj's picture
Add BERTopic model
dcca47d
{
"topic_representations": {
"-1": [
[
"clustering",
0.4848363399505615
],
[
"convolutional",
0.37369394302368164
],
[
"neural",
0.312651127576828
],
[
"hierarchical",
0.27853137254714966
],
[
"autoregressive",
0.27748018503189087
],
[
"terms",
0.2767411172389984
],
[
"vector",
0.27440059185028076
],
[
"informatics",
0.25727829337120056
],
[
"cog",
0.2516351342201233
],
[
"contrastive",
0.24765774607658386
]
],
"0": [
[
"betty",
0.3284682631492615
],
[
"door",
0.3276304304599762
],
[
"her",
0.32692670822143555
],
[
"gillis",
0.3171406686306
],
[
"room",
0.31319257616996765
],
[
"it",
0.2811369299888611
],
[
"she",
0.2768298387527466
],
[
"comes",
0.2715323865413666
],
[
"norma",
0.26948243379592896
],
[
"he",
0.2526022791862488
]
],
"1": [
[
"frozen",
0.3659113645553589
],
[
"anna",
0.341401070356369
],
[
"snow",
0.3255179524421692
],
[
"hans",
0.3221004903316498
],
[
"elsa",
0.308777391910553
],
[
"her",
0.28782346844673157
],
[
"olaf",
0.28283146023750305
],
[
"ice",
0.26882657408714294
],
[
"door",
0.2574964165687561
],
[
"oh",
0.23128411173820496
]
],
"2": [
[
"closeup",
0.40513578057289124
],
[
"shot",
0.36382049322128296
],
[
"viewpoint",
0.34654057025909424
],
[
"umpire",
0.3015895187854767
],
[
"camera",
0.2784820795059204
],
[
"moment",
0.27803078293800354
],
[
"him",
0.2625623643398285
],
[
"across",
0.2548322081565857
],
[
"toward",
0.2538435161113739
],
[
"screentalk",
0.2526821494102478
]
],
"3": [
[
"dory",
0.5226326584815979
],
[
"gill",
0.46992284059524536
],
[
"coral",
0.4173227548599243
],
[
"marlin",
0.41619008779525757
],
[
"ocean",
0.4143868684768677
],
[
"fish",
0.41095995903015137
],
[
"swim",
0.32874956727027893
],
[
"moonfish",
0.328085333108902
],
[
"sharkbait",
0.31584128737449646
],
[
"crab",
0.29703080654144287
]
],
"4": [
[
"operations",
0.34669172763824463
],
[
"structure",
0.3408670127391815
],
[
"operation",
0.30059388279914856
],
[
"theory",
0.29499298334121704
],
[
"interpretation",
0.278867244720459
],
[
"explanation",
0.2513776123523712
],
[
"merge",
0.2296367585659027
],
[
"accessible",
0.2083439975976944
],
[
"simplest",
0.20353963971138
],
[
"system",
0.1941068023443222
]
],
"5": [
[
"spatial",
0.3951055705547333
],
[
"identity",
0.38651394844055176
],
[
"movement",
0.36755087971687317
],
[
"identities",
0.3519442081451416
],
[
"noir",
0.30545735359191895
],
[
"film",
0.2977658212184906
],
[
"escape",
0.2961333096027374
],
[
"films",
0.29314419627189636
],
[
"materiality",
0.29112547636032104
],
[
"modernity",
0.28847798705101013
]
],
"6": [
[
"vocabulary",
0.4336850643157959
],
[
"words",
0.3973504304885864
],
[
"topic",
0.34300118684768677
],
[
"text",
0.34016329050064087
],
[
"topics",
0.3280230760574341
],
[
"documents",
0.32741570472717285
],
[
"document",
0.3125719726085663
],
[
"important",
0.3091876804828644
],
[
"use",
0.3074134588241577
],
[
"data",
0.2872866094112396
]
],
"7": [
[
"encoder",
0.43614310026168823
],
[
"captions",
0.3940947949886322
],
[
"embeddings",
0.3763776421546936
],
[
"decoder",
0.34448710083961487
],
[
"caption",
0.32104817032814026
],
[
"image",
0.3169279098510742
],
[
"images",
0.31604981422424316
],
[
"embedding",
0.302567720413208
],
[
"photorealism",
0.2843397259712219
],
[
"hyperparameters",
0.27215155959129333
]
],
"8": [
[
"saw",
0.34083816409111023
],
[
"hounds",
0.31906190514564514
],
[
"smiled",
0.3068355917930603
],
[
"had",
0.301816463470459
],
[
"hunt",
0.2616852819919586
],
[
"thought",
0.25539731979370117
],
[
"came",
0.24651679396629333
],
[
"night",
0.24536016583442688
],
[
"rainsford",
0.23914431035518646
],
[
"stopped",
0.23613306879997253
]
],
"9": [
[
"learning",
0.4070361256599426
],
[
"assignment",
0.39031094312667847
],
[
"data",
0.3826494514942169
],
[
"research",
0.3340250253677368
],
[
"project",
0.3279297351837158
],
[
"projects",
0.3115888237953186
],
[
"doing",
0.2865546643733978
],
[
"questions",
0.285556823015213
],
[
"students",
0.27915194630622864
],
[
"response",
0.26637130975723267
]
],
"10": [
[
"cogvideo",
0.49034741520881653
],
[
"videos",
0.3677862882614136
],
[
"videogpt",
0.360797643661499
],
[
"video",
0.29351869225502014
],
[
"clips",
0.2921605408191681
],
[
"cog",
0.28942468762397766
],
[
"temporal",
0.26239338517189026
],
[
"generate",
0.25879186391830444
],
[
"autoregressive",
0.2531501352787018
],
[
"frames",
0.24417388439178467
]
],
"11": [
[
"lstm",
0.541039228439331
],
[
"recurrent",
0.43020468950271606
],
[
"encoder",
0.4302035868167877
],
[
"seq2seq",
0.40897467732429504
],
[
"neural",
0.40122631192207336
],
[
"decoder",
0.3706669211387634
],
[
"interpretable",
0.32842832803726196
],
[
"unsupervised",
0.3145299255847931
],
[
"predict",
0.30226990580558777
],
[
"medical_",
0.27314335107803345
]
],
"12": [
[
"improve",
0.32711517810821533
],
[
"next",
0.30205947160720825
],
[
"do",
0.2952941656112671
],
[
"going",
0.2921675741672516
],
[
"good",
0.2693977355957031
],
[
"go",
0.2688019871711731
],
[
"think",
0.2602623999118805
],
[
"like",
0.2579489052295685
],
[
"things",
0.2563323378562927
],
[
"are",
0.2535612881183624
]
],
"13": [
[
"vocoding",
0.4635753333568573
],
[
"spectrogram",
0.40794235467910767
],
[
"enhancement",
0.396728515625
],
[
"melspectrogram",
0.39321568608283997
],
[
"audio",
0.3639255166053772
],
[
"denoising",
0.3390977680683136
],
[
"reverb",
0.3325429856777191
],
[
"diffwave",
0.3322610557079315
],
[
"adversarial",
0.2974066734313965
],
[
"enhanced",
0.28673672676086426
]
],
"14": [
[
"probabilities",
0.30326712131500244
],
[
"tagging",
0.2987886667251587
],
[
"probability",
0.2826310396194458
],
[
"words",
0.26746612787246704
],
[
"gram",
0.2548341453075409
],
[
"sentence",
0.24815544486045837
],
[
"processing",
0.2447664588689804
],
[
"analogical",
0.2349052131175995
],
[
"reasoning",
0.23397308588027954
],
[
"lemmatization",
0.23278945684432983
]
],
"15": [
[
"convolutional",
0.5941274166107178
],
[
"segmentation",
0.4438096284866333
],
[
"superpixel",
0.3719842731952667
],
[
"convolutions",
0.36787962913513184
],
[
"superpixels",
0.36115726828575134
],
[
"pixels",
0.35811054706573486
],
[
"layers",
0.3561064302921295
],
[
"filters",
0.3434562683105469
],
[
"neural",
0.3377504050731659
],
[
"overfitting",
0.3270261585712433
]
]
},
"topics": [
15,
15,
15,
15,
15,
15,
15,
15,
-1,
15,
15,
-1,
15,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
0,
2,
2,
2,
2,
2,
2,
0,
2,
1,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
0,
2,
2,
2,
2,
2,
2,
2,
2,
2,
1,
2,
2,
2,
2,
2,
2,
2,
2,
2,
0,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
1,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
0,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
0,
2,
2,
2,
11,
11,
11,
11,
11,
11,
11,
11,
11,
11,
11,
11,
11,
-1,
11,
-1,
-1,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
2,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
9,
9,
6,
6,
9,
9,
9,
9,
9,
9,
9,
9,
6,
9,
9,
6,
9,
6,
6,
6,
6,
6,
-1,
12,
12,
12,
12,
12,
12,
12,
12,
12,
12,
12,
12,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
10,
10,
10,
-1,
10,
10,
10,
10,
10,
10,
10,
-1,
10,
10,
10,
10,
-1,
10,
10,
10,
10,
-1,
9,
9,
9,
6,
6,
6,
6,
6,
6,
4,
6,
6,
6,
6,
6,
6,
6,
6,
6,
6,
6,
6,
6,
9,
9,
9,
2,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
2,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
2,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
7,
7,
7,
7,
7,
7,
7,
7,
7,
7,
7,
7,
7,
-1,
7,
7,
7,
7,
7,
7,
7,
7,
7,
7,
7,
7,
7,
12,
13,
13,
13,
13,
13,
13,
13,
14,
13,
13,
13,
13,
13,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
9,
6,
6,
6,
6,
9,
6,
6,
6,
6,
6,
6,
-1,
6,
6,
0,
14,
14,
-1,
-1,
-1,
14,
14,
14,
14,
11,
14,
14,
14,
14,
11,
11,
14,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
0,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
2,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
2,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
12,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1
],
"topic_sizes": {
"15": 11,
"-1": 15,
"2": 171,
"0": 241,
"1": 211,
"11": 17,
"8": 22,
"3": 60,
"9": 21,
"6": 40,
"12": 14,
"5": 45,
"10": 18,
"4": 59,
"7": 26,
"13": 12,
"14": 12
},
"topic_mapper": [
[
-1,
-1,
-1
],
[
0,
0,
3
],
[
1,
1,
4
],
[
2,
2,
5
],
[
3,
3,
13
],
[
4,
4,
12
],
[
5,
5,
8
],
[
6,
6,
1
],
[
7,
7,
2
],
[
8,
8,
0
],
[
9,
9,
9
],
[
10,
10,
6
],
[
11,
11,
15
],
[
12,
12,
10
],
[
13,
13,
7
],
[
14,
14,
11
],
[
15,
15,
14
]
],
"topic_labels": {
"-1": "-1_clustering_convolutional_neural_hierarchical",
"0": "0_betty_door_her_gillis",
"1": "1_frozen_anna_snow_hans",
"2": "2_closeup_shot_viewpoint_umpire",
"3": "3_dory_gill_coral_marlin",
"4": "4_operations_structure_operation_theory",
"5": "5_spatial_identity_movement_identities",
"6": "6_vocabulary_words_topic_text",
"7": "7_encoder_captions_embeddings_decoder",
"8": "8_saw_hounds_smiled_had",
"9": "9_learning_assignment_data_research",
"10": "10_cogvideo_videos_videogpt_video",
"11": "11_lstm_recurrent_encoder_seq2seq",
"12": "12_improve_next_do_going",
"13": "13_vocoding_spectrogram_enhancement_melspectrogram",
"14": "14_probabilities_tagging_probability_words",
"15": "15_convolutional_segmentation_superpixel_convolutions"
},
"custom_labels": null,
"_outliers": 1,
"topic_aspects": {}
}