"""DebateKG Streamlit app: page setup and loading of the semantic graph."""

import pysbd
from txtai.embeddings import Embeddings
import networkx as nx
from tqdm import tqdm
from txtai.graph import GraphFactory
from datasets import load_dataset
import streamlit as st
import streamlit.components.v1 as components

# Page chrome. set_page_config is issued before any other Streamlit call.
st.set_page_config(page_title="DebateKG")
st.title("DebateKG - Automatic Policy Debate Case Creation")
st.caption("github: https://github.com/Hellisotherpeople/DebateKG")

# Training split of the DebateSum dataset.
dataset = load_dataset("Hellisotherpeople/DebateSum", split="train")

# English sentence segmenter; clean=False is passed through to pysbd as-is.
seg = pysbd.Segmenter(language="en", clean=False)

# Embeddings configuration.
# NOTE: the model id must be spelled "sentence-transformers/..."; the previous
# value was missing the leading "s" and did not name an existing model.
embeddings = Embeddings({
    "path": "sentence-transformers/all-mpnet-base-v2",
    "content": True,
    # Registers graph.attribute under the name "graph" so that the
    # expressions below can call it.
    "functions": [
        {"name": "graph", "function": "graph.attribute"},
    ],
    # Named expressions "topic" and "topicrank", each reading a graph
    # attribute for a given indexid.
    "expressions": [
        {"name": "topic", "expression": "graph(indexid, 'topic')"},
        {"name": "topicrank", "expression": "graph(indexid, 'topicrank')"},
    ],
    # Graph settings; see the txtai graph configuration docs for the exact
    # semantics of limit/minscore/topics.
    "graph": {
        "limit": 100,
        "minscore": 0.10,
        "topics": {
            "terms": 4,
            "resolution": 100,
        },
    },
})

# Load the prebuilt index archive (path is relative to the working directory).
embeddings.load("DebateSum_SemanticGraph_mpnet_extract.tar.gz")
graph = embeddings.graph

# Sanity check written to stdout (not the Streamlit page): node and edge counts.
print(graph.backend.number_of_nodes(), graph.backend.number_of_edges())