triplex-knowledge-graph-visualizer

Runtime error

App Files Files Community

Remsky committed on Jul 27

Commit

4289090

•

0 Parent(s):

Super-squash branch 'main' using huggingface_hub

Browse files

Files changed (8) hide show

.gitattributes +35 -0
README.md +13 -0
app.py +94 -0
lib/__init__.py +0 -0
lib/graph_extract.py +142 -0
lib/samples.py +46 -0
lib/visualize.py +111 -0
requirements.txt +7 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,35 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,13 @@

+---
+title: Triplex Knowledge Graph Visualizer
+emoji: 🕸️
+colorFrom: blue
+colorTo: red
+sdk: gradio
+app_file: app.py
+pinned: true
+models:
+ - SciPhi/Triplex
+preload_from_hub:
+ - SciPhi/Triplex
+---

app.py ADDED Viewed

	@@ -0,0 +1,94 @@

+import random
+import gradio as gr
+import spaces
+from lib.graph_extract import triplextract, parse_triples
+from lib.visualize import create_cytoscape_plot
+from lib.samples import snippets
+# Upper bound on whitespace-separated words accepted in a single request.
+WORD_LIMIT = 300
+def _split_csv(raw):
+    """Return the stripped, non-empty entries of a comma-separated string."""
+    return [entry.strip() for entry in (raw or "").split(",") if entry.strip()]
+@spaces.GPU
+def process_text(text, entity_types, predicates):
+    """Validate the form inputs, run triple extraction and build the graph figure.
+    Args:
+        text: Free text to analyse, limited to WORD_LIMIT whitespace-separated words.
+        entity_types: Comma-separated entity type names.
+        predicates: Comma-separated predicate names.
+    Returns:
+        A ``(figure, message)`` pair. ``figure`` is None whenever validation,
+        generation or parsing fails, and ``message`` then carries the reason.
+        On success ``message`` lists the parsed entities, the relationships
+        and the raw model output.
+    """
+    # Count words first: blank or whitespace-only text has none and must not
+    # be forwarded to the model as an empty prompt.
+    words = text.split() if text else []
+    if not words:
+        return None, "Please enter some text."
+    if len(words) > WORD_LIMIT:
+        return None, f"Please limit your input to {WORD_LIMIT} words. Current word count: {len(words)}"
+    entity_types = _split_csv(entity_types)
+    predicates = _split_csv(predicates)
+    if not entity_types:
+        return None, "Please enter at least one entity type."
+    if not predicates:
+        return None, "Please enter at least one predicate."
+    try:
+        prediction = triplextract(text, entity_types, predicates)
+        # triplextract reports its own failures as strings starting with "Error".
+        if prediction.startswith("Error"):
+            return None, prediction
+        entities, relationships = parse_triples(prediction)
+        if not entities and not relationships:
+            return (
+                None,
+                "No entities or relationships found. Try different text or check your input.",
+            )
+        fig = create_cytoscape_plot(entities, relationships)
+        return (
+            fig,
+            f"Entities: {entities}\nRelationships: {relationships}\n\nRaw output:\n{prediction}",
+        )
+    except Exception as e:
+        # UI boundary: surface any failure in the output textbox instead of
+        # letting the event handler raise.
+        print(f"Error in process_text: {e}")
+        return None, f"An error occurred: {str(e)}"
+def update_inputs(sample_name):
+    """Return the form values of the bundled sample named *sample_name*.
+    The result is a ``(text_input, entity_types, predicates)`` tuple, in the
+    order of the three textboxes the dropdown handler writes to. An unknown
+    name raises KeyError from the ``snippets`` lookup.
+    """
+    chosen = snippets[sample_name]
+    form_values = (chosen.text_input, chosen.entity_types, chosen.predicates)
+    return form_values
+# Page layout: sample picker and inputs in the left column, the graph and the
+# textual output in the (wider) right column.
+with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
+    gr.Markdown("# Knowledge Graph Extractor")
+    # Pre-fill the form with a randomly chosen bundled sample.
+    sample_names = list(snippets)
+    default_sample_name = random.choice(sample_names)
+    default_sample = snippets[default_sample_name]
+    with gr.Row():
+        with gr.Column(scale=1):
+            sample_dropdown = gr.Dropdown(
+                label="Select Sample",
+                choices=sample_names,
+                value=default_sample_name,
+            )
+            input_text = gr.Textbox(
+                label="Input Text",
+                value=default_sample.text_input,
+                lines=5,
+            )
+            entity_types = gr.Textbox(
+                label="Entity Types",
+                value=default_sample.entity_types,
+            )
+            predicates = gr.Textbox(
+                label="Predicates",
+                value=default_sample.predicates,
+            )
+            submit_btn = gr.Button("Extract Knowledge Graph")
+        with gr.Column(scale=2):
+            output_graph = gr.Plot(label="Knowledge Graph")
+            error_message = gr.Textbox(label="Textual Output")
+    # Picking another sample overwrites the three input fields.
+    sample_dropdown.change(
+        fn=update_inputs,
+        inputs=[sample_dropdown],
+        outputs=[input_text, entity_types, predicates],
+    )
+    # The button runs extraction and fills the plot and the text output.
+    submit_btn.click(
+        fn=process_text,
+        inputs=[input_text, entity_types, predicates],
+        outputs=[output_graph, error_message],
+    )
+if __name__ == "__main__":
+    demo.launch()

lib/__init__.py ADDED Viewed

File without changes

lib/graph_extract.py ADDED Viewed

	@@ -0,0 +1,142 @@

+import json
+import re
+from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
+import torch
+import warnings
+import spaces
+# Best-effort install of flash-attn at import time. ``flash_attn_installed``
+# is set only when pip exits successfully, because the model load below
+# requests the "flash_attention_2" implementation whenever the flag is True.
+import os
+import subprocess
+import sys
+flash_attn_installed = False
+try:
+    print("Installing flash-attn...")
+    subprocess.run(
+        # Argument list (no shell) and the running interpreter's own pip, so
+        # the package lands in the environment this process imports from.
+        [sys.executable, "-m", "pip", "install", "flash-attn", "--no-build-isolation"],
+        # Extend the inherited environment rather than replacing it; passing
+        # only the one variable would drop PATH, HOME and the CUDA settings.
+        env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
+        # Raise CalledProcessError on a non-zero exit instead of silently
+        # treating a failed install as success.
+        check=True,
+    )
+    flash_attn_installed = True
+except (subprocess.CalledProcessError, OSError) as e:
+    print(f"Error installing flash-attn: {e}")
+# Silence two known, non-actionable transformers warnings.
+warnings.filterwarnings(
+    "ignore",
+    message="You have modified the pretrained model configuration to control generation.",
+)
+warnings.filterwarnings(
+    "ignore",
+    message="You are not running the flash-attention implementation, expect numerical differences.",
+)
+print("Initializing application...")
+# Model and tokenizer are loaded once, at import time, and shared by every request.
+model = AutoModelForCausalLM.from_pretrained(
+    "sciphi/triplex",
+    trust_remote_code=True,
+    # Ask for flash-attention only when the package was actually installed;
+    # None leaves the choice of attention implementation to transformers.
+    attn_implementation="flash_attention_2" if flash_attn_installed else None,
+    torch_dtype=torch.bfloat16,
+    device_map="auto",
+    low_cpu_mem_usage=True,  # advised if any device map given
+).eval()
+# attn_implementation and torch_dtype are model-loading options; they are not
+# tokenizer settings, so they are not passed here.
+tokenizer = AutoTokenizer.from_pretrained(
+    "sciphi/triplex",
+    trust_remote_code=True,
+)
+print("Model and tokenizer loaded successfully.")
+# Set up generation config
+generation_config = GenerationConfig.from_pretrained("sciphi/triplex")
+generation_config.max_length = 2048
+# Reuse the end-of-sequence id as the padding id during generation.
+generation_config.pad_token_id = tokenizer.eos_token_id
+@spaces.GPU
+def triplextract(text, entity_types, predicates):
+    """Prompt the Triplex model to extract entities and triples from *text*.
+    Args:
+        text: The passage to analyse.
+        entity_types: List of entity type names; embedded in the prompt as JSON.
+        predicates: List of predicate names; embedded in the prompt as JSON.
+    Returns:
+        The decoded text of the newly generated tokens (the prompt is not
+        echoed back), or a string starting with "Error" when generation fails.
+    """
+    input_format = """Perform Named Entity Recognition (NER) and extract knowledge graph triplets from the text. NER identifies named entities of given entity types, and triple extraction identifies relationships between entities using specified predicates. Return the result as a JSON object with an "entities_and_triples" key containing an array of entities and triples.
+        **Entity Types:**
+        {entity_types}
+        **Predicates:**
+        {predicates}
+        **Text:**
+        {text}
+        """
+    message = input_format.format(
+        entity_types=json.dumps({"entity_types": entity_types}),
+        predicates=json.dumps({"predicates": predicates}),
+        text=text,
+    )
+    messages = [{"role": "user", "content": message}]
+    print("Tokenizing input...")
+    input_ids = tokenizer.apply_chat_template(
+        messages, add_generation_prompt=True, return_tensors="pt"
+    ).to(model.device)
+    # A single, unpadded prompt: every position is a real token. Deriving the
+    # mask from pad_token_id would fail when that id is None and would hide
+    # genuine prompt tokens that happen to share the pad id.
+    attention_mask = torch.ones_like(input_ids)
+    print("Generating output...")
+    try:
+        with torch.no_grad():
+            output = model.generate(
+                input_ids=input_ids,
+                attention_mask=attention_mask,
+                generation_config=generation_config,
+                do_sample=True,
+            )
+        # generate() returns prompt + continuation; decode only the continuation
+        # so the prompt's own JSON and brackets never reach the triple parser.
+        prompt_length = input_ids.shape[-1]
+        decoded_output = tokenizer.decode(
+            output[0][prompt_length:], skip_special_tokens=True
+        )
+        print("Decoding output completed.")
+        return decoded_output
+    except torch.cuda.OutOfMemoryError as e:
+        print(f"CUDA out of memory error: {e}")
+        return "Error: CUDA out of memory."
+    except Exception as e:
+        # Failures are reported as "Error..." strings rather than raised.
+        print(f"Error in generation: {e}")
+        return f"Error in generation: {str(e)}"
+# One serialized item in free text: "[id], TYPE:value" (entity) or
+# "[id] PREDICATE [id]" (triple). Quotes and newlines end an item.
+_ITEM_PATTERN = re.compile(
+    r"\[[^\[\]\"\n]+\]"
+    r"(?:\s*,\s*[^:\[\]\"\n]+:[^\"\n]+"
+    r"|[ \t]+[^\[\]\"\n]+?[ \t]+\[[^\[\]\"\n]+\])"
+)
+def _load_items(prediction):
+    """Return the "entities_and_triples" list found in *prediction*, or None.
+    The whole string is tried as JSON first, then each ```json fenced block.
+    """
+    fenced_blocks = re.findall(r"```json\s*(.*?)\s*```", prediction, re.DOTALL)
+    for candidate in [prediction, *fenced_blocks]:
+        try:
+            data = json.loads(candidate)
+        except json.JSONDecodeError:
+            continue
+        # Valid JSON that is not an object (a bare list, string, number)
+        # carries no "entities_and_triples" key; keep looking.
+        items = data.get("entities_and_triples", []) if isinstance(data, dict) else None
+        if isinstance(items, list):
+            return items
+    return None
+def parse_triples(prediction):
+    """Parse model output into entities and relationships.
+    Items are strings of the form "[id], TYPE:value" (entity) or
+    "[id] PREDICATE [id]" (relationship). They are read from the
+    "entities_and_triples" array of the JSON output when it can be decoded;
+    otherwise the raw text is scanned for strings of those two shapes, which
+    still recovers results from truncated or slightly malformed JSON.
+    Args:
+        prediction: Raw text produced by the model.
+    Returns:
+        ``(entities, relationships)`` where ``entities`` maps an id to
+        ``{"type": ..., "value": ...}`` and ``relationships`` is a list of
+        ``(source_id, relation, target_id)`` tuples. Items that are malformed
+        or not strings are skipped; non-string input yields empty results.
+    """
+    entities = {}
+    relationships = []
+    if not isinstance(prediction, str):
+        return entities, relationships
+    items = _load_items(prediction)
+    if items is None:
+        items = _ITEM_PATTERN.findall(prediction)
+    for item in items:
+        if not isinstance(item, str):
+            continue
+        if ":" in item:
+            # Entity: "<id>, <type>:<value>". Skip it unless both separators
+            # are present in that order.
+            ident, comma, remainder = item.partition(",")
+            entity_type, colon, entity_value = remainder.partition(":")
+            if comma and colon:
+                entities[ident.strip().strip("[]").strip()] = {
+                    "type": entity_type.strip(),
+                    "value": entity_value.strip(),
+                }
+            continue
+        # Relationship: first and last tokens are ids, everything between
+        # them is the (possibly multi-word) relation.
+        parts = item.split()
+        if len(parts) >= 3:
+            source = parts[0].strip("[]")
+            target = parts[-1].strip("[]")
+            relationships.append((source, " ".join(parts[1:-1]), target))
+    return entities, relationships

lib/samples.py ADDED Viewed

	@@ -0,0 +1,46 @@

+from collections import namedtuple
+# One bundled demo input: the passage plus the comma-separated entity types
+# and predicates that pre-fill the corresponding form fields.
+Snippet = namedtuple('Snippet', ['text_input', 'entity_types', 'predicates'])
+# Sample inputs keyed by the name shown in the "Select Sample" dropdown.
+snippets = {
+    'paris': Snippet(
+        text_input="""Paris is the capital of France. It has a population of 2.16 million people.
+        The Eiffel Tower, located in Paris, is a famous landmark with a height of 324 meters.
+        Paris is known for its romantic atmosphere.""",
+        entity_types="LOCATION, POPULATION, STYLE",
+        predicates="HAS, IS"
+    ),
+    'dickens': Snippet(
+        text_input="""It was the best of times, it was the worst of times, it was the age of wisdom,
+        it was the age of foolishness, it was the epoch of belief, it was the epoch of incredulity,
+        it was the season of Light, it was the season of Darkness, it was the spring of hope,
+        it was the winter of despair, we had everything before us, we had nothing before us,
+        we were all going direct to Heaven, we were all going direct the other way – in short,
+        the period was so far like the present period, that some of its noisiest authorities
+        insisted on its being received, for good or for evil, in the superlative degree of comparison only.""",
+        entity_types="TIME, EMOTION, LOCATION, EVENT, OUTCOME, PLACE",
+        predicates="WAS, HAD, WERE"
+    ),
+    'tech_company': Snippet(
+        text_input="""Apple Inc. was founded by Steve Jobs, Steve Wozniak, and Ronald Wayne in 1976.
+        Headquartered in Cupertino, California, Apple designs and produces consumer electronics,
+        software, and online services. The company's flagship products include the iPhone smartphone,
+        iPad tablet, and Mac personal computer. As of 2023, Apple has over 150,000 employees worldwide
+        and generates annual revenue exceeding $350 billion.""",
+        entity_types="COMPANY, PERSON, PRODUCT, LOCATION, DATE, NUMBER, EVENT, SUBJECT",
+        # No trailing separator: it would show up as an empty last entry in the form field.
+        predicates="FOUNDED, HEADQUARTERED_IN, PRODUCES, HAS, EMPLOYEES"
+    ),
+    'climate_change': Snippet(
+        text_input="""Global warming is causing significant changes to Earth's climate. The average global
+        temperature has increased by approximately 1.1°C since the pre-industrial era. This warming is
+        primarily caused by human activities, particularly the emission of greenhouse gases like carbon dioxide.
+        The Paris Agreement, signed in 2015, aims to limit global temperature increase to well below 2°C above
+        pre-industrial levels. To achieve this goal, many countries are implementing policies to reduce carbon
+        emissions and transition to renewable energy sources.""",
+        entity_types="PHENOMENON, PLANET, TEMPERATURE, CAUSE, CHEMICAL, AGREEMENT, DATE, GOAL, POLICY",
+        predicates="CAUSES, INCREASED_BY, CAUSED_BY, SIGNED_IN, AIMS_TO, IMPLEMENTING"
+    )
+}

lib/visualize.py ADDED Viewed

	@@ -0,0 +1,111 @@

+import plotly.graph_objects as go
+import networkx as nx
+import plotly.graph_objects as go
+import networkx as nx
+def create_cytoscape_plot(entities, relationships):
+    """Build a Plotly figure of the extracted knowledge graph.
+    Despite its name, this function renders with Plotly and lays the graph
+    out with networkx's spring layout.
+    Args:
+        entities: Mapping of entity id to a dict with "type" and "value" keys.
+        relationships: Iterable of ``(source_id, relation, target_id)`` tuples.
+            Ids that are missing from ``entities`` are still drawn and are
+            labelled with the bare id.
+    Returns:
+        A ``plotly.graph_objects.Figure`` with one line trace for the edges,
+        one marker trace for the nodes and one text trace per edge label.
+    """
+    G = nx.DiGraph()  # Use DiGraph for directed edges
+    for entity_id, entity_data in entities.items():
+        G.add_node(entity_id, **entity_data)
+    for source, relation, target in relationships:
+        # add_edge creates missing endpoints, so a relationship may introduce
+        # nodes that have no entry in ``entities``.
+        G.add_edge(source, target, relation=relation)
+    pos = nx.spring_layout(G, k=0.5, iterations=50)  # Adjust layout parameters
+    edge_x, edge_y, edge_labels = [], [], []
+    for source, target, relation in G.edges(data="relation"):
+        x0, y0 = pos[source]
+        x1, y1 = pos[target]
+        # None breaks the line so consecutive edges are not joined together.
+        edge_x += [x0, x1, None]
+        edge_y += [y0, y1, None]
+        # The relation name is drawn at the midpoint of its edge.
+        edge_labels.append(
+            go.Scatter(
+                x=[(x0 + x1) / 2],
+                y=[(y0 + y1) / 2],
+                mode="text",
+                text=[relation],
+                textposition="middle center",
+                hoverinfo="none",
+                showlegend=False,
+                textfont=dict(size=8),
+            )
+        )
+    node_x, node_y, node_text, node_color = [], [], [], []
+    for node in G.nodes():
+        x, y = pos[node]
+        node_x.append(x)
+        node_y.append(y)
+        entity = entities.get(node)
+        if entity is None:
+            # Node known only from a relationship: no type/value to show.
+            node_text.append(str(node))
+        else:
+            node_text.append(f"{entity['value']} ({entity['type']})")
+        # Colour by total degree (incoming + outgoing) so that nodes which are
+        # only ever a relation target are not reported as unconnected.
+        node_color.append(G.degree(node))
+    edge_trace = go.Scatter(
+        x=edge_x,
+        y=edge_y,
+        line=dict(width=1, color="#888"),
+        hoverinfo="text",
+        mode="lines",
+        text=[],
+    )
+    node_trace = go.Scatter(
+        x=node_x,
+        y=node_y,
+        mode="markers+text",
+        hoverinfo="text",
+        marker=dict(
+            showscale=True,
+            colorscale="Viridis",
+            reversescale=True,
+            color=node_color,
+            size=15,
+            colorbar=dict(
+                thickness=15,
+                # Nested title form; the flat ``titleside`` key is deprecated.
+                title=dict(text="Node Connections", side="right"),
+                xanchor="left",
+            ),
+            line_width=2,
+        ),
+        text=node_text,
+        textposition="top center",
+    )
+    fig = go.Figure(
+        data=[edge_trace, node_trace] + edge_labels,
+        layout=go.Layout(
+            # Nested title form; the flat ``titlefont_size`` key is deprecated.
+            title=dict(text="Knowledge Graph", font=dict(size=16)),
+            showlegend=False,
+            hovermode="closest",
+            margin=dict(b=20, l=5, r=5, t=40),
+            annotations=[],
+            xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
+            yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
+            width=800,
+            height=600,
+        ),
+    )
+    fig.update_layout(
+        # Line colour applied to shapes newly drawn on the figure.
+        newshape=dict(line_color="#009900"),
+        # Tie the two axes together at a 1:1 scale so the layout is not distorted.
+        xaxis=dict(
+            scaleanchor="y",
+            scaleratio=1,
+        ),
+        yaxis=dict(
+            scaleanchor="x",
+            scaleratio=1,
+        ),
+    )
+    return fig

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+gradio==4.39.0
+plotly==5.23.0
+matplotlib==3.7.2
+torch==2.0.1
+transformers==4.43.3
+accelerate==0.33.0
+networkx