Edit model card

import random
import json

def _trend_pct(upper):
    """Return a random trend percentage in [-100, upper], rounded to 2 places."""
    return round(random.uniform(-100, upper), 2)


def generate_random_data():
    """
    Build one random sample of scan-dashboard statistics.

    Returns:
        dict: Top-level integer counters plus two nested dictionaries:
            "Confirmed_Vulnerabilities" (integer counts per category) and
            "Trend_Percentages" (floats rounded to two decimals, lower
            bound -100).
    """
    return {
        "Users": random.randint(5, 20),
        "Groups": random.randint(10, 30),
        "Projects/Repositories": random.randint(4000, 5000),
        "Scans": random.randint(40, 100),
        "Lines_of_Code": random.randint(25000000, 35000000),
        "Vulnerabilities": random.randint(7000, 8000),
        "False_Positives": random.randint(10, 30),
        "True_Positives": random.randint(150, 200),
        "Confirmed_Vulnerabilities": {
            "Secret": random.randint(0, 200),
            "PII": random.randint(0, 200),
            "SAST": random.randint(0, 200),
            "SCA": random.randint(0, 200),
            "IaC": random.randint(0, 200),
            "Container": random.randint(0, 200),
            "API": random.randint(0, 200),
            "Compliance": random.randint(0, 200),
            "Malware": random.randint(0, 225),
        },
        # NOTE(review): "Container" and "API" have no trend entry here even
        # though they appear in "Confirmed_Vulnerabilities". Left untouched
        # because this presumably mirrors the format the model was trained
        # on -- confirm before adding keys.
        "Trend_Percentages": {
            "Scans": _trend_pct(100),
            # These two used uniform(-100, -100), which always yields -100.0;
            # the upper bound is now 100 like the sibling entries.
            "Lines_of_Code": _trend_pct(100),
            "Vulnerabilities": _trend_pct(100),
            "False_Positives": _trend_pct(1000),
            "True_Positives": _trend_pct(100),
            "Secret": _trend_pct(1500),
            "PII": _trend_pct(1500),
            "SAST": _trend_pct(1500),
            "SCA": _trend_pct(1500),
            "IaC": _trend_pct(1500),
            "Compliance": _trend_pct(1500),
            "Malware": _trend_pct(1500),
        },
    }



def json_to_semi_structured_text(data):
    """
    Convert JSON data into a semi-structured text format for training T5-Flan.

    Args:
        data (dict | str): The object to convert. A dict is used as-is. A
            string is parsed as JSON first, then as a Python literal (the
            ``str(dict)`` form), and finally with the legacy
            single-quote-to-double-quote replacement.

    Returns:
        str: Semi-structured text representation of the JSON. Top-level
            scalars become ``"Key: value"`` lines; nested dictionaries become
            a ``"Key:"`` header followed by ``"- sub_key: sub_value"`` lines.

    Raises:
        ValueError: If a string input cannot be parsed by any strategy.
        TypeError: If the (parsed) input is not a dictionary.
    """
    import ast  # local import: only needed for the repr-string fallback

    if isinstance(data, str):
        try:
            data = json.loads(data)
        except ValueError:
            try:
                # Handles str(dict) output (single quotes, True/False/None)
                # without corrupting apostrophes inside values.
                data = ast.literal_eval(data)
            except (ValueError, SyntaxError):
                # Legacy behaviour kept for inputs such as "{'a': true}".
                data = json.loads(data.replace("'", '"'))

    if not isinstance(data, dict):
        raise TypeError(
            f"expected a dict or a string encoding one, got {type(data).__name__}"
        )

    text_output = []

    for key, value in data.items():
        if isinstance(value, dict):
            # Handle nested dictionaries (header keeps its underscores; only
            # the first letter is upper-cased, the rest lower-cased).
            text_output.append(f"{key.capitalize()}:")
            for sub_key, sub_value in value.items():
                text_output.append(f"- {sub_key}: {sub_value}")
        else:
            # Direct key-value pairs
            text_output.append(f"{key.replace('_', ' ').capitalize()}: {value}")

    return "\n".join(text_output)

Inference

# Load model directly
import time

import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

tokenizer = AutoTokenizer.from_pretrained("suriya7/t5-data-reasoning")
model = AutoModelForSeq2SeqLM.from_pretrained("suriya7/t5-data-reasoning")

# `device` was previously used without being defined (NameError). Pick the GPU
# when one is available and fall back to the CPU otherwise.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# One-off setup, kept outside the timed region so the measurement below
# covers tokenization + generation only.
model.to(device)

# Build a random sample and render it in the text format the model expects.
data_inp = json_to_semi_structured_text(str(generate_random_data()))

inp = "Summarize and reason: " + data_inp

start = time.time()

inputs = tokenizer(inp, return_tensors="pt", truncation=True)
inputs = inputs.to(device)
# Greedy decoding (do_sample=False), capped at 256 tokens.
outputs = model.generate(**inputs, max_length=256, do_sample=False)
# Drop special tokens such as <pad> and </s> from the printed answer.
answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(answer)

end = time.time()
print(f"Time taken: {end - start}")
print('\n\n')
print("input: "+inp)
Downloads last month: 0
Safetensors · Model size: 783M params · Tensor type: F32
Inference Examples
This model does not have enough activity to be deployed to Inference API (serverless) yet. Increase its social visibility and check back later, or deploy to Inference Endpoints (dedicated) instead.