metadata

library_name: transformers
tags: []

INFERENCE

import random

def generate_random_data():
    """Return a dict of randomly generated metrics.

    The result has three parts, always in the same key order:

    * top-level integer counters ("Users", "Groups", ...),
    * "Confirmed_Vulnerabilities": per-category integer counts,
    * "Trend_Percentages": per-metric floats rounded to two decimals.

    Returns:
        dict: A freshly built mapping; every call draws new values from the
        module-level ``random`` generator.
    """

    def pct(low, high):
        # Trend values are percentages rounded to two decimal places.
        return round(random.uniform(low, high), 2)

    return {
        "Users": random.randint(5, 20),
        "Groups": random.randint(10, 30),
        "Projects/Repositories": random.randint(4000, 5000),
        "Scans": random.randint(40, 100),
        "Lines_of_Code": random.randint(25000000, 35000000),
        "Vulnerabilities": random.randint(7000, 8000),
        "False_Positives": random.randint(10, 30),
        "True_Positives": random.randint(150, 200),
        "Confirmed_Vulnerabilities": {
            "Secret": random.randint(0, 200),
            "PII": random.randint(0, 200),
            "SAST": random.randint(0, 200),
            "SCA": random.randint(0, 200),
            "IaC": random.randint(0, 200),
            "Container": random.randint(0, 200),
            "API": random.randint(0, 200),
            "Compliance": random.randint(0, 200),
            # NOTE(review): upper bound is 225 here but 200 for the other
            # categories -- kept as-is, confirm it is intentional.
            "Malware": random.randint(0, 225),
        },
        # NOTE(review): "Container" and "API" have counts above but no trend
        # entry here -- kept as-is, confirm it is intentional.
        "Trend_Percentages": {
            "Scans": pct(-100, 100),
            # These two previously used uniform(-100, -100), which can only
            # ever yield -100.0; use the same symmetric range as "Scans".
            "Lines_of_Code": pct(-100, 100),
            "Vulnerabilities": pct(-100, 100),
            "False_Positives": pct(-100, 1000),
            "True_Positives": pct(-100, 100),
            "Secret": pct(-100, 1500),
            "PII": pct(-100, 1500),
            "SAST": pct(-100, 1500),
            "SCA": pct(-100, 1500),
            "IaC": pct(-100, 1500),
            "Compliance": pct(-100, 1500),
            "Malware": pct(-100, 1500),
        },
    }

def json_to_text(data, prefix=""):
    """
    Convert JSON data into a simple text format for fine-tuning.

    Each non-dict value becomes the fragment "<prefix><key> is <value>".
    Each nested dict is flattened recursively, with "<key> of " appended to
    the prefix of its entries. Fragments are joined with ", " in the
    iteration order of ``data``.

    Example:
        {"a": 1, "b": {"c": 2}}  ->  "a is 1, b of c is 2"

    Args:
        data (dict): The JSON object to convert.
        prefix (str): Prefix for nested keys (used for recursion).

    Returns:
        str: Simplified text representation of the JSON. An empty dict
        yields an empty string.
    """
    fragments = []

    for key, value in data.items():
        if isinstance(value, dict):
            # Recurse for nested dictionaries
            nested_text = json_to_text(value, prefix=f"{prefix}{key} of ")
            # An empty nested dict flattens to "", which would otherwise
            # leave a dangling ", " separator in the joined output.
            if nested_text:
                fragments.append(nested_text)
        else:
            # Simplified key-value representation
            fragments.append(f"{prefix}{key} is {value}")

    return ", ".join(fragments)

# Load model directly
import time
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM


# Load the tokenizer and the causal LM from the same pretrained checkpoint.
tokenizer = AutoTokenizer.from_pretrained("Mr-Vicky-01/gpt-data-reasoning_1")
finetuned_model = AutoModelForCausalLM.from_pretrained("Mr-Vicky-01/gpt-data-reasoning_1")

# One random metrics sample; its flattened text form is the instruction below.
random_data = generate_random_data()

alpaca_prompt = f"""Below is an instruction that provides a data analysis task. Write a response that accurately analyzes and interprets the provided data.

### Instruction:
{json_to_text(random_data)}

### Response:
"""
# Timing starts after the model is loaded: it covers tokenization, the move
# to the target device, generation and decoding.
s = time.time()
prompt = alpaca_prompt
# Keep the attention mask together with the ids. Pad and EOS share one id in
# the generate() call below, so the mask should be passed explicitly rather
# than left for generate() to guess from the ids.
encoded_prompt = tokenizer(prompt, return_tensors="pt", truncation=True)
encodeds = encoded_prompt.input_ids

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
finetuned_model.to(device)
inputs = encodeds.to(device)
attention_mask = encoded_prompt.attention_mask.to(device)

# Increase max_new_tokens if needed
# NOTE(review): 50259 is presumably the id of the '<eos>' token split on
# below -- confirm against the tokenizer's vocabulary.
generated_ids = finetuned_model.generate(
    inputs,
    attention_mask=attention_mask,
    max_new_tokens=256,
    top_p=0.95,
    top_k=2,
    temperature=0.2,
    do_sample=True,
    pad_token_id=50259,
    eos_token_id=50259,
    num_return_sequences=1,
)
print(str(random_data))
print("\n")
# generate() returns the prompt ids followed by the new ids. Decode only the
# new ones instead of splitting the full text on '### Response:', which would
# raise IndexError if truncation had cut the marker out of the prompt.
new_tokens = generated_ids[0][inputs.shape[-1]:]
print(tokenizer.decode(new_tokens).split('<eos>')[0].strip())
e = time.time()
print(f'time taken:{e-s}')