metadata
library_name: transformers
tags: []
INFERENCE
import random
def generate_random_data():
    """
    Build one randomized snapshot of security-dashboard metrics.

    Returns:
        dict: Integer counters at the top level, plus two nested dicts:
            "Confirmed_Vulnerabilities" (integer count per category) and
            "Trend_Percentages" (floats rounded to two decimals).
    """
    return {
        "Users": random.randint(5, 20),
        "Groups": random.randint(10, 30),
        "Projects/Repositories": random.randint(4000, 5000),
        "Scans": random.randint(40, 100),
        "Lines_of_Code": random.randint(25000000, 35000000),
        "Vulnerabilities": random.randint(7000, 8000),
        "False_Positives": random.randint(10, 30),
        "True_Positives": random.randint(150, 200),
        "Confirmed_Vulnerabilities": {
            "Secret": random.randint(0, 200),
            "PII": random.randint(0, 200),
            "SAST": random.randint(0, 200),
            "SCA": random.randint(0, 200),
            "IaC": random.randint(0, 200),
            "Container": random.randint(0, 200),
            "API": random.randint(0, 200),
            "Compliance": random.randint(0, 200),
            # NOTE(review): upper bound is 225 here vs 200 for the other
            # categories; kept as-is, confirm whether that is intended.
            "Malware": random.randint(0, 225),
        },
        "Trend_Percentages": {
            "Scans": round(random.uniform(-100, 100), 2),
            # The range used to be (-100, -100), which always produced -100.0;
            # it now spans -100..100 like the "Scans" and "True_Positives" trends.
            "Lines_of_Code": round(random.uniform(-100, 100), 2),
            "Vulnerabilities": round(random.uniform(-100, 100), 2),
            "False_Positives": round(random.uniform(-100, 1000), 2),
            "True_Positives": round(random.uniform(-100, 100), 2),
            # NOTE(review): "Container" and "API" have counts above but no
            # trend entry; left unchanged so the generated prompt keeps its shape.
            "Secret": round(random.uniform(-100, 1500), 2),
            "PII": round(random.uniform(-100, 1500), 2),
            "SAST": round(random.uniform(-100, 1500), 2),
            "SCA": round(random.uniform(-100, 1500), 2),
            "IaC": round(random.uniform(-100, 1500), 2),
            "Compliance": round(random.uniform(-100, 1500), 2),
            "Malware": round(random.uniform(-100, 1500), 2),
        },
    }
def json_to_text(data, prefix=""):
    """
    Flatten a (possibly nested) dict into one comma-separated sentence.

    Scalar entries are rendered as "<prefix><key> is <value>". A nested dict
    is rendered recursively, with "<key> of " appended to the prefix for
    everything inside it.

    Args:
        data (dict): The mapping to render.
        prefix (str): Text placed before every key at this nesting level.
    Returns:
        str: The rendered fragments joined with ", ".
    """
    fragments = [
        # Nested mappings contribute their own already-joined text.
        json_to_text(item, prefix=f"{prefix}{name} of ")
        if isinstance(item, dict)
        else f"{prefix}{name} is {item}"
        for name, item in data.items()
    ]
    return ", ".join(fragments)
# Load model directly
import time
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Load the tokenizer and causal-LM weights for the fine-tuned checkpoint.
tokenizer = AutoTokenizer.from_pretrained("Mr-Vicky-01/gpt-data-reasoning_1")
finetuned_model = AutoModelForCausalLM.from_pretrained("Mr-Vicky-01/gpt-data-reasoning_1")

# Build the prompt: the random metrics are flattened to plain text and placed
# in the "### Instruction:" section.
random_data = generate_random_data()
alpaca_prompt = f"""Below is an instruction that provides a data analysis task. Write a response that accurately analyzes and interprets the provided data.
### Instruction:
{json_to_text(random_data)}
### Response:
"""

# Timing starts after the model is loaded; it covers tokenization, the device
# transfer, generation and printing.
s = time.time()
prompt = alpaca_prompt
encoding = tokenizer(prompt, return_tensors="pt", truncation=True)
encodeds = encoding.input_ids
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
finetuned_model.to(device)
inputs = encodeds.to(device)

# Pass the attention mask explicitly: pad_token_id equals eos_token_id below,
# so generate() cannot infer the mask from the input ids on its own.
attention_mask = encoding.get("attention_mask")
if attention_mask is not None:
    attention_mask = attention_mask.to(device)

# Increase max_new_tokens if needed
# NOTE(review): 50259 is used as both pad and EOS id; presumably the id of the
# '<eos>' token stripped from the output below. Confirm against the tokenizer.
generated_ids = finetuned_model.generate(
    inputs,
    attention_mask=attention_mask,
    max_new_tokens=256,
    top_p=0.95,
    top_k=2,
    temperature=0.2,
    do_sample=True,
    pad_token_id=50259,
    eos_token_id=50259,
    num_return_sequences=1,
)
print(str(random_data))
print("\n")

# The decoded text contains the prompt followed by the completion. Keep the
# part after the response marker; if the marker is missing (e.g. truncation
# cut the prompt tail) fall back to the whole text rather than raising
# IndexError.
decoded = tokenizer.decode(generated_ids[0])
segments = decoded.split('### Response:')
response = segments[1] if len(segments) > 1 else segments[0]
print(response.split('<eos>')[0].strip())
e = time.time()
print(f'time taken:{e-s}')