import os

# Set a writable cache directory for Hugging Face *before* importing transformers,
# otherwise the cache path is already resolved at import time and this has no effect.
os.environ['TRANSFORMERS_CACHE'] = '/cache'

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the LLaMA 3 based model and tokenizer.
# Replace with a local LLaMA 3 path or another Hugging Face model ID if needed,
# e.g. https://huggingface.co/nvidia/Llama3-ChatQA-2-70B
model_name = "nvidia/Llama3-ChatQA-2-70B"

tokenizer = AutoTokenizer.from_pretrained(model_name)

# A 70B model will not fit on a single GPU in full precision; load the weights in
# bfloat16 and let device_map="auto" (requires the accelerate package) spread them
# across the available devices.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# Define API for text generation
def generate_text(prompt, max_length=100):
    # Move the tokenized prompt to the same device as the model's first layer.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_length=max_length)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
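
# Minimal usage sketch (an assumed entry point, not part of the original script);
# the prompt string below is purely illustrative.
if __name__ == "__main__":
    sample_prompt = "Explain retrieval-augmented generation in one paragraph."
    # max_length counts prompt tokens plus generated tokens.
    print(generate_text(sample_prompt, max_length=200))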