"""Predict a character's gender from their dialogue with a GPT-2 prompt.

Characters listed in ``character_gender_mapping`` are answered from the table;
every other character is answered by sampling a short GPT-2 continuation of a
"Character: ... Dialogue: ... Gender:" prompt and scanning that continuation
for the word "male" or "female".
"""

import re

import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Load model and tokenizer once at import time so every call reuses them.
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")

# Define gender predictions for specific characters (keys are upper-case).
character_gender_mapping = {
    "NARRATOR": "neutral",
    "FATHER": "male",
    "HARPER": "female",
}

# Number of tokens the model may generate after the prompt. The answer is
# expected right after "Gender:", so a small budget is enough.
_MAX_NEW_TOKENS = 10

# Whole-word match: a plain substring test for "male" also hits "female".
_GENDER_PATTERN = re.compile(r"\b(female|male)\b")


def _extract_gender(text):
    """Return the first whole word 'male'/'female' in *text*, else 'unknown'."""
    match = _GENDER_PATTERN.search(text.lower())
    return match.group(1) if match else "unknown"


def predict_gender_aggregated(character, lines):
    """Predict the gender of *character* from all of their dialogue lines.

    Args:
        character: Character name. Looked up case-insensitively in
            ``character_gender_mapping`` before the model is consulted.
        lines: Iterable of dialogue strings spoken by the character. A single
            string is treated as one line.

    Returns:
        The mapped value for known characters; otherwise ``"male"``,
        ``"female"``, or ``"unknown"`` when the generated text names neither.
        Generation uses sampling, so the model-based result can vary between
        calls.
    """
    # Check if the character is in the mapping
    key = character.upper()
    if key in character_gender_mapping:
        return character_gender_mapping[key]

    # A bare string would otherwise be joined character by character.
    if isinstance(lines, str):
        lines = [lines]

    aggregated_text = " ".join(lines)
    input_text = f"Character: {character}. Dialogue: {aggregated_text}. Gender:"
    input_ids = tokenizer.encode(input_text, return_tensors="pt")

    # Keep prompt + generated tokens inside the model's context window. The
    # tail is kept so the prompt still ends with the "Gender:" cue.
    max_prompt_tokens = model.config.n_positions - _MAX_NEW_TOKENS
    if input_ids.shape[-1] > max_prompt_tokens:
        input_ids = input_ids[:, -max_prompt_tokens:]

    # Single unpadded sequence: every position is a real token.
    attention_mask = torch.ones_like(input_ids)

    output = model.generate(
        input_ids,
        attention_mask=attention_mask,
        # max_new_tokens (not max_length) so a long prompt cannot use up the
        # whole generation budget.
        max_new_tokens=_MAX_NEW_TOKENS,
        do_sample=True,
        temperature=0.7,
        # Passed explicitly so generate() does not have to pick a pad token.
        pad_token_id=tokenizer.eos_token_id,
    )

    # Decode only the continuation; the prompt itself may contain the words
    # "male"/"female" inside the dialogue or the character name.
    prompt_length = input_ids.shape[-1]
    completion = tokenizer.decode(
        output[0][prompt_length:], skip_special_tokens=True
    )
    return _extract_gender(completion)


# This function will be called for inference
def predict(input_data):
    """Run gender prediction for one request.

    Args:
        input_data: Mapping with a ``"character"`` name and a ``"lines"``
            collection of dialogue strings.

    Returns:
        ``{"character": ..., "predicted_gender": ...}`` on success, or
        ``{"error": ...}`` when either field is missing or empty.
    """
    character = input_data.get("character")
    lines = input_data.get("lines")

    # Error handling for missing input
    if not character or not lines:
        return {"error": "Missing character or lines in the input"}

    gender_prediction = predict_gender_aggregated(character, lines)
    return {"character": character, "predicted_gender": gender_prediction}


# Example input format for testing locally
if __name__ == "__main__":
    test_input = {
        "character": "FATHER",
        "lines": ["I am very proud of you, son."],
    }
    print(predict(test_input))