|
|
|
|
|
|
|
|
|
from typing import Any, Dict, List, Optional, Union
|
|
import re
|
|
|
|
|
|
|
|
|
|
from App_Function_Libraries.Personas.models import Lorebook, Asset, CharacterCardV3, CharacterCardV3Data, Decorator, \
|
|
LorebookEntry
|
|
from App_Function_Libraries.Personas.utils import validate_iso_639_1, extract_json_from_charx, parse_json_file, \
|
|
extract_text_chunks_from_png, decode_base64
|
|
|
|
|
|
|
|
|
|
|
|
class CCv3ParserError(Exception):
    """Raised when Character Card V3 input cannot be parsed or fails validation."""
|
|
|
|
|
|
class CharacterCardV3Parser:
    """Parse Character Card V3 input into a ``CharacterCardV3`` object.

    Usage: construct with the raw input and its type, call :meth:`parse`,
    then read the result with :meth:`get_character_card`.
    Every validation failure is reported as ``CCv3ParserError``.
    """

    REQUIRED_SPEC = 'chara_card_v3'
    REQUIRED_VERSION = '3.0'

    # Keys that must be present in the card's 'data' object.
    _REQUIRED_DATA_FIELDS = (
        'name', 'description', 'tags', 'creator', 'character_version',
        'mes_example', 'extensions', 'system_prompt',
        'post_history_instructions', 'first_mes',
        'alternate_greetings', 'personality', 'scenario',
        'creator_notes', 'group_only_greetings',
    )

    def __init__(self, input_data: Union[str, bytes], input_type: str):
        """
        Initialize the parser with input data.

        :param input_data: The input data as a string or bytes.
        :param input_type: The type of the input data: 'json', 'png', 'apng', 'charx'.
            Matching is case-insensitive (the value is lower-cased here).
        """
        self.input_data = input_data
        self.input_type = input_type.lower()
        # Populated by parse(); stays None until a parse succeeds.
        self.character_card: Optional["CharacterCardV3"] = None

    def parse(self):
        """
        Parse the input data according to ``self.input_type``.

        The result is stored on ``self.character_card``; nothing is returned.

        :raises CCv3ParserError: If the input type is unsupported or parsing fails.
        """
        if self.input_type == 'json':
            self.parse_json_input()
        elif self.input_type in ('png', 'apng'):
            self.parse_png_apng_input()
        elif self.input_type == 'charx':
            self.parse_charx_input()
        else:
            raise CCv3ParserError(f"Unsupported input type: {self.input_type}")

    def parse_json_input(self):
        """
        Parse JSON input directly.

        String input is UTF-8 encoded before being handed to ``parse_json_file``.

        :raises CCv3ParserError: On any failure; the original error is chained as the cause.
        """
        try:
            raw = self.input_data.encode('utf-8') if isinstance(self.input_data, str) else self.input_data
            data = parse_json_file(raw)
            self.character_card = self._build_character_card(data)
        except Exception as e:
            raise CCv3ParserError(f"Failed to parse JSON input: {e}") from e

    def parse_png_apng_input(self):
        """
        Parse PNG or APNG input by extracting the 'ccv3' tEXt chunk.

        The chunk value is passed through ``decode_base64`` and the result is
        parsed as JSON.

        :raises CCv3ParserError: On any failure, including a missing 'ccv3' chunk.
            Validation errors raised inside are re-wrapped with the
            "Failed to parse PNG/APNG input" prefix.
        """
        try:
            text_chunks = extract_text_chunks_from_png(self.input_data)
            if 'ccv3' not in text_chunks:
                raise CCv3ParserError("PNG/APNG does not contain 'ccv3' tEXt chunk.")
            ccv3_json_bytes = decode_base64(text_chunks['ccv3'])
            data = parse_json_file(ccv3_json_bytes)
            self.character_card = self._build_character_card(data)
        except Exception as e:
            raise CCv3ParserError(f"Failed to parse PNG/APNG input: {e}") from e

    def parse_charx_input(self):
        """
        Parse CHARX input via ``extract_json_from_charx``.

        :raises CCv3ParserError: On any failure; the original error is chained as the cause.
        """
        try:
            data = extract_json_from_charx(self.input_data)
            self.character_card = self._build_character_card(data)
        except Exception as e:
            raise CCv3ParserError(f"Failed to parse CHARX input: {e}") from e

    def _build_character_card(self, data: Dict[str, Any]) -> "CharacterCardV3":
        """
        Build the CharacterCardV3 object from parsed data.

        :param data: The decoded top-level card object.
        :return: The populated ``CharacterCardV3``.
        :raises CCv3ParserError: If the spec, version, required fields, assets,
            multilingual notes or lorebook fail validation.
        """
        if not isinstance(data, dict):
            raise CCv3ParserError("CharacterCardV3 object must be a JSON object.")

        spec = data.get('spec')
        spec_version = data.get('spec_version')
        if spec != self.REQUIRED_SPEC:
            raise CCv3ParserError(f"Invalid spec: Expected '{self.REQUIRED_SPEC}', got '{spec}'")

        if spec_version != self.REQUIRED_VERSION:
            # Anything other than the exact required version is accepted only
            # if it reads as a number that is at least 3.0.
            try:
                version_float = float(spec_version)
            except (TypeError, ValueError):
                # TypeError covers a missing (None) or non-scalar spec_version.
                raise CCv3ParserError(f"Invalid spec_version format: '{spec_version}'") from None
            # Written as "not >=" so that NaN is rejected as well.
            if not version_float >= 3.0:
                raise CCv3ParserError(f"Unsupported spec_version: '{spec_version}' (must be >= '3.0')")

        data_field = data.get('data')
        if not data_field:
            raise CCv3ParserError("Missing 'data' field in CharacterCardV3 object.")
        if not isinstance(data_field, dict):
            raise CCv3ParserError("'data' field in CharacterCardV3 object must be a JSON object.")

        for field_name in self._REQUIRED_DATA_FIELDS:
            if field_name not in data_field:
                raise CCv3ParserError(f"Missing required field in data: '{field_name}'")

        # When 'assets' is absent, fall back to a single default icon entry.
        assets_data = data_field.get('assets', [{
            'type': 'icon',
            'uri': 'ccdefault:',
            'name': 'main',
            'ext': 'png'
        }])
        assets = self._parse_assets(assets_data)

        creator_notes_multilingual = data_field.get('creator_notes_multilingual')
        if creator_notes_multilingual:
            if not isinstance(creator_notes_multilingual, dict):
                raise CCv3ParserError("'creator_notes_multilingual' must be a dictionary.")
            for lang_code in creator_notes_multilingual:
                if not validate_iso_639_1(lang_code):
                    raise CCv3ParserError(f"Invalid language code in 'creator_notes_multilingual': '{lang_code}'")

        character_book_data = data_field.get('character_book')
        character_book = self._parse_lorebook(character_book_data) if character_book_data else None

        character_card_data = CharacterCardV3Data(
            name=data_field['name'],
            description=data_field['description'],
            tags=data_field['tags'],
            creator=data_field['creator'],
            character_version=data_field['character_version'],
            mes_example=data_field['mes_example'],
            extensions=data_field['extensions'],
            system_prompt=data_field['system_prompt'],
            post_history_instructions=data_field['post_history_instructions'],
            first_mes=data_field['first_mes'],
            alternate_greetings=data_field['alternate_greetings'],
            personality=data_field['personality'],
            scenario=data_field['scenario'],
            creator_notes=data_field['creator_notes'],
            character_book=character_book,
            assets=assets,
            nickname=data_field.get('nickname'),
            creator_notes_multilingual=creator_notes_multilingual,
            source=data_field.get('source'),
            group_only_greetings=data_field['group_only_greetings'],
            creation_date=data_field.get('creation_date'),
            modification_date=data_field.get('modification_date')
        )

        return CharacterCardV3(
            spec=spec,
            spec_version=spec_version,
            data=character_card_data
        )

    def _parse_assets(self, assets_data: List[Dict[str, Any]]) -> List["Asset"]:
        """
        Parse and validate assets.

        :param assets_data: List of asset objects; each needs string
            'type', 'uri' and 'ext' values. 'name' defaults to ''.
        :return: One ``Asset`` per input entry, with 'ext' lower-cased.
        :raises CCv3ParserError: If the list or any entry is malformed.
        """
        if not isinstance(assets_data, list):
            raise CCv3ParserError("'assets' must be a list.")

        assets = []
        for asset_data in assets_data:
            if not isinstance(asset_data, dict):
                raise CCv3ParserError("Each asset must be a JSON object.")

            for field in ('type', 'uri', 'ext'):
                if field not in asset_data:
                    raise CCv3ParserError(f"Asset missing required field: '{field}'")
                if not isinstance(asset_data[field], str):
                    raise CCv3ParserError(f"Asset field '{field}' must be a string.")

            name = asset_data.get('name', '')

            ext = asset_data['ext'].lower()
            # fullmatch, not match with '$': '$' would also accept a trailing newline.
            if not re.fullmatch(r'[a-z0-9]+', ext):
                raise CCv3ParserError(f"Invalid file extension in asset: '{ext}'")

            assets.append(Asset(
                type=asset_data['type'],
                uri=asset_data['uri'],
                name=name,
                ext=ext
            ))
        return assets

    def _parse_lorebook(self, lorebook_data: Dict[str, Any]) -> "Lorebook":
        """
        Parse and validate a Lorebook object.

        All top-level lorebook keys are optional; 'extensions' defaults to
        ``{}`` and 'entries' to ``[]``.

        :param lorebook_data: The decoded lorebook object.
        :return: The populated ``Lorebook``.
        :raises CCv3ParserError: If the lorebook is not an object or an entry is invalid.
        """
        if not isinstance(lorebook_data, dict):
            raise CCv3ParserError("Lorebook must be a JSON object.")

        entries = self._parse_lorebook_entries(lorebook_data.get('entries', []))

        return Lorebook(
            name=lorebook_data.get('name'),
            description=lorebook_data.get('description'),
            scan_depth=lorebook_data.get('scan_depth'),
            token_budget=lorebook_data.get('token_budget'),
            recursive_scanning=lorebook_data.get('recursive_scanning'),
            extensions=lorebook_data.get('extensions', {}),
            entries=entries
        )

    def _parse_lorebook_entries(self, entries_data: List[Dict[str, Any]]) -> List["LorebookEntry"]:
        """
        Parse and validate Lorebook entries.

        Required per entry: 'keys' (list of strings), 'content' (string),
        'enabled' (boolean) and 'insertion_order' (number, converted with
        ``int()``). Decorators are extracted from 'content'.

        :param entries_data: List of decoded entry objects.
        :return: One ``LorebookEntry`` per input entry.
        :raises CCv3ParserError: If the list or any entry is malformed.
        """
        if not isinstance(entries_data, list):
            raise CCv3ParserError("Lorebook 'entries' must be a list.")

        entries = []
        for entry_data in entries_data:
            if not isinstance(entry_data, dict):
                raise CCv3ParserError("Lorebook entry must be a JSON object.")

            for field in ('keys', 'content', 'enabled', 'insertion_order'):
                if field not in entry_data:
                    raise CCv3ParserError(f"Lorebook entry missing required field: '{field}'")

            keys = entry_data['keys']
            if not isinstance(keys, list) or not all(isinstance(k, str) for k in keys):
                raise CCv3ParserError("'keys' field in Lorebook entry must be a list of strings.")
            if not isinstance(entry_data['content'], str):
                raise CCv3ParserError("'content' field in Lorebook entry must be a string.")
            if not isinstance(entry_data['enabled'], bool):
                raise CCv3ParserError("'enabled' field in Lorebook entry must be a boolean.")

            raw_order = entry_data['insertion_order']
            # bool is a subclass of int, so it has to be excluded explicitly.
            if isinstance(raw_order, bool) or not isinstance(raw_order, (int, float)):
                raise CCv3ParserError("'insertion_order' field in Lorebook entry must be a number.")
            try:
                insertion_order = int(raw_order)
            except (ValueError, OverflowError):
                # int() cannot convert NaN or infinity.
                raise CCv3ParserError(
                    "'insertion_order' field in Lorebook entry must be a finite number.") from None

            selective = entry_data.get('selective')
            secondary_keys = entry_data.get('secondary_keys')
            position = entry_data.get('position')

            # Compare against None rather than relying on truthiness, so falsy
            # values of the wrong type (0, "", []) are still rejected.
            if selective is not None and not isinstance(selective, bool):
                raise CCv3ParserError("'selective' field in Lorebook entry must be a boolean.")
            if secondary_keys is not None:
                if not isinstance(secondary_keys, list) or not all(isinstance(k, str) for k in secondary_keys):
                    raise CCv3ParserError("'secondary_keys' field in Lorebook entry must be a list of strings.")
            if position is not None and not isinstance(position, str):
                raise CCv3ParserError("'position' field in Lorebook entry must be a string.")

            decorators = self._extract_decorators(entry_data['content'])

            entries.append(LorebookEntry(
                keys=keys,
                content=entry_data['content'],
                enabled=entry_data['enabled'],
                insertion_order=insertion_order,
                use_regex=entry_data.get('use_regex', False),
                constant=entry_data.get('constant'),
                selective=selective,
                secondary_keys=secondary_keys,
                position=position,
                decorators=decorators,
                name=entry_data.get('name'),
                priority=entry_data.get('priority'),
                id=entry_data.get('id'),
                comment=entry_data.get('comment')
            ))
        return entries

    def _extract_decorators(self, content: str) -> List["Decorator"]:
        """
        Extract decorators from the content field.

        Only lines that begin with '@@' (no leading whitespace) are considered.

        :param content: The lorebook entry content.
        :return: Decorators in the order their lines appear.
        """
        decorators = []
        for line in content.splitlines():
            if not line.startswith('@@'):
                continue
            decorator = self._parse_decorator_line(line)
            if decorator is not None:
                decorators.append(decorator)
        return decorators

    def _parse_decorator_line(self, line: str) -> Optional["Decorator"]:
        """
        Parses a single decorator line.

        Example:
            @@decorator_name value
            @@@fallback_decorator value

        '@@' and '@@@' lines are handled identically: all leading '@' are
        stripped and the rest is split at the first space into name and
        value. No fallback relationship is recorded.

        :param line: One line of entry content.
        :return: A ``Decorator``, or ``None`` if the line does not start with
            '@@' or carries no decorator name.
        """
        if not line.startswith('@@'):
            return None

        name, separator, remainder = line.lstrip('@').strip().partition(' ')
        if not name:
            # A bare '@@' / '@@@' has nothing to name the decorator with.
            return None

        value = remainder if separator else None
        return Decorator(name=name, value=value)

    def get_character_card(self) -> Optional["CharacterCardV3"]:
        """Returns the parsed CharacterCardV3 object, or None if nothing has been parsed yet."""
        return self.character_card
|
|
|
|
|
|
|
|
|