# Spaces status: Runtime error
import math
import os
import random

import gradio as gr
import numpy as np
import torch
from rwkv.model import RWKV
from transformers import AutoTokenizer
# Define the Node class for MCTS
class Node:
    """One node of the Monte Carlo search tree.

    Attributes:
        state: Game state represented by this node.
        parent: Node this one was expanded from, or None for the root.
        children: Child nodes created so far.
        visits: Number of simulations that passed through this node.
        wins: Reward accumulated by MCTS.backpropagate.
    """

    def __init__(self, state, parent=None):
        self.state = state
        self.parent = parent
        self.children = []
        self.visits = 0
        self.wins = 0
        # Successor states without a child node yet. Built on first use so
        # that creating a node does not enumerate moves eagerly.
        self._untried_states = None

    def untried_states(self):
        """Return the list of successor states that have no child node yet."""
        if self._untried_states is None:
            self._untried_states = list(self.state.get_child_states())
        return self._untried_states

    def is_fully_expanded(self):
        """Return True once every successor state has its own child node."""
        return not self.untried_states()

    def best_child(self, c_param=1.4):
        """Return the child with the highest UCT score.

        The score is ``wins / visits + c_param * sqrt(2 * ln(N) / visits)``
        where ``N`` is this node's visit count. With ``c_param=0`` only the
        average reward is compared.

        Raises:
            ValueError: If the node has no children.
        """
        if not self.children:
            raise ValueError("best_child() called on a node without children")

        # Guard against log(0) when the parent itself was never visited.
        log_parent_visits = math.log(max(self.visits, 1))

        def uct_score(child):
            if child.visits == 0:
                # No statistics yet: explore it first while exploring, never
                # prefer it when only exploiting.
                return math.inf if c_param > 0 else -math.inf
            exploitation = child.wins / child.visits
            exploration = c_param * math.sqrt(2 * log_parent_visits / child.visits)
            return exploitation + exploration

        # max() keeps the first of several equal scores.
        return max(self.children, key=uct_score)

    def expand(self, state):
        """Create, attach and return a child node for ``state``."""
        pending = self.untried_states()
        if state in pending:
            pending.remove(state)
        child = Node(state, self)
        self.children.append(child)
        return child


# Define the MCTS class
class MCTS:
    """Monte Carlo Tree Search over states that look like GameState.

    A state must provide ``is_terminal()``, ``get_child_states()``,
    ``get_random_child_state()``, ``get_reward()`` and a ``player`` attribute
    holding 1 or 2 (the side to move).
    """

    def __init__(self, simulation_limit=1000):
        self.root = None
        self.simulation_limit = simulation_limit

    def search(self, initial_state):
        """Run the simulations and return the most promising successor state.

        A terminal ``initial_state`` has no successor, so it is returned
        unchanged.

        Raises:
            ValueError: If no simulation ran (``simulation_limit`` < 1), so
                the root never received a child.
        """
        self.root = Node(initial_state)
        if initial_state.is_terminal():
            return initial_state
        for _ in range(self.simulation_limit):
            leaf = self.tree_policy(self.root)
            reward = self.default_policy(leaf.state)
            self.backpropagate(leaf, reward)
        return self.root.best_child(c_param=0).state

    def tree_policy(self, node):
        """Descend from ``node`` to the node the next rollout starts from.

        Follows the best UCT child while nodes are fully expanded; the first
        node that still has untried successors gets one new child, which is
        returned. A terminal node is returned as is.
        """
        while not node.state.is_terminal():
            if not node.is_fully_expanded():
                return self.expand(node)
            node = node.best_child()
        return node

    def expand(self, node):
        """Attach one randomly chosen untried successor to ``node``.

        Raises:
            ValueError: If every successor of ``node`` already has a child.
        """
        untried = node.untried_states()
        if not untried:
            raise ValueError("node has no untried successor states")
        return node.expand(random.choice(untried))

    def default_policy(self, state):
        """Play random moves from ``state`` to the end and return the reward."""
        while not state.is_terminal():
            state = state.get_random_child_state()
        return state.get_reward()

    def backpropagate(self, node, reward):
        """Propagate a rollout result from ``node`` up to the root.

        ``reward`` is expressed from player 1's point of view (see
        GameState.get_reward). Each node stores it from the point of view of
        the player who moved *into* that node, so that best_child() run on the
        parent maximises the outcome for the side actually choosing the move.
        """
        while node is not None:
            node.visits += 1
            # state.player is the side to move next, so the other side made
            # the move that produced this node.
            mover = 3 - node.state.player
            node.wins += reward if mover == 1 else -reward
            node = node.parent


# Define the Game State and Rules
class GameState:
    """Board position of a tic-tac-toe style game on a square grid.

    Attributes:
        board: List of rows; a cell holds 0 when empty, otherwise the number
            of the player who marked it.
        player: Player (1 or 2) who moves next.
    """

    def __init__(self, board, player):
        self.board = board
        self.player = player

    def __eq__(self, other):
        if not isinstance(other, GameState):
            return NotImplemented
        return self.player == other.player and self.board == other.board

    def __hash__(self):
        return hash((self.player, tuple(tuple(row) for row in self.board)))

    def _lines(self):
        """Yield every row, column and both diagonals of the board."""
        board = self.board
        size = len(board)
        yield from board
        yield from zip(*board)
        yield [board[i][i] for i in range(size)]
        yield [board[i][size - 1 - i] for i in range(size)]

    def winner(self):
        """Return the mark that fills a complete line, or 0 if there is none."""
        for line in self._lines():
            if not line:
                continue
            first = line[0]
            if first != 0 and all(cell == first for cell in line):
                return first
        return 0

    def is_terminal(self):
        """Return True when the game is won or the board is full."""
        return self.check_win() or self.check_draw()

    def check_win(self):
        """Return True when some row, column or diagonal holds one mark."""
        return self.winner() != 0

    def check_draw(self):
        """Return True when no empty cell is left."""
        return all(cell != 0 for row in self.board for cell in row)

    def get_available_moves(self):
        """Return the (row, col) coordinates of every empty cell."""
        return [
            (r, c)
            for r, cells in enumerate(self.board)
            for c, cell in enumerate(cells)
            if cell == 0
        ]

    def _apply_move(self, row, col):
        """Return the state after the current player marks (row, col)."""
        new_board = [list(cells) for cells in self.board]
        new_board[row][col] = self.player
        return GameState(new_board, 3 - self.player)

    def get_child_states(self):
        """Return one successor state per empty cell, in row-major order."""
        return [self._apply_move(r, c) for r, c in self.get_available_moves()]

    def get_random_child_state(self):
        """Return the state after a uniformly random move.

        Returns this very state when the board has no empty cell.
        """
        moves = self.get_available_moves()
        if not moves:
            return self
        r, c = random.choice(moves)
        return self._apply_move(r, c)

    def get_reward(self):
        """Return +1 if player 1 owns a winning line, -1 if another mark
        does, and 0 when nobody has won."""
        champion = self.winner()
        if champion == 0:
            return 0
        return 1 if champion == 1 else -1

    def __str__(self):
        return "\n".join(" ".join(str(cell) for cell in row) for row in self.board)
# Initialize the RWKV model and tokenizer
# NOTE(review): the GPT-2 vocabulary may not match the vocabulary the RWKV
# checkpoint was trained with -- confirm the tokenizer choice.
model_name = "BlinkDL/rwkv-4-raven"
tokenizer = AutoTokenizer.from_pretrained("gpt2")  # Use a tokenizer from a supported model
# Load the RWKV model
# Half-precision CUDA only works when a GPU is present; fall back to
# full-precision CPU so that importing this module does not fail on
# CPU-only hosts.
# NOTE(review): confirm that model_name resolves to a checkpoint the RWKV
# loader can open on the deployment host.
model_strategy = "cuda fp16" if torch.cuda.is_available() else "cpu fp32"
model = RWKV(model=model_name, strategy=model_strategy)
# Generate Chain-of-Thought
def generate_cot(state):
    """Return the language model's text answer for ``state``.

    The state is rendered with ``str()`` into a fixed prompt, tokenized with
    the module-level ``tokenizer``, passed to ``model.generate`` and the first
    returned sequence is decoded back to text.

    NOTE(review): confirm that the object bound to ``model`` really exposes a
    ``generate(input_ids, max_length=..., num_return_sequences=...)`` method.
    """
    prompt = f"Current state: {state}\nWhat is the best move?"
    encoded = tokenizer(prompt, return_tensors="pt")
    sequences = model.generate(
        encoded.input_ids,
        max_length=100,
        num_return_sequences=1,
    )
    return tokenizer.decode(sequences[0], skip_special_tokens=True)
# Use CoT in MCTS
def mcts_with_cot(initial_state, simulation_limit=1000):
    """Pick a move with MCTS and ask the language model to comment on it.

    Args:
        initial_state: State the search starts from.
        simulation_limit: Number of MCTS simulations to run. Defaults to the
            previously hard-coded budget of 1000.

    Returns:
        A ``(best_state, cot)`` tuple: the state chosen by the search and the
        text produced by ``generate_cot`` for that state.
    """
    mcts = MCTS(simulation_limit=simulation_limit)
    best_state = mcts.search(initial_state)
    cot = generate_cot(best_state)
    return best_state, cot
# Function to be called by Gradio
def run_mcts_cot(initial_board):
    """Search from ``initial_board`` with player 1 to move.

    Returns:
        A tuple of the chosen state rendered as text and the chain-of-thought
        text returned by ``mcts_with_cot``.
    """
    chosen_state, reasoning = mcts_with_cot(GameState(initial_board, 1))
    return str(chosen_state), reasoning