Spaces:

FreedomIntelligence
/

S2S_Evaluation

Running

App Files Files Community

KurtDu committed 17 days ago

Commit

dff2993

•

1 Parent(s): 3a7b896

Upload 4 files

Browse files

Files changed (4) hide show

app.py +173 -0
elo_rank.py +133 -0
index.html +568 -0
requirements.txt +26 -0

app.py ADDED Viewed

	@@ -0,0 +1,173 @@

+import os
+import json
+import random
+import uuid
+from flask import Flask, request, jsonify, session, render_template
+from flask_cors import CORS
+from datetime import datetime
+from elo_rank import EloRank
# Flask application object that every route below is registered on.
app = Flask(__name__)
CORS(app)
# Key Flask uses to sign the session cookie. Read it from the environment so a
# deployment can supply a private value; the literal fallback keeps the previous
# behaviour for local runs, but it is public and should not be relied on in
# production.
app.secret_key = os.environ.get('SECRET_KEY', 'supersecretkey')
DATA_DIR = './data'
RESULTS_DIR = './results'
# One EloRank instance, created at import time and shared by every request
# handled by this process.
elo_rank_system = EloRank()
# Keys under which each test item stores a model's output audio path; each key
# is also registered as a competitor in the Elo system.
models = ['output_path_4o', 'output_path_miniomni', 'output_path_speechgpt', 'output_path_funaudio', 'output_path_4o_cascade', 'output_path_4o_llama_omni']
for model in models:
    elo_rank_system.add_model(model)
def load_test_data(task):
    """Load the test items of one task and rewrite their audio paths.

    Reads ``<DATA_DIR>/<task>.json`` and prefixes the input path and every
    model output path of each item with ``/static/audio`` so they point into
    the Flask static folder.

    :param task: Task name; used as the JSON file's base name.
    :return: The parsed JSON content (expected to be a list of dicts) with
        rewritten audio paths.
    :raises ValueError: If ``task`` is empty, not a string, or contains a path
        component (it would otherwise allow reading files outside DATA_DIR).
    :raises FileNotFoundError: If no data file exists for ``task``.
    :raises KeyError: If an item lacks one of the expected path keys.
    """
    # `task` arrives from the client, so refuse anything that is not a plain
    # file name before it is joined into a path.
    if (not isinstance(task, str) or not task or task in ('.', '..')
            or os.path.basename(task) != task):
        raise ValueError(f"Invalid task name: {task!r}")
    with open(os.path.join(DATA_DIR, f"{task}.json"), "r", encoding='utf-8') as f:
        test_data = json.load(f)
    # The input audio plus one output per registered model.
    path_keys = ('input_path', *models)
    for item in test_data:
        for key in path_keys:
            item[key] = f"/static/audio{item[key]}"
    return test_data
def _safe_filename_part(value):
    """Return ``value`` as text with every character other than letters, digits, '-', '_' and '.' replaced by '_'."""
    return "".join(ch if ch.isalnum() or ch in "-_." else "_" for ch in str(value))


def save_result(task, username, result_data, session_id):
    """Append one comparison result to the user's per-session JSONL file.

    The current Elo rating of every model and an ISO-format timestamp are
    added to ``result_data`` (the dict is modified in place) before it is
    written as a single JSON line.

    :param task: Task name; first part of the file name.
    :param username: Name entered by the user; second part of the file name.
    :param result_data: JSON-serialisable dict describing the comparison.
    :param session_id: Identifier of the test run; last part of the file name.
    """
    # The results directory is otherwise only created when the module is run as
    # a script, so make sure it exists when the app is served another way.
    os.makedirs(RESULTS_DIR, exist_ok=True)
    # task/username come from the client: strip path separators and other
    # special characters so the file always lands inside RESULTS_DIR.
    file_name = "_".join(
        _safe_filename_part(part) for part in (task, username, session_id)
    ) + ".jsonl"
    file_path = os.path.join(RESULTS_DIR, file_name)
    # Snapshot the Elo rating of every model at the time of saving.
    elo_scores = {model: elo_rank_system.get_rating(model) for model in models}
    result_data['elo_scores'] = elo_scores
    result_data['timestamp'] = datetime.now().isoformat()
    with open(file_path, "a", encoding='utf-8') as f:
        f.write(json.dumps(result_data) + "\n")
@app.route('/start_test', methods=['POST'])
def start_test():
    """Start a test run for a user on the selected task.

    Expects a JSON body with ``task`` and ``username``. Loads and shuffles the
    task's items, stores the run state in the session, and returns the number
    of items plus the first item's task description.

    Responds with 400 when the body is not a JSON object, a field is missing,
    or the task is invalid/unknown, and with 404 when the task has no items.
    """
    # silent=True yields None instead of raising on a missing/invalid JSON body.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"message": "Request body must be a JSON object"}), 400
    task = data.get('task')
    username = data.get('username')
    if not task or not username:
        return jsonify({"message": "Both 'task' and 'username' are required"}), 400
    # Load the test data; a bad or unknown task name is a client error.
    try:
        test_data = load_test_data(task)
    except (ValueError, FileNotFoundError):
        return jsonify({"message": "Invalid or unknown task"}), 400
    if not test_data:
        return jsonify({"message": "No test data available for this task"}), 404
    # Shuffle test data for the user
    random.shuffle(test_data)
    # Unique ID for this run; save_result uses it to keep result files apart.
    session_id = str(uuid.uuid4())
    # NOTE(review): the whole item list is kept in the session; with Flask's
    # default cookie-based sessions a large task may exceed the browser's
    # cookie size limit - confirm and consider server-side storage.
    session['task'] = task
    session['username'] = username
    session['test_data'] = test_data
    session['current_index'] = 0
    session['session_id'] = session_id
    task_description = test_data[0].get('task_description', '')
    return jsonify({"message": "Test started", "total_tests": len(test_data), "task_description": task_description})
@app.route('/next_test', methods=['GET'])
def next_test():
    """Return the next test item together with two models to compare.

    Responds with 400 when the session holds no running test, and with a
    "Test completed" message once every item has been served. Otherwise the
    chosen model pair is remembered in the session and the item index is
    advanced before the item is returned.
    """
    has_active_test = 'current_index' in session and 'test_data' in session
    if not has_active_test:
        return jsonify({"message": "No active test found"}), 400

    position = session['current_index']
    items = session['test_data']
    if position >= len(items):
        # Every item has been served already.
        return jsonify({"message": "Test completed"}), 200

    # Let the Elo system pick the two models that compete on this item.
    pair = elo_rank_system.sample_next_match()
    item = items[position]
    session['selected_models'] = pair
    session['current_index'] += 1

    key_a = pair[0]
    key_b = pair[1]
    payload = {
        "text": item["text"],
        "input_path": item["input_path"],
        "model_a": key_a,
        "model_b": key_b,
        "audio_a": item[key_a],
        "audio_b": item[key_b]
    }
    return jsonify(payload)
@app.route('/submit_result', methods=['POST'])
def submit_result():
    """Record the user's choice for the item served last and update Elo.

    Expects a JSON body whose ``chosen_model`` is ``'A'`` or ``'B'``. The
    result is appended to the user's result file and the winner/loser pair is
    reported to the Elo system.

    Responds with 400 when ``chosen_model`` is missing or invalid, or when the
    session does not describe a running test with a served item.
    """
    data = request.get_json(silent=True)
    chosen_model = data.get('chosen_model') if isinstance(data, dict) else None
    # Anything other than 'A' used to be counted as a win for model B.
    if chosen_model not in ('A', 'B'):
        return jsonify({"message": "chosen_model must be 'A' or 'B'"}), 400
    username = session.get('username')
    task = session.get('task')
    session_id = session.get('session_id')
    selected_models = session.get('selected_models')
    test_items = session.get('test_data')
    # /next_test increments the index after serving, so the item being judged
    # is the previous one. Defaulting to 0 makes a missing session yield -1
    # (rejected below) instead of raising TypeError.
    current_index = session.get('current_index', 0) - 1
    if (not username or not task or not selected_models or not test_items
            or not 0 <= current_index < len(test_items)):
        return jsonify({"message": "No active test found"}), 400
    model_a = selected_models[0]
    model_b = selected_models[1]
    result = {
        "name": username,
        "chosen_model": chosen_model,
        "model_a": model_a,
        "model_b": model_b,
        "result": {
            model_a: 1 if chosen_model == 'A' else 0,
            model_b: 1 if chosen_model == 'B' else 0
        }
    }
    # Merge the test item with the verdict; session_id keeps result files of
    # different runs apart.
    test_data = test_items[current_index]
    result_data = {**test_data, **result}
    save_result(task, username, result_data, session_id)
    # Update the Elo ratings: first argument is the winner.
    if chosen_model == 'A':
        elo_rank_system.record_match(model_a, model_b)
    else:
        elo_rank_system.record_match(model_b, model_a)
    return jsonify({"message": "Result submitted", "model_a": model_a, "model_b": model_b, "chosen_model": chosen_model})
@app.route('/end_test', methods=['GET'])
def end_test():
    """Drop everything stored in the session and confirm completion."""
    session.clear()
    payload = {"message": "Test completed"}
    return jsonify(payload)
# Serve the single-page front end.
@app.route('/')
def index():
    """Render the index.html template."""
    page = render_template('index.html')
    return page
if __name__ == '__main__':
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(RESULTS_DIR, exist_ok=True)
    # The Werkzeug debugger lets a browser execute arbitrary code, so it must
    # not be on by default for a server bound to all interfaces. Set
    # FLASK_DEBUG=1 to opt in during local development.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true', 'yes')
    # Bind to all interfaces so the app is reachable from the local network.
    app.run(debug=debug_enabled, host="0.0.0.0", port=6002)

elo_rank.py ADDED Viewed

	@@ -0,0 +1,133 @@

+import random
+import json
class EloRank:
    """In-memory Elo rating table with win counters for a set of models."""

    def __init__(self, initial_rating=1000, k_factor=32):
        """
        Initialize the EloRank class.
        :param initial_rating: Initial ELO rating for each model.
        :param k_factor: The K-factor that determines the sensitivity of rating changes.
        """
        self.ratings = {}
        self.initial_rating = initial_rating
        self.k_factor = k_factor
        self.wins = {}

    def add_model(self, model_id):
        """
        Add a model with the initial rating and zero wins.
        Adding an ID that is already present resets its rating and win count.
        :param model_id: Unique identifier for the model.
        """
        self.ratings[model_id] = self.initial_rating
        self.wins[model_id] = 0

    def record_match(self, winner, loser):
        """
        Update the ratings based on a match result.
        :param winner: Model ID of the winner.
        :param loser: Model ID of the loser.
        :raises KeyError: If either model has not been added.
        """
        rating_winner = self.ratings[winner]
        rating_loser = self.ratings[loser]
        expected_winner = self.expected_score(rating_winner, rating_loser)
        expected_loser = self.expected_score(rating_loser, rating_winner)
        # Actual score is 1 for the winner and 0 for the loser.
        self.ratings[winner] += self.k_factor * (1 - expected_winner)
        self.ratings[loser] += self.k_factor * (0 - expected_loser)
        # Update win count
        self.wins[winner] += 1

    def expected_score(self, rating_a, rating_b):
        """
        Calculate the expected score of model A against model B.
        :param rating_a: Rating of model A.
        :param rating_b: Rating of model B.
        :return: Expected score, a value between 0 and 1.
        """
        return 1 / (1 + 10 ** ((rating_b - rating_a) / 400))

    def get_rating(self, model_id):
        """
        Get the current rating of a model.
        :param model_id: Unique identifier for the model.
        :return: Current rating of the model, or None if it is unknown.
        """
        return self.ratings.get(model_id)

    def get_wins(self, model_id):
        """
        Get the number of wins of a model.
        :param model_id: Unique identifier for the model.
        :return: Number of wins of the model (0 if it is unknown).
        """
        return self.wins.get(model_id, 0)

    def get_top_models(self, n=2):
        """
        Get the top N models by rating.
        :param n: Number of top models to retrieve.
        :return: List of model IDs of the top models, best first.
        """
        return sorted(self.ratings, key=self.ratings.get, reverse=True)[:n]

    def sample_next_match(self):
        """
        Sample two distinct models for the next match.
        Each draw picks a model with probability proportional to its current
        rating, so higher-rated models are paired more often; pairs that
        contain the same model twice are redrawn.
        :return: Tuple of two different model IDs.
        :raises ValueError: If fewer than two models are registered.
        """
        model_ids = list(self.ratings.keys())
        # With a single model the redraw loop below could never terminate.
        if len(model_ids) < 2:
            raise ValueError("At least two models are required to sample a match")
        total_rating = sum(self.ratings[model_id] for model_id in model_ids)
        probabilities = [self.ratings[model_id] / total_rating for model_id in model_ids]
        # Redraw until the two sampled models differ.
        next_match = random.choices(model_ids, probabilities, k=2)
        while next_match[0] == next_match[1]:
            next_match = random.choices(model_ids, probabilities, k=2)
        return tuple(next_match)

    def process_match_records(self, file_path):
        """
        Process match records from a JSON file and update ratings and win counts accordingly.
        Each record needs the keys 'winner', 'model_1' and 'model_2'. Unknown
        models are added on the fly; a record whose winner matches neither
        model leaves the ratings unchanged.
        :param file_path: Path to the JSON file containing match records.
        """
        # Explicit encoding so parsing does not depend on the platform default.
        with open(file_path, 'r', encoding='utf-8') as file:
            match_records = json.load(file)
        for record in match_records:
            winner = record['winner']
            model_1 = record['model_1']
            model_2 = record['model_2']
            # Add models if they are not already added
            for model_id in (model_1, model_2):
                if model_id not in self.ratings:
                    self.add_model(model_id)
            # Record the match result
            if winner == model_1:
                self.record_match(model_1, model_2)
            elif winner == model_2:
                self.record_match(model_2, model_1)
+# # Example Usage
+# e = EloRank()
+# e.add_model('model_A')
+# e.add_model('model_B')
+# e.add_model('model_C')
+# e.record_match('model_A', 'model_B')
+# print(e.get_rating('model_A'))  # Should be greater than the initial rating
+# print(e.get_rating('model_B'))  # Should be less than the initial rating
+# print(e.get_top_models(2))  # Get the top 2 models
+# print(e.sample_next_match())  # Sample the next match based on ratings
+# # Process match records from a JSON file
+# e.process_match_records('match_records.json')
+# print(e.get_wins('model_A'))  # Get the number of wins for model_A

index.html ADDED Viewed

	@@ -0,0 +1,568 @@

+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Speech-to-Speech Model Comparison</title>
+    <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/css/bootstrap.min.css" rel="stylesheet">
+    <script src="https://code.jquery.com/jquery-3.6.0.min.js"></script>
+    <style>
+        body {
+            background-color: #f4f6f9;
+            font-family: 'Arial', sans-serif;
+        }
+        .container {
+            background-color: white;
+            border-radius: 10px;
+            box-shadow: 0 4px 12px rgba(0, 0, 0, 0.1);
+            padding: 30px;
+        }
+        h3 {
+            font-size: 1.2rem;
+            /* 调整标题字体大小 */
+            font-weight: bold;
+            color: #333;
+        }
+        .form-control {
+            border-radius: 25px;
+            padding: 15px;
+        }
+        .btn {
+            border-radius: 25px;
+            font-size: 0.9rem;
+            padding: 8px 16px;
+            transition: background-color 0.3s ease;
+        }
+        .btn-primary {
+            background-color: #007bff;
+            border: none;
+        }
+        .btn-primary:hover {
+            background-color: #0056b3;
+        }
+        .btn-success {
+            background-color: #28a745;
+            border: none;
+        }
+        .btn-success:hover {
+            background-color: #218838;
+        }
+        .btn-selected {
+            background-color: #155724 !important;
+            color: white !important;
+        }
+        .btn-option {
+            font-size: 0.9rem;
+            padding: 8px 20px;
+            margin: 0 10px;
+        }
+        #test-content {
+            display: none;
+        }
+        #category-select,
+        #task-select-dropdown {
+            width: 120% !important;
+            /* Make the dropdown 120% as wide as its containing column */
+            margin: 0 auto;
+            /* 居中对齐 */
+        }
+        #confirm-choice,
+        #next-test {
+            display: none;
+            transition: opacity 0.3s ease;
+        }
+        #model-comparison {
+            display: none;
+            opacity: 0;
+            transition: opacity 0.3s ease;
+        }
+        #model-comparison.show {
+            opacity: 1;
+        }
+        #switch-task {
+            font-size: 0.8rem;
+            padding: 5px 10px;
+            position: absolute;
+            top: 10px;
+            right: 20px;
+            display: none;
+        }
+        #task-description {
+        display: none;
+        }
+    </style>
+</head>
+<body>
+    <div class="container py-5">
+        <h3 class="text-center mb-4">Speech-to-Speech Model Comparison</h3>
+        <div id="evaluation-info" class="mb-5">
+            <p class="text-start">
+                <strong>Welcome to the Speech-to-Speech (S2S) Model Evaluation!</strong>
+                <br><br>
+                In this evaluation, you will assess the performance of 6 S2S models:
+                <strong>ChatGPT-4o</strong>, <strong>FunAudioLLM</strong>, <strong>SpeechGPT</strong>,
+                <strong>Mini-Omni</strong>, <strong>Cascade</strong>, and <strong>LLaMA-Omni</strong>.
+                The goal is to evaluate how well these models handle various speech tasks across different domains.
+                <br><br>
+                Once you select a specific domain and task (e.g., <em>Educational Tutoring</em> and <em>Rhythm Control</em>),
+                you will proceed to the evaluation stage. In each round, you will be presented with an audio input.
+                For example:
+                <br><br>
+                <!-- Left-aligned Audio Sample and Audio Control -->
+                <span style="vertical-align: middle; line-height: 1.2; display: inline-block;"><strong>Audio Sample:</strong></span>
+                <audio controls style="vertical-align: middle;">
+                    <source src="/static/audio/sample/input_audio.wav" type="audio/wav">
+                </audio>
+                <br><br>
+                The corresponding text is:
+                <em>"Say the following sentence at my speed first, then say it again very slowly:
+                    'Artificial intelligence is changing the world in many ways.'" </em>
+                <small>(Note: the audio plays at 1.5x the normal speed.)</small>
+                <br><br>
+                The responses of different S2S models will be provided, and your task is to choose which response best follows
+                the instructions. For example<small>(Note: During the evaluation process, you will be provided with responses from only the two models that have the most comparative significance.)</small>:
+                <br><br>
+                <!-- ChatGPT-4o Output -->
+                <span><strong>ChatGPT-4o:</strong></span>
+                <audio controls style="vertical-align: middle;">
+                    <source src="/static/audio/sample/4o_audio.wav" type="audio/wav">
+                </audio>
+                <p class="text-start" style="margin-left: 20px;">
+                    <strong>Performance:</strong> Speech: Partially followed the instruction on speed. Semantics: Accurately followed the instruction, with no semantic deviation or missing information.
+                </p>
+                <!-- FunAudioLLM Output -->
+                <span><strong>FunAudioLLM:</strong></span>
+                <audio controls style="vertical-align: middle;">
+                    <source src="/static/audio/sample/FunAudio_audio.wav" type="audio/wav">
+                </audio>
+                <p class="text-start" style="margin-left: 20px;">
+                    <strong>Performance:</strong> Speech: Partially followed the instruction on speed. Semantics: Accurately followed the instruction, with no semantic deviation or missing information.
+                </p>
+                <!-- SpeechGPT Output -->
+                <span><strong>SpeechGPT:</strong></span>
+                <audio controls style="vertical-align: middle;">
+                    <source src="/static/audio/sample/SpeechGPT.wav" type="audio/wav">
+                </audio>
+                <p class="text-start" style="margin-left: 20px;">
+                    <strong>Performance:</strong> Speech: Did not follow the instruction on speed. Semantics: Partially followed the instruction, with minor semantic deviation and missing information.
+                </p>
+                <!-- Mini-Omni Output -->
+                <span><strong>Mini-Omni:</strong></span>
+                <audio controls style="vertical-align: middle;">
+                    <source src="/static/audio/sample/mini-omni.wav" type="audio/wav">
+                </audio>
+                <p class="text-start" style="margin-left: 20px;">
+                    <strong>Performance:</strong> Speech: Did not follow the instruction on speed. Semantics: Did not follow the instruction, with significant semantic deviation and missing information.
+                </p>
+                <p class="text-start">
+                    After making your choice, you'll proceed to the next round.
+                </p>
+                <strong>Please enter your username and start the evaluation!</strong>
+            </p>
+        </div>
+        <div id="user-input" class="text-center">
+            <div class="mb-3">
+                <input type="text" id="username" class="form-control w-50 mx-auto" placeholder="Your username" />
+            </div>
+            <button class="btn btn-primary" onclick="startTest()">Start Test</button>
+        </div>
+        <div id="task-select" class="text-center" style="display: none;">
+            <h3 class="my-4">Select Test Category:</h3>
+            <div class="d-grid gap-2 col-6 mx-auto">
+                <!-- Category dropdown -->
+                <select id="category-select" class="form-select mx-auto" onchange="populateTasks()">
+                    <option value="" disabled selected>Select Category</option>
+                    <option value="educational">Educational Tutoring</option>
+                    <option value="social">Social Companionship</option>
+                    <option value="entertainment">Entertainment Dubbing</option>
+                    <option value="medical">Medical Consultation</option>
+                </select>
+            </div>
+            <h3 class="my-4" id="specific-task-title" style="display: none;">Select Specific Task:</h3>
+            <div class="d-grid gap-2 col-6 mx-auto">
+                <!-- Task dropdown -->
+                <select id="task-select-dropdown" class="form-select mx-auto" style="display: none;">
+                    <option value="" disabled selected>Select Specific Task</option>
+                    <!-- Options will be populated dynamically -->
+                </select>
+            </div>
+            <button class="btn btn-primary mt-4" id="start-task-btn" onclick="selectTaskFromDropdown()"
+                style="display: none;">Start Task</button>
+        </div>
+        <button id="switch-task" class="btn btn-warning" onclick="switchTask()">Switch Category and Tasks</button>
+        <div id="test-content">
+            <div class="text-center">
+                <div class="row justify-content-center">
+                    <div class="col-md-6 text-start double-text" style="margin-bottom: 10px;">
+                        <strong>Task description:</strong> <span id="task-description"></span>
+                    </div>
+                </div>
+                <!-- 在音频控件前添加粗体的 Audio: -->
+                <div class="row justify-content-center">
+                    <div class="col-md-6 d-flex justify-content-center align-items-center mb-4">
+                        <strong class="me-2">Audio:</strong> <!-- 加粗的 Audio 标签 -->
+                        <audio id="input-audio" controls></audio>
+                    </div>
+                </div>
+                <div class="row justify-content-center">
+                    <div class="col-md-6 text-start double-text" style="margin-bottom: 10px;">
+                        <strong>Audio text:</strong> <span id="test-text"></span>
+                    </div>
+                </div>
+                <!-- 调整后的左对齐样式 -->
+                <div class="row justify-content-center">
+                    <div class="col-md-6 text-start">
+                        <p><strong>Question:</strong> Which of the following two models gives the better response?</p>
+                    </div>
+                </div>
+                <!-- 使用flex布局放置音频 -->
+                <div class="mb-4 text-center">
+                    <div class="model-section d-flex align-items-center justify-content-center mb-3">
+                        <h6 class="me-2" style="margin-bottom: 0; margin-top: 5px; font-weight: bold;">Model A:</h6>
+                        <audio id="audio-a" controls></audio>
+                    </div>
+                    <div class="model-section d-flex align-items-center justify-content-center">
+                        <h6 class="me-2" style="margin-bottom: 0; margin-top: 5px; font-weight: bold;">Model B:</h6>
+                        <audio id="audio-b" controls></audio>
+                    </div>
+                </div>
+                <div class="d-flex justify-content-center mt-4">
+                    <button class="btn btn-success btn-option mx-2" onclick="selectModel('A')">Model A</button>
+                    <button class="btn btn-success btn-option mx-2" onclick="selectModel('B')">Model B</button>
+                </div>
+                <div id="model-comparison" class="text-center mt-4">
+                    <p>Model A: <span id="model-a"></span></p>
+                    <p>Model B: <span id="model-b"></span></p>
+                    <p>Your choice: <span id="chosen-model"></span></p>
+                </div>
+                <button id="confirm-choice" class="btn btn-primary mt-4" onclick="confirmChoice()">Confirm
+                    Selection</button>
+                <button id="next-test" class="btn btn-primary mt-4" onclick="loadNextTest()">Next Test</button>
+            </div>
+        </div>
+        <div id="test-completed" class="text-center" style="display: none;">
+            <h3>Thank you for completing the <span id="completed-task"></span> test!</h3>
+            <p>Would you like to test another category or task?</p>
+            <button class="btn btn-primary" onclick="switchTask()">Yes</button>
+            <button class="btn btn-secondary" onclick="endTest()">No</button>
+        </div>
+        <!-- 引入 Bootstrap 脚本 -->
+        <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/js/bootstrap.bundle.min.js"></script>
+        <script>
+            let username;
+            let task;
+            let chosenModel;
+            let modelA, modelB; // 存储映射后的模型名称
+            // 根据模型路径显示友好的模型名称
+            const modelNames = {
+                "output_path_speechgpt": "SpeechGPT",
+                "output_path_miniomni": "Mini-Omni",
+                "output_path_4o": "ChatGPT-4o",
+                "output_path_funaudio": "FunAudioLLM",
+                "output_path_4o_cascade": "Cascade",
+                "output_path_4o_llama_omni": "LLaMA-Omni"
+            };
+            function startTest() {
+                username = $("#username").val();
+                if (!username) {
+                    alert("Please enter a username");
+                    return;
+                }
+                $("#evaluation-info").hide();
+                $("#user-input").hide();
+                $("#task-select").show();
+            }
+            function switchTask() {
+                // 清理页面所有之前的测试内容
+                $("#task-description").text('');
+                $("#test-content").hide();       // 隐藏测试内容
+                $("#test-text").text('');        // 清空音频文本
+                $("#input-audio").attr("src", '');  // 清空音频路径
+                $("#audio-a").attr("src", '');      // 清空模型 A 音频
+                $("#audio-b").attr("src", '');      // 清空模型 B 音频
+                $("#chosen-model").text('');        // 清空用户选择的模型显示
+                $("#model-a").text('');             // 清空模型 A 名称显示
+                $("#model-b").text('');             // 清空模型 B 名称显示
+                $("#confirm-choice").hide();        // 隐藏确认选择按钮
+                $("#next-test").hide();             // 隐藏下一个测试按钮
+                $("#model-comparison").removeClass('show').hide();  // 隐藏模型对比部分
+                // 显示选择任务的页面
+                $("#test-completed").hide();  // 隐藏感谢页面
+                $("#task-select").show();     // 显示选择方向页面
+                $("#switch-task").hide();     // 隐藏切换按钮直到新测试开始
+            }
+            function selectTask(selectedTask) {
+                task = selectedTask;
+                // 切换任务时清理页面内容，防止旧数据残留
+                $("#task-description").text('');
+                $("#test-text").text('');        // 清空音频文本
+                $("#input-audio").attr("src", '');  // 清空音频路径
+                $("#audio-a").attr("src", '');      // 清空模型 A 音频
+                $("#audio-b").attr("src", '');      // 清空模型 B 音频
+                $("#chosen-model").text('');        // 清空用户选择的模型显示
+                $("#model-a").text('');             // 清空模型 A 名称显示
+                $("#model-b").text('');             // 清空模型 B 名称显示
+                $("#confirm-choice").hide();        // 隐藏确认选择按钮
+                $("#next-test").hide();             // 隐藏下一个测试按钮
+                $("#model-comparison").removeClass('show').hide();  // 隐藏模型对比部分
+                // 隐藏选择任务界面，显示切换方向按钮
+                $("#task-select").hide();
+                $("#switch-task").show();
+                // 发起请求获取新的测试数据
+                $.ajax({
+                    url: '/start_test',
+                    type: 'POST',
+                    contentType: 'application/json',
+                    data: JSON.stringify({ username: username, task: task }),
+                    success: function (data) {
+                        $("#test-content").show();
+                        loadNextTest();
+                    },
+                    error: function (xhr, status, error) {
+                        console.error("Error occurred: ", status, error);
+                    }
+                });
+            }
+            function populateTasks() {
+                const category = $("#category-select").val();
+                const taskDropdown = $("#task-select-dropdown");
+                // 清空任务下拉菜单
+                taskDropdown.empty();
+                // 添加禁用的默认选项
+                taskDropdown.append('<option value="" disabled selected>Select Specific Task</option>');
+                // 根据所选分类填充相应的任务选项
+                if (category === 'educational') {
+                    taskDropdown.append('<option value="pronunciation">Correcting pronunciation ability</option>');
+                    taskDropdown.append('<option value="rhythm">Rhythm control capabilities</option>');
+                    taskDropdown.append('<option value="translation">Cross-language translation with emotion</option>');
+                    taskDropdown.append('<option value="language">Language consistency</option>');
+                    taskDropdown.append('<option value="pause">Pause and segmentation</option>');
+                    taskDropdown.append('<option value="polyphone">Polyphonic word comprehension</option>');
+                    taskDropdown.append('<option value="stress">Emphasis control</option>');
+                } else if (category === 'social') {
+                    taskDropdown.append('<option value="emotion">Emotion recognition and expression</option>');
+                    taskDropdown.append('<option value="identity">Identity coping ability</option>');
+                    taskDropdown.append('<option value="humor">Implications ability</option>');
+                    taskDropdown.append('<option value="irony">Sarcasm detection</option>');
+                } else if (category === 'entertainment') {
+                    taskDropdown.append('<option value="natural">Ability to simulate natural sound</option>');
+                    taskDropdown.append('<option value="singing">Singing ability</option>');
+                    taskDropdown.append('<option value="tongue">Tongue twisters capabilities</option>');
+                    taskDropdown.append('<option value="crosstalk">Crosstalk ability</option>');
+                    taskDropdown.append('<option value="poetry">Poetry recitation</option>');
+                    taskDropdown.append('<option value="role">Role-playing</option>');
+                    taskDropdown.append('<option value="story">Storytelling</option>');
+                } else if (category === 'medical') {
+                    taskDropdown.append('<option value="healthcare">Health consultation</option>');
+                    taskDropdown.append('<option value="illness">Querying symptoms</option>');
+                    taskDropdown.append('<option value="psychological">Psychological comfort</option>');
+                }
+                // 显示任务下拉菜单和开始按钮
+                if (category) {
+                    $("#specific-task-title").show();
+                    $("#task-select-dropdown").show();
+                    $("#start-task-btn").show();
+                } else {
+                    $("#specific-task-title").hide();
+                    $("#task-select-dropdown").hide();
+                    $("#start-task-btn").hide();
+                }
+            }
+            function selectTaskFromDropdown() {
+                const selectedTask = $("#task-select-dropdown").val();
+                if (selectedTask) {
+                    task = selectedTask;
+                    $.ajax({
+                        url: '/start_test',
+                        type: 'POST',
+                        contentType: 'application/json',
+                        data: JSON.stringify({ username: username, task: task }),
+                        success: function (data) {
+                            // 在页面显示任务描述
+                            $("#task-description").text(data.task_description);
+                            $("#task-description").show();
+                            $("#task-select").hide();
+                            $("#test-content").show();
+                            $("#switch-task").show();  // 显示右上角的切换任务按钮
+                            loadNextTest();
+                        },
+                        error: function (xhr, status, error) {
+                            console.error("Error occurred: ", status, error);
+                        }
+                    });
+                } else {
+                    alert("Please select a specific task.");
+                }
+            }
+            function loadNextTest() {
+                $.get('/next_test', function (data) {
+                    if (data.message === 'Test completed') {
+                        $("#test-content").hide();  // 隐藏测试内容
+                        $("#test-completed").show(); // 显示测试完成的页面
+                        // 动态显示完成的任务名称
+                        $("#completed-task").text(task);
+                        // 清空 session 数据，防止继续原方向的测试
+                        sessionStorage.removeItem('current_index');
+                    } else {
+                        // 正常加载测试题目
+                        console.log(data);  // 添加调试信息，检查 data 的内容
+                        $("#task-description").text(data.task_description);
+                        $("#test-text").text(data.text);
+                        $("#input-audio").attr("src", data.input_path);
+                        $("#audio-a").attr("src", data.audio_a);
+                        $("#audio-b").attr("src", data.audio_b);
+                        // 更新模型信息
+                        modelA = modelNames[data.model_a];
+                        modelB = modelNames[data.model_b];
+                        $("#model-a").text(modelA);
+                        $("#model-b").text(modelB);
+                        $("#next-test").hide();
+                        $("#model-comparison").hide();
+                        $("#confirm-choice").show();
+                        chosenModel = null;
+                        $(".btn-option").prop('disabled', false);
+                        $(".btn-option").removeClass("btn-selected").addClass("btn-success");
+                    }
+                }, 'json').fail(function (xhr, status, error) {
+                    console.error("Failed to load test data:", status, error);
+                });
+            }
+            function endTest() {
+                // 用户选择结束测试，可以跳转到结束页面或者返回主页
+                alert("Thank you for participating in the test!");
+                // 或者可以通过 window.location.href 跳转到其他页面
+                window.location.href = "/thank_you";  // 你可以设置一个感谢页面或者其他动作
+            }
+            function selectModel(model) {
+                // 将用户选择的模型存储在变量 chosenModel 中
+                chosenModel = model;
+                // 禁用所有模型选择按钮，防止重复点击
+                $(".btn-option").prop('disabled', false); // 允许用户重新选择
+                // 重置所有按钮的样式
+                $(".btn-option").removeClass("btn-selected").addClass("btn-success");
+                // 根据用户选择的模型按钮，添加 btn-selected 类改变其样式
+                if (model === 'A') {
+                    $("button:contains('Model A')").removeClass("btn-success").addClass("btn-selected");
+                } else if (model === 'B') {
+                    $("button:contains('Model B')").removeClass("btn-success").addClass("btn-selected");
+                }
+            }
+            function confirmChoice() {
+                // 检查是否选择了模型，如果没有选择，则提示用户
+                if (!chosenModel) {
+                    alert("Please select a model before confirming.");
+                    return;
+                }
+                // 禁用模型选择按钮，防止用户在确认选择后更改选择
+                $(".btn-option").prop('disabled', true);
+                // 一旦用户确认选择，立刻显示选择的详细信息
+                if (chosenModel === 'A') {
+                    $("#chosen-model").text(modelA);  // 使用 modelA 映射后的名称
+                } else {
+                    $("#chosen-model").text(modelB);  // 使用 modelB 映射后的名称
+                }
+                // 将模型 A 和 B 的名称显示在同一行中，包括中文和英文翻译
+                $("#model-a").text(modelA);  // 模型 A 名称
+                $("#model-b").text(modelB);  // 模型 B 名称
+                // 确认选择后才显示模型详细信息
+                $("#model-comparison").addClass('show');
+                $("#model-comparison").show();  // 展示选择信息
+                // 隐藏确认按钮，显示“Next Test”按钮
+                $("#confirm-choice").hide();
+                $("#next-test").show();
+                // 提交用户选择的模型
+                $.ajax({
+                    url: '/submit_result',
+                    type: 'POST',
+                    contentType: 'application/json',
+                    data: JSON.stringify({ chosen_model: chosenModel }),
+                    success: function (data) {
+                        // 成功提交后处理逻辑
+                    },
+                    error: function (xhr, status, error) {
+                        console.error("Error occurred: ", status, error);
+                    }
+                });
+            }
+        </script>
+</body>
+</html>

requirements.txt ADDED Viewed

	@@ -0,0 +1,26 @@

+blinker==1.8.2
+click==8.1.7
+contourpy==1.3.0
+cycler==0.12.1
+Flask==3.0.3
+Flask-Cors==5.0.0
+fonttools==4.54.1
+importlib_metadata==8.5.0
+importlib_resources==6.4.5
+itsdangerous==2.2.0
+Jinja2==3.1.4
+kiwisolver==1.4.7
+MarkupSafe==3.0.2
+matplotlib==3.9.2
+numpy==2.1.2
+packaging==24.1
+pandas==2.2.3
+pillow==11.0.0
+pyparsing==3.2.0
+python-dateutil==2.9.0.post0
+pytz==2024.2
+seaborn==0.13.2
+six==1.16.0
+tzdata==2024.2
+Werkzeug==3.0.5
+zipp==3.20.2