diff --git a/backend/routes/projects.py b/backend/routes/projects.py
index e70785c..a984739 100644
--- a/backend/routes/projects.py
+++ b/backend/routes/projects.py
@@ -169,9 +169,10 @@ def update_project(project_id):
 @bp.route("/api/projects/<project_id>", methods=["DELETE"])
 def delete_project(project_id):
     """Delete a project"""
+    user = g.current_user
     project = Project.query.get(project_id)
     
-    if not project:
+    if not project or project.user_id != user.id:
         return err(404, "Project not found")
     
     # Delete project directory
diff --git a/backend/routes/stats.py b/backend/routes/stats.py
index 47a7f9c..4bedc6d 100644
--- a/backend/routes/stats.py
+++ b/backend/routes/stats.py
@@ -73,6 +73,15 @@ def _build_period_result(stats, period, start_date, end_date, days):
         if d not in daily_data:
             daily_data[d] = {"prompt": 0, "completion": 0, "total": 0}
     
+    # Aggregate by model
+    by_model = {}
+    for s in stats:
+        if s.model not in by_model:
+            by_model[s.model] = {"prompt": 0, "completion": 0, "total": 0}
+        by_model[s.model]["prompt"] += s.prompt_tokens
+        by_model[s.model]["completion"] += s.completion_tokens
+        by_model[s.model]["total"] += s.total_tokens
+
     return {
         "period": period,
         "start_date": start_date.isoformat(),
@@ -80,5 +89,6 @@ def _build_period_result(stats, period, start_date, end_date, days):
         "prompt_tokens": sum(s.prompt_tokens for s in stats),
         "completion_tokens": sum(s.completion_tokens for s in stats),
         "total_tokens": sum(s.total_tokens for s in stats),
-        "daily": daily_data
+        "daily": daily_data,
+        "by_model": by_model,
     }
diff --git a/backend/services/chat.py b/backend/services/chat.py
index 4c0af58..858e5ba 100644
--- a/backend/services/chat.py
+++ b/backend/services/chat.py
@@ -55,6 +55,8 @@ class ChatService:
             all_tool_results = []
             all_steps = []      # Collect all ordered steps for DB storage (thinking/text/tool_call/tool_result)
             step_index = 0  # Track global step index for ordering
+            total_completion_tokens = 0  # Accumulated across all iterations
+            total_prompt_tokens = 0      # Accumulated across all iterations
 
             for iteration in range(self.MAX_ITERATIONS):
                 full_content = ""
@@ -96,7 +98,17 @@ class ChatService:
                         except json.JSONDecodeError:
                             continue
 
-                        delta = chunk["choices"][0].get("delta", {})
+                        # Read usage before the choices check: with stream_options.include_usage the usage chunk may carry no choices and would be skipped below
+                        usage = chunk.get("usage", {})
+                        if usage:
+                            token_count = usage.get("completion_tokens", 0)
+                            prompt_tokens = usage.get("prompt_tokens", 0)
+
+                        choices = chunk.get("choices", [])
+                        if not choices:
+                            continue
+
+                        delta = choices[0].get("delta", {})
 
                         # Accumulate thinking content for this iteration
                         reasoning = delta.get("reasoning_content", "")
@@ -112,11 +124,6 @@ class ChatService:
                         # Accumulate tool calls from streaming deltas
                         tool_calls_list = self._process_tool_calls_delta(delta, tool_calls_list)
 
-                        usage = chunk.get("usage", {})
-                        if usage:
-                            token_count = usage.get("completion_tokens", 0)
-                            prompt_tokens = usage.get("prompt_tokens", 0)
-
                 except Exception as e:
                     yield f"event: error\ndata: {json.dumps({'content': str(e)}, ensure_ascii=False)}\n\n"
                     return
@@ -204,6 +211,8 @@ class ChatService:
                     })
                     messages.extend(tool_results)
                     all_tool_results.extend(tool_results)
+                    total_prompt_tokens += prompt_tokens
+                    total_completion_tokens += token_count
                     continue
 
                 # --- No tool calls: final iteration — emit remaining steps and save ---
@@ -230,6 +239,8 @@ class ChatService:
                     step_index += 1
 
                 suggested_title = None
+                total_prompt_tokens += prompt_tokens
+                total_completion_tokens += token_count
                 with app.app_context():
                     # Build content JSON with ordered steps array for DB storage.
                     # 'steps' is the single source of truth for rendering order.
@@ -246,17 +257,19 @@ class ChatService:
                         conversation_id=conv_id,
                         role="assistant",
                         content=json.dumps(content_json, ensure_ascii=False),
-                        token_count=token_count,
+                        token_count=total_completion_tokens,
                     )
                     db.session.add(msg)
                     db.session.commit()
 
-                    user = g.get("current_user")
-                    if user:
-                        record_token_usage(user.id, conv_model, prompt_tokens, token_count)
-
                     # Auto-generate title from first user message if needed
                     conv = db.session.get(Conversation, conv_id)
+
+                    # Record token usage (get user_id from conv, not g —
+                    # app.app_context() creates a new context where g.current_user is lost)
+                    if conv:
+                        record_token_usage(conv.user_id, conv_model, total_prompt_tokens, total_completion_tokens)
+
                     if conv and (not conv.title or conv.title == "新对话"):
                         user_msg = Message.query.filter_by(
                             conversation_id=conv_id, role="user"
@@ -277,7 +290,7 @@ class ChatService:
                         else:
                             suggested_title = None
 
-                yield f"event: done\ndata: {json.dumps({'message_id': msg_id, 'token_count': token_count, 'suggested_title': suggested_title}, ensure_ascii=False)}\n\n"
+                yield f"event: done\ndata: {json.dumps({'message_id': msg_id, 'token_count': total_completion_tokens, 'suggested_title': suggested_title}, ensure_ascii=False)}\n\n"
                 return
             
             yield f"event: error\ndata: {json.dumps({'content': 'exceeded maximum tool call iterations'}, ensure_ascii=False)}\n\n"
diff --git a/backend/services/glm_client.py b/backend/services/glm_client.py
index f1c0a22..e094140 100644
--- a/backend/services/glm_client.py
+++ b/backend/services/glm_client.py
@@ -45,6 +45,7 @@ class GLMClient:
             body["tool_choice"] = "auto"
         if stream:
             body["stream"] = True
+            body["stream_options"] = {"include_usage": True}
 
         return requests.post(
             api_url,