/**
 * Step 12 - Token budget management: multi-tier thresholds, circuit breaker,
 *           tool result truncation, and output token optimization
 *
 * Goal:
 * - parameterize context window by model (+ env override)
 * - adaptive buffer scaling for small windows
 * - four-state warning system: normal → warning → error → blocking
 * - circuit breaker to stop retrying failed auto-compaction
 * - escape condition to prevent compaction-triggers-compaction loops
 * - truncate oversized tool results before they enter the message history
 * - split max_tokens into three tiers (daily / retry / compact)
 * - invalidate usage anchor after compaction to avoid stale estimates
 *
 * Builds on step11.js — token estimation and compaction primitives are imported.
 */

import {
  estimateMessagesTokens,
  tokenCountWithEstimation,
  microCompactMessages,
  compactMessages,
} from "./step11.js";

// ─── Model Context Window ──────────────────────────────────────────

/** Context window (tokens) used for any model not listed in MODEL_CONTEXT_WINDOWS. */
const MODEL_CONTEXT_WINDOW_DEFAULT = 200_000;
/** Upper bound on the tokens reserved for summary output when computing the effective window. */
const MAX_OUTPUT_TOKENS_FOR_SUMMARY = 20_000;

/** Context window sizes (tokens) keyed by model identifier. */
const MODEL_CONTEXT_WINDOWS = {
  "claude-opus-4-20250514": 200_000,
  "claude-sonnet-4-20250514": 200_000,
  "claude-3-5-sonnet-20241022": 200_000,
};

/**
 * Resolve the context window size (in tokens) for a model.
 *
 * Resolution order:
 *  1. `CLAUDE_CODE_MAX_CONTEXT_TOKENS`, when it is a well-formed positive integer.
 *  2. The model's own entry in MODEL_CONTEXT_WINDOWS.
 *  3. MODEL_CONTEXT_WINDOW_DEFAULT.
 *
 * @param {string} [model] - Model identifier; unknown or missing values fall back to the default.
 * @returns {number} Context window size in tokens.
 */
export function getContextWindowForModel(model) {
  const envOverride = process.env.CLAUDE_CODE_MAX_CONTEXT_TOKENS;
  if (envOverride) {
    // Number() rejects trailing garbage ("200k" → NaN) instead of truncating it
    // the way parseInt does ("200k" → 200), so a typo cannot shrink the window
    // to a few hundred tokens; malformed values fall through to the model table.
    const parsed = Number(envOverride);
    if (Number.isSafeInteger(parsed) && parsed > 0) return parsed;
  }
  // Own-property check: a bare `MODEL_CONTEXT_WINDOWS[model]` would return
  // inherited members (e.g. "toString", "constructor") for such model names.
  if (Object.prototype.hasOwnProperty.call(MODEL_CONTEXT_WINDOWS, model)) {
    return MODEL_CONTEXT_WINDOWS[model];
  }
  return MODEL_CONTEXT_WINDOW_DEFAULT;
}

/**
 * Usable portion of a model's context window once room for summary output
 * has been set aside.
 *
 * The reservation is the smaller of MAX_OUTPUT_TOKENS_FOR_SUMMARY and 20% of
 * the window (rounded down), so windows under 100K give up a proportional
 * slice rather than the full fixed amount.
 *
 * @param {string} [model] - Model identifier passed to getContextWindowForModel.
 * @returns {number} Context window minus the reserved summary tokens.
 */
export function getEffectiveContextWindowSize(model) {
  const totalWindow = getContextWindowForModel(model);
  const twentyPercent = Math.floor(totalWindow * 0.2);
  return totalWindow - Math.min(MAX_OUTPUT_TOKENS_FOR_SUMMARY, twentyPercent);
}

// ─── Adaptive Buffer Scaling ───────────────────────────────────────

// Headroom (tokens) subtracted from the effective window to derive each threshold.
const AUTOCOMPACT_BUFFER_TOKENS = 13_000;
const WARNING_THRESHOLD_BUFFER_TOKENS = 20_000;
const MANUAL_COMPACT_BUFFER_TOKENS = 3_000;

// Effective window size at (or above) which buffers are used unscaled.
const REFERENCE_WINDOW = 180_000;

/**
 * Shrink a buffer in proportion to the effective window.
 *
 * At or above REFERENCE_WINDOW the buffer is returned untouched. Below it,
 * the buffer is multiplied by effectiveWindow / REFERENCE_WINDOW and rounded
 * to the nearest integer (e.g. a 30K effective window keeps about 17%).
 *
 * @param {number} buffer - Unscaled buffer size in tokens.
 * @param {number} effectiveWindow - Effective context window in tokens.
 * @returns {number} Buffer to subtract from the effective window.
 */
function scaleBuffer(buffer, effectiveWindow) {
  const atOrAboveReference = effectiveWindow >= REFERENCE_WINDOW;
  if (!atOrAboveReference) {
    const shrinkFactor = effectiveWindow / REFERENCE_WINDOW;
    return Math.round(buffer * shrinkFactor);
  }
  return buffer;
}

/**
 * Token count at or above which auto-compaction is requested: the effective
 * window minus the scaled AUTOCOMPACT_BUFFER_TOKENS, floored at 0.
 *
 * @param {string} [model] - Model identifier.
 * @returns {number} Auto-compaction threshold in tokens.
 */
function getAutoCompactThreshold(model) {
  const effectiveWindow = getEffectiveContextWindowSize(model);
  const headroom = scaleBuffer(AUTOCOMPACT_BUFFER_TOKENS, effectiveWindow);
  return Math.max(0, effectiveWindow - headroom);
}

/**
 * Token count at or above which the warning state becomes "blocking": the
 * effective window minus the scaled MANUAL_COMPACT_BUFFER_TOKENS, floored at 0.
 *
 * @param {string} [model] - Model identifier.
 * @returns {number} Blocking limit in tokens.
 */
function getBlockingLimit(model) {
  const effectiveWindow = getEffectiveContextWindowSize(model);
  const headroom = scaleBuffer(MANUAL_COMPACT_BUFFER_TOKENS, effectiveWindow);
  return Math.max(0, effectiveWindow - headroom);
}

/**
 * Token count at or above which the warning state becomes "warning": the
 * effective window minus the scaled WARNING_THRESHOLD_BUFFER_TOKENS, floored at 0.
 *
 * @param {string} [model] - Model identifier.
 * @returns {number} Warning threshold in tokens.
 */
function getWarningThreshold(model) {
  const effectiveWindow = getEffectiveContextWindowSize(model);
  const headroom = scaleBuffer(WARNING_THRESHOLD_BUFFER_TOKENS, effectiveWindow);
  return Math.max(0, effectiveWindow - headroom);
}

// ─── Four-State Warning System ─────────────────────────────────────

/**
 * Classify a token estimate into one of four states for the given model.
 *
 * Limits are checked from most to least severe, and the first one the
 * estimate reaches (>=) wins:
 *   blocking limit → "blocking", auto-compact threshold → "error",
 *   warning threshold → "warning", otherwise "normal".
 *
 * @param {number} estimatedTokens - Current estimated token usage.
 * @param {string} [model] - Model identifier used to derive the limits.
 * @returns {{state: string, estimatedTokens: number, threshold: number, blockingLimit: number, contextWindow: number}}
 *   `threshold` is the auto-compact threshold.
 */
export function calculateTokenWarningState(estimatedTokens, model) {
  const contextWindow = getContextWindowForModel(model);
  const blockingLimit = getBlockingLimit(model);
  const autoCompactThreshold = getAutoCompactThreshold(model);
  const warningThreshold = getWarningThreshold(model);

  // Ordered most-severe first so `find` yields the highest tier reached.
  const tiers = [
    ["blocking", blockingLimit],
    ["error", autoCompactThreshold],
    ["warning", warningThreshold],
  ];
  const reached = tiers.find(([, limit]) => estimatedTokens >= limit);
  const state = reached ? reached[0] : "normal";

  return {
    state,
    estimatedTokens,
    threshold: autoCompactThreshold,
    blockingLimit,
    contextWindow,
  };
}

// ─── Circuit Breaker ───────────────────────────────────────────────

// Module-level count of auto-compaction attempts that failed back-to-back.
let consecutiveAutoCompactFailures = 0;

// Once the failure count reaches this value, shouldAutoCompact stops firing.
const MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES = 3;

/**
 * Close the circuit breaker by zeroing the consecutive-failure counter.
 *
 * @returns {void}
 */
export function resetAutoCompactFailures() {
  consecutiveAutoCompactFailures = 0;
}

/**
 * Tell whether auto-compaction should run for the current request.
 *
 * The answer is `false` when either guard trips:
 *  - escape condition: `querySource` is "compact" or "session_memory", so a
 *    compaction-related request never triggers another compaction;
 *  - circuit breaker: the consecutive-failure count has reached
 *    MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES.
 * Otherwise the result is whether `estimatedTokens` has reached the
 * auto-compact threshold for `model`.
 *
 * @param {number} estimatedTokens - Current estimated token usage.
 * @param {string} [model] - Model identifier used to derive the threshold.
 * @param {string} [querySource] - Origin of the request.
 * @returns {boolean} True when auto-compaction should fire.
 */
export function shouldAutoCompact(estimatedTokens, model, querySource) {
  const isCompactionRequest = ["compact", "session_memory"].includes(querySource);
  const isBreakerOpen =
    consecutiveAutoCompactFailures >= MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES;

  if (isCompactionRequest || isBreakerOpen) return false;

  const threshold = getAutoCompactThreshold(model);
  return estimatedTokens >= threshold;
}

/**
 * Run a forced compaction when shouldAutoCompact says so, keeping the
 * circuit-breaker counter up to date.
 *
 * - Not needed: the messages are handed back untouched.
 * - Success: the failure counter is zeroed and the compaction result returned;
 *   `didAutoCompact` mirrors `result.didCompact`.
 * - Failure: anything thrown inside the compaction step is swallowed, the
 *   failure counter is incremented, and the messages are handed back untouched.
 *
 * @param {Array} messages - Conversation messages.
 * @param {string} [model] - Model identifier used for the threshold.
 * @param {Function} callModel - Forwarded to compactMessages.
 * @param {object} [options] - Forwarded to tokenCountWithEstimation and (with
 *   `force: true` added) to compactMessages; `options.querySource` feeds the
 *   escape condition.
 * @returns {Promise<{result: object, didAutoCompact: boolean}>}
 */
export async function autoCompactIfNeeded(messages, model, callModel, options = {}) {
  // Fresh "nothing happened" payload; shared by the skip and failure paths.
  const untouched = () => ({
    result: { messages, didCompact: false, didMicroCompact: false },
    didAutoCompact: false,
  });

  const estimatedTokens = tokenCountWithEstimation(messages, options);
  const isNeeded = shouldAutoCompact(estimatedTokens, model, options.querySource);
  if (!isNeeded) return untouched();

  try {
    const forcedOptions = { ...options, force: true };
    const result = await compactMessages(messages, callModel, forcedOptions);
    consecutiveAutoCompactFailures = 0;
    return { result, didAutoCompact: result.didCompact };
  } catch {
    // Best-effort by design: the failure only feeds the circuit breaker.
    consecutiveAutoCompactFailures++;
    return untouched();
  }
}

// ─── Tool Result Truncation ────────────────────────────────────────

/** Default cap on tool result length, in UTF-16 code units (`String.prototype.length`). */
const DEFAULT_MAX_RESULT_SIZE_CHARS = 100_000;

/**
 * Cap a tool result string at `maxChars` and append a note describing the cut.
 *
 * Strings within the limit are returned unchanged. Non-string values (null,
 * undefined, structured content) are returned as-is rather than throwing.
 *
 * @param {string} content - Tool output to cap.
 * @param {number} [maxChars=DEFAULT_MAX_RESULT_SIZE_CHARS] - Maximum number of
 *   UTF-16 code units to keep.
 * @returns {string} The original content, or the kept prefix followed by a
 *   "[Output truncated: …]" note.
 */
export function truncateToolResult(content, maxChars = DEFAULT_MAX_RESULT_SIZE_CHARS) {
  if (typeof content !== "string" || content.length <= maxChars) return content;

  // If the cut would land between the two halves of a surrogate pair, back up
  // one unit so the kept prefix never ends in a lone high surrogate.
  let cut = maxChars;
  if (cut > 0) {
    const before = content.charCodeAt(cut - 1);
    const after = content.charCodeAt(cut);
    const splitsPair =
      before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
    if (splitsPair) cut -= 1;
  }

  const truncated = content.slice(0, cut);
  // Report what was actually kept; this equals maxChars except after a pair adjustment.
  return `${truncated}\n\n[Output truncated: ${content.length} chars total, showing first ${truncated.length}]`;
}

// ─── Output Token Tiers ────────────────────────────────────────────

// Output-token budgets: everyday requests, truncation retries, compaction.
export const CAPPED_DEFAULT_MAX_TOKENS = 8_000;
export const ESCALATED_MAX_TOKENS = 64_000;
export const COMPACT_MAX_OUTPUT_TOKENS = 20_000;

/**
 * Call the model with a capped output budget and retry once with the
 * escalated budget if the reply was cut off.
 *
 * The first call uses `options.maxTokens`, or CAPPED_DEFAULT_MAX_TOKENS when
 * that is null/undefined. A retry with ESCALATED_MAX_TOKENS happens only when
 * the first reply has `stopReason === "max_tokens"` and the first budget was
 * below ESCALATED_MAX_TOKENS. Each call receives only `{ maxTokens }`.
 *
 * @param {Function} callModel - `(messages, { maxTokens }) => result | Promise<result>`.
 * @param {Array} messages - Messages passed through to `callModel`.
 * @param {{maxTokens?: number}} [options] - Optional initial budget.
 * @returns {Promise<object>} The first reply, or the retry's reply when one was made.
 */
export async function streamMessageWithRetry(callModel, messages, options = {}) {
  const initialBudget = options.maxTokens ?? CAPPED_DEFAULT_MAX_TOKENS;
  const firstReply = await callModel(messages, { maxTokens: initialBudget });

  const wasTruncated = firstReply.stopReason === "max_tokens";
  const canEscalate = initialBudget < ESCALATED_MAX_TOKENS;
  if (!(wasTruncated && canEscalate)) {
    return firstReply;
  }

  return callModel(messages, { maxTokens: ESCALATED_MAX_TOKENS });
}

// ─── Usage Anchor Invalidation ─────────────────────────────────────

/**
 * Holds the (index, usage) pair that anchors token estimation.
 *
 * Compaction restructures the message array, so a previously stored index
 * and usage no longer describe it. Call `invalidate()` after compacting;
 * otherwise estimates keep reflecting pre-compaction numbers and
 * compaction is immediately triggered again.
 */
export class UsageAnchor {
  /** Start with no anchor: index -1, usage null. */
  constructor() {
    Object.assign(this, { index: -1, usage: null });
  }

  /**
   * Store a new anchor.
   * @param {number} index - Anchor position.
   * @param {object} usage - Usage object to associate with that position.
   */
  update(index, usage) {
    Object.assign(this, { index, usage });
  }

  /** Drop the anchor, returning to the initial (index -1, usage null) state. */
  invalidate() {
    Object.assign(this, { index: -1, usage: null });
  }

  /**
   * Build the estimation options for the current anchor.
   * @returns {{usage: object, usageAnchorIndex: number} | {}} Empty object when
   *   the index is negative or no usage is stored.
   */
  getEstimationOptions() {
    const isUnset = this.index < 0 || !this.usage;
    return isUnset ? {} : { usage: this.usage, usageAnchorIndex: this.index };
  }
}

// ─── MicroCompact Enhancements (v2) ───────────────────────────────

/** Tool names whose string results microCompactMessageV2 is allowed to clear. */
const COMPACTABLE_TOOLS_V2 = new Set(["Read", "Grep", "Glob", "Bash", "Edit", "Write"]);

/**
 * Produce a lightweight placeholder for tool result content that consists
 * solely of binary blocks (type "image" or "document").
 *
 * @param {*} content - A tool_result block's `content` value.
 * @returns {string | null} "[image]" when `content` is a non-empty array in
 *   which every element is an image/document block; otherwise null.
 */
function microCompactToolResultContent(content) {
  // An empty array must not count as "binary only": `[].every(...)` is
  // vacuously true and would replace empty content with a bogus placeholder.
  if (!Array.isArray(content) || content.length === 0) return null;

  // `?.` tolerates null/undefined entries, which simply do not count as binary.
  const hasOnlyBinary = content.every(
    (b) => b?.type === "image" || b?.type === "document",
  );
  return hasOnlyBinary ? "[image]" : null;
}

/**
 * Replace bulky tool_result content in a single message with placeholders.
 *
 * Messages whose `content` is not an array are returned as-is. Otherwise each
 * tool_result block is rewritten when either:
 *  - microCompactToolResultContent yields a placeholder for its content, or
 *  - its content is a string starting with `<ToolName>:` where the tool name
 *    is in COMPACTABLE_TOOLS_V2 (replaced by "[Old tool result content cleared]").
 * All other blocks are kept by reference.
 *
 * @param {{content: *}} message - Message to compact; not mutated.
 * @returns {{message: object, cleared: boolean}} The (possibly copied) message
 *   and whether any block was rewritten.
 */
export function microCompactMessageV2(message) {
  if (!Array.isArray(message.content)) return { message, cleared: false };

  // Decide what (if anything) should stand in for a block's content.
  const replacementFor = (block) => {
    if (block.type !== "tool_result") return null;

    // Binary-only content (image, document) takes precedence.
    const binaryPlaceholder = microCompactToolResultContent(block.content);
    if (binaryPlaceholder) return binaryPlaceholder;

    if (typeof block.content !== "string") return null;

    const [, toolName] = block.content.match(/^([A-Za-z0-9_-]+):/) ?? [];
    const isCompactable = Boolean(toolName) && COMPACTABLE_TOOLS_V2.has(toolName);
    return isCompactable ? "[Old tool result content cleared]" : null;
  };

  let cleared = false;
  const content = message.content.map((block) => {
    const replacement = replacementFor(block);
    if (replacement === null) return block;
    cleared = true;
    return { ...block, content: replacement };
  });

  return { message: { ...message, content }, cleared };
}

// ─── Compact Message Filtering (UI) ───────────────────────────────

/**
 * Tell whether a message is a compaction artifact, recognised by its string
 * content starting with the compact-boundary marker or the continuation
 * summary preamble. Messages with non-string content never match.
 *
 * @param {{content: *}} message - Message to inspect.
 * @returns {boolean} True for compact boundary / summary messages.
 */
export function isCompactMessage(message) {
  const { content } = message;
  if (typeof content !== "string") return false;

  const compactPrefixes = [
    "[CompactBoundary]",
    "This session is being continued from a previous conversation",
  ];
  return compactPrefixes.some((prefix) => content.startsWith(prefix));
}

// ─── Demo ──────────────────────────────────────────────────────────

/**
 * Demo body: prints each feature of this module to the console.
 *
 * Temporarily sets CLAUDE_CODE_MAX_CONTEXT_TOKENS to "30000" for the
 * small-window section and removes it again before the later sections.
 * Restoring any pre-existing value is the caller's job (see main).
 */
function runDemo() {
  const model = "claude-sonnet-4-20250514";

  console.log("=== Model Context Window ===");
  console.log(`Default window: ${getContextWindowForModel(model)}`);
  console.log(`Effective window: ${getEffectiveContextWindowSize(model)}`);

  console.log("\n=== Threshold System (200K window) ===");
  console.log(`  Warning threshold:     ${getWarningThreshold(model)}`);
  console.log(`  AutoCompact threshold: ${getAutoCompactThreshold(model)}`);
  console.log(`  Blocking limit:        ${getBlockingLimit(model)}`);

  // Simulate a small window through the environment override
  const smallModel = "test-small";
  process.env.CLAUDE_CODE_MAX_CONTEXT_TOKENS = "30000";
  console.log("\n=== Threshold System (30K window, env override) ===");
  console.log(`  Context window:        ${getContextWindowForModel(smallModel)}`);
  console.log(`  Effective window:      ${getEffectiveContextWindowSize(smallModel)}`);
  console.log(`  Warning threshold:     ${getWarningThreshold(smallModel)}`);
  console.log(`  AutoCompact threshold: ${getAutoCompactThreshold(smallModel)}`);
  console.log(`  Blocking limit:        ${getBlockingLimit(smallModel)}`);
  delete process.env.CLAUDE_CODE_MAX_CONTEXT_TOKENS;

  console.log("\n=== Warning State Transitions ===");
  const thresholds = [150_000, 162_000, 170_000, 179_000];
  for (const tokens of thresholds) {
    const result = calculateTokenWarningState(tokens, model);
    const pct = Math.round((tokens / result.contextWindow) * 100);
    console.log(`  ${tokens} tokens (${pct}%) → ${result.state}`);
  }

  console.log("\n=== Circuit Breaker ===");
  resetAutoCompactFailures();
  console.log(`  Should compact at 170K: ${shouldAutoCompact(170_000, model)}`);
  // Simulate enough consecutive failures to open the breaker
  for (let i = 0; i < MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES; i++) consecutiveAutoCompactFailures++;
  console.log(`  After 3 failures:      ${shouldAutoCompact(170_000, model)} (circuit open)`);
  resetAutoCompactFailures();
  console.log(`  After reset:           ${shouldAutoCompact(170_000, model)}`);

  console.log("\n=== Escape Condition ===");
  console.log(`  querySource="compact": ${shouldAutoCompact(170_000, model, "compact")}`);
  console.log(`  querySource=undefined: ${shouldAutoCompact(170_000, model)}`);

  console.log("\n=== Tool Result Truncation ===");
  const longOutput = "x".repeat(200_000);
  const truncated = truncateToolResult(longOutput);
  console.log(`  Input:  ${longOutput.length} chars`);
  console.log(`  Output: ${truncated.length} chars`);
  console.log(`  Ends with: ...${truncated.slice(-60)}`);

  console.log("\n=== Output Token Tiers ===");
  console.log(`  Daily:   ${CAPPED_DEFAULT_MAX_TOKENS}`);
  console.log(`  Retry:   ${ESCALATED_MAX_TOKENS}`);
  console.log(`  Compact: ${COMPACT_MAX_OUTPUT_TOKENS}`);

  console.log("\n=== Usage Anchor Invalidation ===");
  const anchor = new UsageAnchor();
  anchor.update(15, { input_tokens: 50000, output_tokens: 2000 });
  console.log(`  After update: index=${anchor.index}`, anchor.getEstimationOptions());
  anchor.invalidate();
  console.log(`  After invalidate: index=${anchor.index}`, anchor.getEstimationOptions());

  console.log("\n=== MicroCompact V2: binary content ===");
  const msgWithImage = {
    role: "user",
    content: [
      { type: "tool_result", tool_use_id: "t1", content: [{ type: "image", source: "..." }] },
    ],
  };
  const { message: compacted, cleared } = microCompactMessageV2(msgWithImage);
  console.log(`  Cleared: ${cleared}`);
  console.log(`  Result:  ${JSON.stringify(compacted.content[0].content)}`);

  console.log("\n=== Compact Message Filter ===");
  console.log(`  Boundary:  ${isCompactMessage({ content: "[CompactBoundary] type=auto" })}`);
  console.log(`  Summary:   ${isCompactMessage({ content: "This session is being continued from a previous conversation..." })}`);
  console.log(`  Normal:    ${isCompactMessage({ content: "Hello world" })}`);
}

/**
 * Entry point for the demo.
 *
 * The demo overwrites and deletes CLAUDE_CODE_MAX_CONTEXT_TOKENS, so the value
 * present on entry is captured first and put back afterwards — also when the
 * demo throws — leaving the process environment as it was found.
 */
function main() {
  const previousOverride = process.env.CLAUDE_CODE_MAX_CONTEXT_TOKENS;
  try {
    runDemo();
  } finally {
    // Assigning `undefined` to process.env would store the string "undefined",
    // so an originally-unset variable has to be deleted instead.
    if (previousOverride === undefined) {
      delete process.env.CLAUDE_CODE_MAX_CONTEXT_TOKENS;
    } else {
      process.env.CLAUDE_CODE_MAX_CONTEXT_TOKENS = previousOverride;
    }
  }
}

main();