#!/usr/bin/env bash
# =============================================================================
# check-no-emoji.sh — Scan source code for emoji and emoji-like characters
#
# Detects two patterns:
#   1. Literal emoji/glyph characters (Unicode ranges)
#   2. Escaped unicode sequences in string literals (\u2713, \u25c6, etc.)
#
# Requires ripgrep (rg) with PCRE2 support, because the scan uses `rg -P`.
#
# Exit status:
#   0  clean
#   1  violations found
#   2  the scan could not run (rg missing, rg error, or no scan path exists)
# =============================================================================
set -euo pipefail

PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$PROJECT_ROOT"

# Scan paths (exclude docs, markdown, and vendored files)
SCAN_PATHS=(src/ configs/ tests/ scripts/)

# Files never scanned: minified bundles, test reports, and this script itself.
RG_EXCLUDES=(
  -g '!**/static/assets/**'
  -g '!**/playwright-report/**'
  -g '!check-no-emoji.sh'
)

# Unicode ranges: Emoji, Misc Symbols/Dingbats, Misc Technical, Geometric Shapes (excl. Box Drawings), Misc Symbols/Arrows
# Excluded: Arrows (2190-21FF) — "→" is a pervasive docstring flow indicator, not emoji
# Excluded: Box Drawings (2500-257F) — "─" is a pervasive comment section separator, not emoji
# ponytail: narrower ranges than the plan's U5 spec; plan scope says "comments and documentation are
# excluded" and these two ranges exist almost exclusively in comments/docstrings.
# Upgrade path: add a comment-aware filter.
readonly LITERAL_PATTERN='[\x{1F000}-\x{1FFFF}\x{2600}-\x{27BF}\x{2300}-\x{23FF}\x{25A0}-\x{25FF}\x{2B00}-\x{2BFF}]'

# Escaped sequences: \u2713, \u2717, \u25c6, etc. Kept in lockstep with the BMP ranges of
# LITERAL_PATTERN (one alternative per range):
#   26xx + 27[0-B]x -> U+2600-27BF    23xx    -> U+2300-23FF
#   25[A-F]x        -> U+25A0-25FF    2Bxx    -> U+2B00-2BFF
# ponytail: original plan spec used 2[0-5][0-9a-fA-F]{2} which matched \u2000-\u25FF (punctuation, math,
# box drawing) causing false positives in minified JS bundles. Narrowed to emoji-like ranges only.
readonly ESCAPE_PATTERN='\\u(26[0-9a-fA-F]{2}|27[0-9abAB][0-9a-fA-F]|23[0-9a-fA-F]{2}|25[a-fA-F][0-9a-fA-F]|2[bB][0-9a-fA-F]{2})'

VIOLATIONS=0

# A missing rg must not be reported as a clean scan.
if ! command -v rg >/dev/null 2>&1; then
  printf '%s\n' "[ERROR] ripgrep (rg) is required but was not found in PATH" >&2
  exit 2
fi

# Only hand rg the scan paths that exist: rg reports a missing path as an error
# (exit status 2), which would otherwise be indistinguishable from a real failure.
EXISTING_PATHS=()
for scan_path in "${SCAN_PATHS[@]}"; do
  if [[ -e "$scan_path" ]]; then
    EXISTING_PATHS+=("$scan_path")
  fi
done
if (( ${#EXISTING_PATHS[@]} == 0 )); then
  printf '%s\n' "[ERROR] None of the scan paths exist under $PROJECT_ROOT: ${SCAN_PATHS[*]}" >&2
  exit 2
fi

#######################################
# Run one PCRE2 search over the scan paths and report any hits.
# Globals:   RG_EXCLUDES, EXISTING_PATHS (read), VIOLATIONS (set to 1 on hits)
# Arguments: $1 - heading printed above the hits
#            $2 - PCRE2 pattern to search for
# Outputs:   heading and matching lines (path:line:text) on stdout;
#            rg diagnostics and tool-failure message on stderr
# Returns:   0 when the scan ran; exits the script with 2 when rg failed
#######################################
report_hits() {
  local heading=$1
  local pattern=$2
  local hits
  local rc=0

  # rg exits 0 on match, 1 on no match, anything else on error. The `|| rc=$?`
  # keeps `set -e` from aborting on the expected "no match" status.
  hits=$(rg -n --no-heading -P "${RG_EXCLUDES[@]}" -e "$pattern" -- "${EXISTING_PATHS[@]}") || rc=$?

  if (( rc > 1 )); then
    printf '%s\n' "[ERROR] rg failed with exit status $rc; scan result is not trustworthy" >&2
    exit 2
  fi

  if [[ -n "$hits" ]]; then
    printf '%s\n' "[FAIL] $heading"
    printf '%s\n' "$hits"
    printf '\n'
    VIOLATIONS=1
  fi
}

# Check literal emoji characters
report_hits "Literal emoji/glyph characters found:" "$LITERAL_PATTERN"

# Check escaped unicode sequences (same exclusions)
report_hits "Escaped unicode emoji sequences found:" "$ESCAPE_PATTERN"

if [[ $VIOLATIONS -eq 0 ]]; then
  printf '%s\n' "[OK] No emoji or emoji-like characters found in ${SCAN_PATHS[*]}"
  exit 0
else
  exit 1
fi