fischer-agentkit/scripts/check-no-emoji.sh

61 lines
2.6 KiB
Bash
Executable File

#!/usr/bin/env bash
# =============================================================================
# check-no-emoji.sh — Scan source code for emoji and emoji-like characters
#
# Detects two patterns:
# 1. Literal emoji/glyph characters (Unicode ranges)
# 2. Escaped unicode sequences in string literals (\u2713, \u25c6, etc.)
#
# Requires: ripgrep (rg) built with PCRE2 support (the scan uses -P).
#
# Exit status:
#   0  clean
#   1  violations found
#   2  the scan itself could not run (rg missing or rg reported an error)
# =============================================================================
set -euo pipefail

PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$PROJECT_ROOT"

# A missing scanner must never be reported as a clean tree.
if ! command -v rg >/dev/null 2>&1; then
  echo "[ERROR] ripgrep (rg) is required but was not found on PATH" >&2
  exit 2
fi

# Scan paths (exclude docs, markdown, and vendored files)
SCAN_PATH_LIST=(src/ configs/ tests/ scripts/)
# Space-joined form, used in the final status message.
SCAN_PATHS="${SCAN_PATH_LIST[*]}"

# Only hand existing paths to rg: a nonexistent path makes rg exit with status 2,
# which would be indistinguishable from a genuine scan failure.
SCAN_TARGETS=()
for scan_path in "${SCAN_PATH_LIST[@]}"; do
  if [[ -e "$scan_path" ]]; then
    SCAN_TARGETS+=("$scan_path")
  fi
done

# Unicode ranges: Emoji (1F000-1FFFF), Misc Symbols/Dingbats (2600-27BF), Misc Technical (2300-23FF),
# Geometric Shapes (25A0-25FF, excl. Box Drawings), Misc Symbols/Arrows (2B00-2BFF)
# Excluded: Arrows (2190-21FF) — "→" is a pervasive docstring flow indicator, not emoji
# Excluded: Box Drawings (2500-257F) — "─" is a pervasive comment section separator, not emoji
# ponytail: narrower ranges than the plan's U5 spec; plan scope says "except comments and documentation"
# and these two ranges exist almost exclusively in comments/docstrings. Upgrade path: add a comment-aware filter.
LITERAL_PATTERN='[\x{1F000}-\x{1FFFF}\x{2600}-\x{27BF}\x{2300}-\x{23FF}\x{25A0}-\x{25FF}\x{2B00}-\x{2BFF}]'

# Escaped sequences, kept in step with the LITERAL_PATTERN ranges:
#   \u23xx              -> U+2300-23FF
#   \u25a0-\u25ff       -> U+25A0-25FF
#   \u26xx              -> U+2600-26FF
#   \u2700-\u27bf       -> U+2700-27BF
#   \u2bxx              -> U+2B00-2BFF
#   \ud83c-\ud83f       -> high surrogates of U+1F000-1FFFF (JS/JSON/Java surrogate pairs)
#   \U0001Fxxx, \u{1Fxxx} -> U+1F000-1FFFF (Python-style and brace-style escapes)
# ponytail: original plan spec used 2[0-5][0-9a-fA-F]{2} which matched \u2000-\u25FF (punctuation, math, box drawing)
# causing false positives in minified JS bundles. Narrowed to emoji-like ranges only.
ESCAPE_PATTERN='\\u(23[0-9a-fA-F]{2}|25[a-fA-F][0-9a-fA-F]|26[0-9a-fA-F]{2}|27[0-9abAB][0-9a-fA-F]|2[bB][0-9a-fA-F]{2}|[dD]83[c-fC-F])|\\U0001[fF][0-9a-fA-F]{3}|\\u\{1[fF][0-9a-fA-F]{3}\}'

VIOLATIONS=0
# Output of the most recent scan_for call (empty when nothing matched).
SCAN_HITS=""

#######################################
# Run one ripgrep pass over the scan targets.
# Excludes minified bundles, test reports, and this script.
# Globals:   SCAN_TARGETS (read), SCAN_HITS (written)
# Arguments: $1 - PCRE2 pattern to search for
# Outputs:   an error message on stderr if rg fails
# Returns:   0 when the scan ran (with or without matches); exits the
#            script with status 2 when rg reports an error
#######################################
scan_for() {
  local pattern=$1
  local status=0

  SCAN_HITS=""
  # Nothing to scan; calling rg without paths would search the current directory.
  if (( ${#SCAN_TARGETS[@]} == 0 )); then
    return 0
  fi

  SCAN_HITS=$(rg -n --no-heading -P \
    -g '!**/static/assets/**' -g '!**/playwright-report/**' -g '!check-no-emoji.sh' \
    -e "$pattern" -- "${SCAN_TARGETS[@]}") || status=$?

  # rg: 0 = matches found, 1 = no matches, anything else = error.
  if (( status > 1 )); then
    echo "[ERROR] ripgrep exited with status $status; cannot certify the tree as clean" >&2
    exit 2
  fi
}

# Check literal emoji characters
scan_for "$LITERAL_PATTERN"
if [[ -n "$SCAN_HITS" ]]; then
  echo "[FAIL] Literal emoji/glyph characters found:"
  echo "$SCAN_HITS"
  echo ""
  VIOLATIONS=1
fi

# Check escaped unicode sequences (same exclusions)
scan_for "$ESCAPE_PATTERN"
if [[ -n "$SCAN_HITS" ]]; then
  echo "[FAIL] Escaped unicode emoji sequences found:"
  echo "$SCAN_HITS"
  echo ""
  VIOLATIONS=1
fi

if [[ $VIOLATIONS -eq 0 ]]; then
  echo "[OK] No emoji or emoji-like characters found in $SCAN_PATHS"
  exit 0
else
  exit 1
fi