61 lines
2.6 KiB
Bash
Executable File
61 lines
2.6 KiB
Bash
Executable File
#!/usr/bin/env bash
# =============================================================================
# check-no-emoji.sh — Scan source code for emoji and emoji-like characters
#
# Detects two patterns:
#   1. Literal emoji/glyph characters (Unicode ranges)
#   2. Escaped unicode sequences in string literals (\u2713, \u25c6, and
#      UTF-16 surrogate pairs such as \ud83d\ude00)
#
# Requires ripgrep (rg) with PCRE2 support, because the scans use `rg -P`.
#
# Exit status:
#   0  clean
#   1  violations found
#   2  the scan could not run (rg missing, rg error, nothing to scan)
# =============================================================================
set -euo pipefail

PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$PROJECT_ROOT"

# Scan paths (exclude docs, markdown, and vendored files)
SCAN_PATHS=(src/ configs/ tests/ scripts/)

# Shared exclusions: minified bundles, test reports, and this script itself.
RG_EXCLUDES=(
  -g '!**/static/assets/**'
  -g '!**/playwright-report/**'
  -g '!check-no-emoji.sh'
)

# Unicode ranges: Emoji, Misc Symbols/Dingbats, Misc Technical, Geometric Shapes (excl. Box Drawings), Misc Symbols/Arrows
# Excluded: Arrows (2190-21FF) — "→" is a pervasive docstring flow indicator, not emoji
# Excluded: Box Drawings (2500-257F) — "─" is a pervasive comment section separator, not emoji
# ponytail: narrower ranges than the plan's U5 spec; plan scope says "comments and documentation
# excluded" and these two ranges exist almost exclusively in comments/docstrings.
# Upgrade path: add a comment-aware filter.
readonly LITERAL_PATTERN='[\x{1F000}-\x{1FFFF}\x{2600}-\x{27BF}\x{2300}-\x{23FF}\x{25A0}-\x{25FF}\x{2B00}-\x{2BFF}]'

# Escaped sequences: \u2713, \u2717, \u25c6, etc. Kept in lockstep with LITERAL_PATTERN:
#   23xx        -> U+2300-23FF
#   25[a-f]x    -> U+25A0-25FF
#   26xx        -> U+2600-26FF
#   27[0-9ab]x  -> U+2700-27BF
#   2bxx        -> U+2B00-2BFF
#   d83[c-f]    -> UTF-16 high surrogates, i.e. the first half of any U+1F000-1FFFF escape
# ponytail: original plan spec used 2[0-5][0-9a-fA-F]{2} which matched \u2000-\u25FF (punctuation, math, box drawing)
# causing false positives in minified JS bundles. Narrowed to emoji-like ranges only.
readonly ESCAPE_PATTERN='\\u(23[0-9a-fA-F]{2}|25[a-fA-F][0-9a-fA-F]|26[0-9a-fA-F]{2}|27[0-9abAB][0-9a-fA-F]|2[bB][0-9a-fA-F]{2}|[dD]83[c-fC-F])'

#######################################
# Abort because the scan itself could not be performed.
# Arguments: $* - message
# Outputs:   "[ERROR] <message>" on stderr
# Returns:   never; exits the script with status 2
#######################################
die() {
  printf '[ERROR] %s\n' "$*" >&2
  exit 2
}

#######################################
# Run one ripgrep scan over TARGETS and print any hits.
# Globals:   RG_EXCLUDES, TARGETS (read)
# Arguments: $1 - PCRE2 pattern
#            $2 - headline printed above the hits
# Outputs:   headline, matching "file:line:text" lines and a blank line on
#            stdout; ripgrep's own diagnostics stay visible on stderr
# Returns:   0 when nothing matched, 1 when matches were printed;
#            exits 2 (via die) when ripgrep reports an error
#######################################
scan() {
  local pattern=$1 headline=$2
  local hits status=0

  # rg exits 0 on match, 1 on no match, 2 on error. Capture the status rather
  # than masking it, so a broken scanner cannot masquerade as a clean result.
  hits=$(rg -n --no-heading -P "${RG_EXCLUDES[@]}" -- "$pattern" "${TARGETS[@]}") || status=$?

  if [[ -n "$hits" ]]; then
    printf '%s\n' "$headline" "$hits" ""
  fi
  if (( status > 1 )); then
    die "ripgrep exited with status $status while scanning; see messages above"
  fi
  [[ -z "$hits" ]]
}

command -v rg >/dev/null 2>&1 || die "ripgrep (rg) is required but was not found on PATH"

# Only hand ripgrep the paths that exist: a missing optional directory must not
# turn into a scanner error, and an empty list must never reach rg (it would
# fall back to searching the current directory).
TARGETS=()
for candidate in "${SCAN_PATHS[@]}"; do
  if [[ -e "$candidate" ]]; then
    TARGETS+=("$candidate")
  fi
done
if (( ${#TARGETS[@]} == 0 )); then
  die "none of the scan paths (${SCAN_PATHS[*]}) exist under $PROJECT_ROOT"
fi

VIOLATIONS=0

# Check literal emoji characters
scan "$LITERAL_PATTERN" "[FAIL] Literal emoji/glyph characters found:" || VIOLATIONS=1

# Check escaped unicode sequences (same exclusions)
scan "$ESCAPE_PATTERN" "[FAIL] Escaped unicode emoji sequences found:" || VIOLATIONS=1

if (( VIOLATIONS )); then
  exit 1
fi

echo "[OK] No emoji or emoji-like characters found in ${TARGETS[*]}"
exit 0