github · pelikhan · Apr 23, 2026 · Apr 23, 2026 · Apr 23, 2026 · Apr 23, 2026
diff --git a/actions/setup/js/sanitize_content.test.cjs b/actions/setup/js/sanitize_content.test.cjs
@@ -1745,6 +1745,56 @@ describe("sanitize_content.cjs", () => {
       });
     });
 
+    describe("Unicode Tag Characters removal (U+E0000–U+E007F, Plane 14)", () => {
+      it("should strip a single Tag Characters codepoint (U+E0041 = TAG LATIN CAPITAL LETTER A)", () => {
+        // \uDB40\uDC41 is the UTF-16 surrogate pair for U+E0041
+        const input = "Hello\uDB40\uDC41World";
+        expect(sanitizeContent(input)).toBe("HelloWorld");
+      });
+
+      it("should strip LANGUAGE TAG (U+E0001) at the boundary of the Tag block", () => {
+        // \uDB40\uDC01 is the surrogate pair for U+E0001 (one above the block start U+E0000)
+        const input = "test\uDB40\uDC01";
+        expect(sanitizeContent(input)).toBe("test");
+      });
+
+      it("should strip CANCEL TAG (U+E007F) at the upper boundary of the Tag block", () => {
+        // \uDB40\uDC7F is the surrogate pair for U+E007F (last codepoint of the block)
+        const input = "\uDB40\uDC7Ftest";
+        expect(sanitizeContent(input)).toBe("test");
+      });
+
+      it("should strip a full ASCII string encoded in Tag Characters — invisible payload attack", () => {
+        // Encode "SECRET" using Tag Characters: each ASCII char C -> U+E0000+C
+        // S=0x53, E=0x45, C=0x43, R=0x52, E=0x45, T=0x54
+        const tagS = "\uDB40\uDC53";
+        const tagE = "\uDB40\uDC45";
+        const tagC = "\uDB40\uDC43";
+        const tagR = "\uDB40\uDC52";
+        const tagT = "\uDB40\uDC54";
+        const encoded = tagS + tagE + tagC + tagR + tagE + tagT;
+        expect(sanitizeContent(encoded)).toBe("");
+      });
+
+      it("should strip Tag Characters mixed with normal ASCII text", () => {
+        // Tag-encoded 'A' (U+E0041) and 'B' (U+E0042) interspersed with normal letters
+        const input = "a\uDB40\uDC41b\uDB40\uDC42c";
+        expect(sanitizeContent(input)).toBe("abc");
+      });
+
+      it("should strip multiple adjacent Tag Characters", () => {
+        // TAG LATIN CAPITAL LETTER A through D (U+E0041–U+E0044), with no visible text between them
+        const input = "\uDB40\uDC41\uDB40\uDC42\uDB40\uDC43\uDB40\uDC44";
+        expect(sanitizeContent(input)).toBe("");
+      });
+
+      it("should neutralize @mention bypass using Tag Characters between @ and username", () => {
+        // A Tag Character (U+E0041) is inserted between @ and the username to try to bypass mention detection
+        const input = "@\uDB40\uDC41admin please review";
+        expect(sanitizeContent(input)).toBe("`@admin` please review");
+      });
+    });
+
     describe("@mention bypass prevention via invisible characters", () => {
       it("should neutralize @mention with U+200F (RTL mark) inserted between @ and username", () => {
         const input = "@\u200Fadmin please review";

diff --git a/actions/setup/js/sanitize_content_core.cjs b/actions/setup/js/sanitize_content_core.cjs
@@ -1088,6 +1088,14 @@ function hardenUnicodeText(text) {
   // word joiner, and byte order mark
   result = result.replace(/[\u00AD\u034F\u200B\u200C\u200D\u200E\u200F\u2060\uFEFF]/g, "");
 
+  // Step 3b: Strip the Unicode Tags block (U+E0000–U+E007F, Plane 14).
+  // The block spans 128 codepoints; its assigned ones (U+E0001 and
+  // U+E0020–U+E007F) are Cf-category, and U+E0020–U+E007E mirror printable
+  // ASCII 1:1 (e.g. U+E0041 = TAG LATIN CAPITAL LETTER A). They are invisible
+  // in standard renderers including GitHub Markdown, enabling fully invisible
+  // prompt-injection payloads. Surrogate pairs: \uDB40\uDC00–\uDB40\uDC7F.
+  result = result.replace(/\uDB40[\uDC00-\uDC7F]/g, "");
+
   // Step 4: Remove bidirectional text override controls
   // These can be used to reverse text direction and create visual spoofs
   result = result.replace(/[\u202A\u202B\u202C\u202D\u202E\u2066\u2067\u2068\u2069]/g, "");