// == FINGERPRINT (drop-in utility) ==
// Purpose: Generate a strong, stable fingerprint string for an assistant message element.
//
// This utility is designed to uniquely identify assistant messages across dynamic UIs
// (ChatGPT, Claude, Gemini). It uses multiple content- and position-aware components:
//   - ch:   content hash of the most "command-like" text in the element (prefers fenced code)
//   - ph:   hash of the immediately preceding assistant messages' trailing text (context)
//   - ih:   hash of the text right before the first @bridge@...@end@ block (intra-message prefix)
//   - hint: small DOM hint (tag#id.class), hashed to keep the fingerprint short
//   - n:    ordinal among elements with the same ch+ph+ih on the page at this moment
//
// The combination helps disambiguate near-duplicate messages, re-ordered DOM, and small edits.
// Fingerprints are stable enough to persist in localStorage for de-duplication.
//
// Notes:
//   - All hashes are short base36 strings derived from a djb2-xor style hash; fast and sufficient.
//   - Inputs are normalized (trim, strip carriage returns and zero-width spaces, drop trailing
//     whitespace before newlines).
//   - The algorithm intentionally looks at no more than ~2000 chars per slice for performance.
//   - Apart from registering its two globals on window, the module's only side effect is the
//     optional caching of a stable fingerprint in the element's dataset.

(function(){

/**
 * CSS selectors that identify assistant messages across supported sites.
 * They are joined with "," and passed to querySelectorAll when scanning neighbors.
 */
// Frozen so no caller can accidentally mutate the shared selector list;
// consumers only read it (joined with "," for querySelectorAll).
const MSG_SELECTORS = Object.freeze([
  // Elements explicitly tagged with the assistant author role.
  '[data-message-author-role="assistant"]',
  // Generic chat-message containers, excluding ones tagged as user-authored.
  '.chat-message:not([data-message-author-role="user"])',
  // Any element carrying the message-content class.
  '.message-content',
]);

/**
 * Normalize text by removing carriage returns and zero-width spaces (U+200B), stripping
 * trailing spaces/tabs before newlines, and trimming both ends. This gives a consistent
 * basis for hashing.
 * @param {string} s
 * @returns {string}
 */
function norm(s){
  // Falsy input (null, undefined, '') is treated as the empty string.
  const raw = s || '';
  const withoutCarriageReturns = raw.replace(/\r/g, '');
  // Zero-width spaces are removed before the line-end cleanup so that
  // "text \u200b\n" still collapses to "text\n".
  const withoutZeroWidth = withoutCarriageReturns.replace(/\u200b/g, '');
  const cleanLineEnds = withoutZeroWidth.replace(/[ \t]+\n/g, '\n');
  return cleanLineEnds.trim();
}

/**
 * Fast, non-cryptographic string hash (djb2 variant using XOR). Returns the unsigned
 * 32-bit result as a base36 string.
 * @param {string} s
 * @returns {string}
 */
function hash(s){
  // 5381 is the starting accumulator; each UTF-16 code unit is folded in as acc*33 XOR unit.
  let acc = 5381;
  let pos = 0;
  while (pos < s.length) {
    const unit = s.charCodeAt(pos);
    // The XOR truncates the running value back to a signed 32-bit integer.
    acc = ((acc << 5) + acc) ^ unit;
    pos += 1;
  }
  // >>> 0 reinterprets the signed accumulator as unsigned before base36 encoding.
  const unsigned = acc >>> 0;
  return unsigned.toString(36);
}

/**
 * Extract the most relevant, command-like text from a message element.
 * Preference order:
 *   - The first code/pre block that looks like a complete @bridge@ ... @end@ command
 *     containing an `action:` line
 *   - Otherwise, the element's textContent (first 2000 chars), normalized.
 * @param {Element} el
 * @returns {string}
 */
function commandLikeText(el){
  // Returns the normalized text of the first code block that looks like a complete
  // command; otherwise the normalized first 2000 chars of the element's text.
  const blocks = el.querySelectorAll('pre code, pre, code');
  for (const block of blocks) {
    const text = norm(block.textContent || '');
    // A runnable command block has all three of: a line starting with @bridge@,
    // a line starting with "action:", and a line ending with @end@.
    // The @bridge@ check deliberately has no trailing \b. "@" is a non-word character,
    // so \b after it only matches when a word character follows, which rejected the
    // normal layout where @bridge@ sits alone on its line.
    const hasBridgeLine = /(^|\n)\s*@bridge@/m.test(text);
    const hasActionLine = /(^|\n)\s*action\s*:/m.test(text);
    const hasEndLine = /@end@\s*$/m.test(text);
    if (hasEndLine && hasBridgeLine && hasActionLine) return text;
  }
  // No command block found: fall back to the raw element text, capped before normalizing.
  return norm((el.textContent || '').slice(0, 2000));
}

/**
 * Build a hash of the trailing text (up to 2000 chars in total) of the assistant messages
 * immediately preceding this one. This captures conversational context that helps
 * distinguish repeated content.
 * @param {Element} el - The current message element
 * @returns {string} base36 hash of the context window, or '0' when there is no previous message
 */
function prevContextHash(el) {
  // Hashes up to 2000 trailing characters of the messages that precede `el` on the page.
  const selector = MSG_SELECTORS.join(',');
  const messages = Array.from(document.querySelectorAll(selector));
  const position = messages.indexOf(el);
  // Not found (-1) or first on the page (0): there is no preceding context.
  if (position <= 0) return '0';

  let budget = 2000;
  let context = '';
  // Walk backwards from the nearest previous message, prepending each tail so the
  // buffer stays in document order, until the character budget is used up.
  let cursor = position - 1;
  while (cursor >= 0 && budget > 0) {
    const text = norm(messages[cursor].textContent || '');
    cursor -= 1;
    if (!text) continue;
    const tail = text.slice(-budget);
    context = tail + context;
    budget -= tail.length;
  }
  return hash(context.slice(-2000));
}

/**
 * Compute a hash of the text (up to 2000 chars) immediately preceding the first
 * @bridge@ ... @end@ block within this element. If there is no such block, the last
 * 2000 chars of the whole element text are hashed instead.
 * @param {Element} el
 * @returns {string}
 */
function intraPrefixHash(el){
  // Hashes the (normalized) text that leads up to the first command block in the element.
  const fullText = el.textContent || '';
  const firstBlock = /@bridge@[\s\S]*?@end@/m.exec(fullText);
  // Without a command block the window ends at the end of the text.
  const windowEnd = firstBlock === null ? fullText.length : firstBlock.index;
  const windowStart = Math.max(0, windowEnd - 2000);
  const prefix = fullText.slice(windowStart, windowEnd);
  return hash(norm(prefix));
}

/**
 * Produce a tiny DOM hint string (tag#id.class) to help separate identical content in
 * different containers. Limited to 40 chars; it is hashed before being included in the
 * final fingerprint.
 * @param {Element} node
 * @returns {string}
 */
function domHint(node) {
  // Builds "TAG#id.firstClass", capped at 40 characters; '' when there is no node.
  if (!node) return '';
  const id = node.id || '';
  // className is only used when it is a plain string; any other value
  // (e.g. an object) contributes no class.
  const classAttr = typeof node.className === 'string' ? node.className : '';
  // Class lists are whitespace-separated and may start with whitespace or use tabs/newlines,
  // so trim and split on any whitespace run rather than on a single space.
  const firstClass = classAttr.trim().split(/\s+/)[0];
  return `${node.tagName || ''}#${id}.${firstClass}`.slice(0, 40);
}

/**
 * Determine the ordinal index (0-based) of this element among all message elements that
 * share the same content/context key on the page. This disambiguates duplicates that have
 * identical ch+ph+ih values by adding their order of appearance.
 * @param {Element} el
 * @param {string} key - The key built from ch|ph|ih for this element
 * @returns {number}
 */
function ordinalForKey(el, key) {
  // Counts how many message elements that appear before `el` produce the same key.
  // If `el` is not among the matched elements, the count over all of them is returned.
  const list = Array.from(document.querySelectorAll(MSG_SELECTORS.join(',')));
  const target = String(key);
  let n = 0;
  for (const node of list) {
    // The element always matches its own key, so its ordinal is the count so far.
    if (node === el) return n;

    // A node's key is `ch:<ch>|ph:<ph>|ih:<ih>`. Check the cheap components first and
    // skip as soon as one cannot match: prevContextHash rescans the whole document on
    // every call, so it is only worth computing for real candidates.
    const ch = hash(commandLikeText(node).slice(0, 2000));
    if (!target.startsWith(`ch:${ch}|`)) continue;
    const ih = intraPrefixHash(node);
    if (!target.endsWith(`|ih:${ih}`)) continue;
    const ph = prevContextHash(node);
    if (`ch:${ch}|ph:${ph}|ih:${ih}` === target) n++;
  }
  return n;
}

/**
 * Generate a composite fingerprint for the given message element.
 * Format: "ch:<..>|ph:<..>|ih:<..>|hint:<..>|n:<ordinal>"
 * @param {Element} el
 * @returns {string}
 */
function fingerprintElement(el){
  // Content, preceding-context and intra-message-prefix hashes form the identity key.
  const contentHash = hash(commandLikeText(el).slice(0, 2000));
  const contextHash = prevContextHash(el);
  const prefixHash = intraPrefixHash(el);
  const hintHash = hash(domHint(el));

  const key = ['ch:' + contentHash, 'ph:' + contextHash, 'ih:' + prefixHash].join('|');
  // The ordinal separates elements on the page whose keys are otherwise identical.
  const ordinal = ordinalForKey(el, key);

  return [key, 'hint:' + hintHash, 'n:' + ordinal].join('|');
}

/**
 * Retrieve (and cache) a stable fingerprint for the element. The first time it is computed,
 * the value is stored in el.dataset.aiRcStableFp so that subsequent calls do not recompute it.
 * @param {Element} el
 * @returns {string}
 */
function getStableFingerprint(el) {
  // Reuse the fingerprint cached on the element, if any.
  const cached = el?.dataset?.aiRcStableFp;
  if (cached) return cached;

  const fresh = fingerprintElement(el);
  try {
    if (el && el.dataset) {
      el.dataset.aiRcStableFp = fresh;
    }
  } catch {
    // Caching is best-effort: if the dataset write fails, the freshly computed
    // fingerprint is still returned.
  }
  return fresh;
}

// Expose both for backward compatibility with older modules that expect these globals.
// Cached variant: stores its result in the element's dataset and reuses it.
window.AI_REPO_STABLE_FINGERPRINT = getStableFingerprint;
// Uncached variant: recomputes the fingerprint on every call.
window.AI_REPO_FINGERPRINT = fingerprintElement;
})();