Voyage Embedding Cache

This commit is contained in:
NekoMonci12
2025-06-03 13:21:54 +07:00
parent c368fe1838
commit a2c81409f0
9 changed files with 433 additions and 49 deletions

View File

@@ -1,30 +1,79 @@
// hybridCacheManager.js
const crypto = require('crypto');
const MongoCacheManager = require('./mongoCacheManager');
const { Worker } = require('worker_threads');
const path = require('path');
const { getVoyageEmbeddings } = require('./embedding');
/**
 * Builds the exact-match cache key for a piece of text.
 *
 * The text is trimmed and lower-cased before hashing, so inputs that differ
 * only in surrounding whitespace or letter case produce the same key.
 *
 * @param {string} input - Text to derive the key from.
 * @returns {string} Hex-encoded SHA-256 digest of the normalized text.
 */
function hashInput(input) {
  const normalized = input.trim().toLowerCase();
  const hasher = crypto.createHash('sha256');
  hasher.update(normalized);
  return hasher.digest('hex');
}
/**
 * Fetches the embedding for a single text.
 *
 * Wraps the text in a one-element batch, passes it to getVoyageEmbeddings,
 * and returns the first element of the awaited result.
 *
 * @param {string} text - Text to embed.
 * @returns {Promise<*>} The first entry of the result returned for the batch.
 */
async function getEmbedding(text) {
  const batch = [text];
  const vectors = await getVoyageEmbeddings(batch);
  return vectors[0];
}
/**
 * Cache front-end that first tries an exact hash lookup and then falls back
 * to a similarity search over stored embeddings, both backed by
 * MongoCacheManager.
 */
class HybridCacheManager {
  /**
   * @param {string} mongoUrl - Connection URL for MongoDB.
   * @param {string} dbName - MongoDB database name.
   * @param {string} collectionName - MongoDB collection name for cache.
   */
  constructor(mongoUrl, dbName, collectionName = 'cache') {
    this.mongoCache = new MongoCacheManager(mongoUrl, dbName, collectionName);
  }

  /**
   * Looks up a cached value for `input`.
   *
   * Order of operations:
   *   1. Exact match on the hash of the input.
   *   2. If that misses and the store holds embeddings, the input is embedded
   *      and compared against them in a worker thread.
   *
   * @param {string} input - Text to look up.
   * @param {number} [threshold=0.8] - Forwarded unchanged to the similarity
   *   worker together with the embeddings.
   * @returns {Promise<*|null>} The `value` of the matching entry, or `null`
   *   when there is no exact match and the worker reports no match.
   * @throws Rejects when the worker emits an error or exits before posting a
   *   result.
   */
  async getCachedResult(input, threshold = 0.8) {
    const inputHash = hashInput(input);

    // 🔍 Fast exact-match hash lookup
    const exactMatch = await this.mongoCache.getByHash(inputHash);
    if (exactMatch) {
      console.log("[HybridCache] Exact hash match found.");
      return exactMatch.value;
    }

    // Check for candidates before embedding: with nothing to compare against,
    // requesting an embedding for the input would be wasted work.
    const cachedEntries = await this.mongoCache.getAllEmbeddings();
    if (cachedEntries.length === 0) {
      console.log("[HybridCache] No cache entries with embeddings found.");
      return null;
    }

    // 🤖 Embedding-based semantic search
    const inputEmbedding = await getEmbedding(input);

    // Return a Promise that resolves with the worker's result
    return new Promise((resolve, reject) => {
      const worker = new Worker(path.resolve(__dirname, './cosineSimilarityWorker.js'));
      // Set once the worker has answered, so the exit handler can tell our own
      // terminate() apart from an unexpected shutdown.
      let resultDelivered = false;

      worker.on('message', ({ bestMatch, bestScore }) => {
        resultDelivered = true;
        if (bestMatch) {
          console.log(`[HybridCache] Semantic match found with similarity ${bestScore.toFixed(2)}`);
          resolve(bestMatch.value);
        } else {
          console.log("[HybridCache] No suitable semantic cache match found.");
          resolve(null);
        }
        worker.terminate();
      });

      worker.on('error', (err) => {
        console.error("[HybridCache] Worker thread error:", err);
        reject(err);
      });

      worker.on('exit', (code) => {
        // terminate() makes the worker exit with code 1, so an exit after a
        // delivered result is expected and must not be reported.
        if (resultDelivered) {
          return;
        }
        if (code !== 0) {
          console.warn(`[HybridCache] Worker stopped with exit code ${code}`);
        }
        // Without this the promise would stay pending forever when the worker
        // dies silently. It is a no-op if 'error' already rejected.
        reject(new Error(`[HybridCache] Worker exited with code ${code} before returning a result`));
      });

      worker.postMessage({ inputEmbedding, cachedEntries, threshold });
    });
  }

  /**
   * Stores `value` for `input` along with the input's embedding and hash.
   *
   * @param {string} input - Text the value is cached under.
   * @param {*} value - Value to cache.
   * @returns {Promise<void>}
   */
  async setCache(input, value) {
    const embedding = await getEmbedding(input);
    const hash = hashInput(input);
    await this.mongoCache.setCache(input, value, embedding, hash);
    console.log("[HybridCache] Stored new cache entry with embedding and hash.");
  }
}