Last active
October 10, 2023 07:52
-
-
Save rennokki/bde0f4576eca82c20d04b107dd5c67b4 to your computer and use it in GitHub Desktop.
Revisions
-
rennokki revised this gist
Oct 10, 2023 . 1 changed file with 10 additions and 6 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,3 +1,4 @@ // Create a smol embedding; CloudflareWorkersAIEmbeddings is provided by langchain function setupEmbeddings(env: Env): CloudflareWorkersAIEmbeddings { return new CloudflareWorkersAIEmbeddings({ binding: env.AI as unknown as Fetcher, @@ -9,25 +10,28 @@ function setupEmbeddings(env: Env): CloudflareWorkersAIEmbeddings { }); }; // CloudflareVectorizeStore is provided by langchain function vectorStorage(env: Env): CloudflareVectorizeStore { return new CloudflareVectorizeStore(setupEmbeddings(env), { index: env.V1_VECTORIZE, // index onFailedAttempt: (error) => { console.log('Failed attempt on vectorize', error); }, }); }; // Just a wiki article, tried to find smth small const content = 'https://en.wikipedia.org/wiki/Attiki,_Athens'; // Get the Cheerio Web Loader and split docs const rawDocs = await new CheerioWebBaseLoader(content as string).load(); // tried loadAndSplit() too // HTML -> Docs splitter const splitter = RecursiveCharacterTextSplitter.fromLanguage('html'); const sequence = splitter.pipe(new HtmlToTextTransformer()); // Make the sequence pass through HtmlToTextTransformer() to get the final docs const documents = await sequence.invoke(rawDocs); // This call triggers the error. await vectorStorage(env).addDocuments(documents); -
rennokki revised this gist
Oct 10, 2023 . 2 changed files with 3 additions and 2 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -18,6 +18,8 @@ function vectorStorage(env: Env): CloudflareVectorizeStore { }); }; const content = 'https://en.wikipedia.org/wiki/Attiki,_Athens'; const rawDocs = await new CheerioWebBaseLoader(content as string).load(); // tried loadAndSplit() too const splitter = RecursiveCharacterTextSplitter.fromLanguage('html'); const sequence = splitter.pipe(new HtmlToTextTransformer()); This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,7 +1,6 @@ Script modified; context reset. Script modified; context reset. Failed attempt on vectorize Error: VECTOR_UPSERT_ERROR (code = 4006): Bad Request: Request body JSON schema is invalid; [ { "code": "invalid_union", "unionErrors": [ -
rennokki created this gist
Oct 10, 2023 .There are no files selected for viewing
/**
 * Builds the embeddings client used to turn documents into vectors.
 *
 * Configured with the Workers AI binding taken from `env.AI`, the
 * `@cf/baai/bge-small-en-v1.5` model, and `stripNewLines` enabled. Every
 * failed attempt is logged to the console.
 *
 * @param env - Worker environment; must expose the `AI` binding.
 * @returns A configured `CloudflareWorkersAIEmbeddings` instance.
 */
function setupEmbeddings(env: Env): CloudflareWorkersAIEmbeddings {
  return new CloudflareWorkersAIEmbeddings({
    // NOTE(review): the double assertion bypasses type checking of the binding;
    // confirm whether `env.AI` can be declared with the expected type in `Env`.
    binding: env.AI as unknown as Fetcher,
    modelName: '@cf/baai/bge-small-en-v1.5',
    stripNewLines: true,
    onFailedAttempt: (error) => {
      console.log('Failed attempt on embed', error);
    },
  });
}

/**
 * Builds the vector store that documents are written to.
 *
 * Wraps the index bound as `env.V1_VECTORIZE`, embedding documents through
 * `setupEmbeddings(env)`. Every failed attempt is logged to the console.
 *
 * @param env - Worker environment; must expose the `V1_VECTORIZE` index
 *   (and the `AI` binding read by `setupEmbeddings`).
 * @returns A configured `CloudflareVectorizeStore` instance.
 */
function vectorStorage(env: Env): CloudflareVectorizeStore {
  return new CloudflareVectorizeStore(setupEmbeddings(env), {
    index: env.V1_VECTORIZE,
    onFailedAttempt: (error) => {
      console.log('Failed attempt on vectorize', error);
    },
  });
}

// Page to ingest. Previously `content` was read without ever being declared.
const content = 'https://en.wikipedia.org/wiki/Attiki,_Athens';

// Fetch the page as raw documents.
const rawDocs = await new CheerioWebBaseLoader(content).load(); // tried loadAndSplit() too

// Split the HTML, then pipe the chunks through HtmlToTextTransformer to get the final docs.
const splitter = RecursiveCharacterTextSplitter.fromLanguage('html');
const sequence = splitter.pipe(new HtmlToTextTransformer());
const documents = await sequence.invoke(rawDocs);

// This call triggers the reported error.
// `env` is not defined in this snippet; presumably it comes from the enclosing Worker handler.
// NOTE(review): the failure may be caused by nested document metadata being rejected by the
// index's request schema — confirm against the Vectorize metadata documentation.
await vectorStorage(env).addDocuments(documents);