Skip to content

Instantly share code, notes, and snippets.

@jonatas
Last active August 21, 2024 19:22
Show Gist options
  • Select an option

  • Save jonatas/2d3f03e7b9cc8c979b609eecc9088914 to your computer and use it in GitHub Desktop.

Select an option

Save jonatas/2d3f03e7b9cc8c979b609eecc9088914 to your computer and use it in GitHub Desktop.

Revisions

  1. jonatas revised this gist Aug 20, 2024. No changes.
  2. jonatas created this gist Aug 20, 2024.
    86 changes: 86 additions & 0 deletions semantic_tracking.sql
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,86 @@
    CREATE EXTENSION IF NOT EXISTS ai CASCADE;
    DROP TABLE IF EXISTS vecs CASCADE;
    DROP TABLE IF EXISTS interests CASCADE;
    DROP TABLE IF EXISTS notifications CASCADE;

    -- Create or replace the function to embed content
    CREATE OR REPLACE FUNCTION embed(content TEXT) RETURNS VECTOR AS $$
    DECLARE
    vectorized_content VECTOR;
    BEGIN
    vectorized_content := openai_embed(
    'text-embedding-ada-002',
    content
    )::VECTOR;
    RETURN vectorized_content;
    END;
    $$ IMMUTABLE LANGUAGE plpgsql;

    CREATE TABLE vecs (
    content TEXT PRIMARY KEY,
    embeds VECTOR GENERATED ALWAYS AS (embed(content)) STORED
    );

    CREATE TABLE interests (
    content TEXT PRIMARY KEY,
    embeds VECTOR GENERATED ALWAYS AS (embed(content)) STORED
    );


    CREATE TABLE notifications (
    id SERIAL PRIMARY KEY,
    created_at TIMESTAMP DEFAULT clock_timestamp(),
    content TEXT REFERENCES vecs(content),
    summary JSONB
    );

    -- fetch all interests in vectors and calculate the similarity
    -- insert notifications in the table in case find cosine similarity < 0.2
    CREATE OR REPLACE FUNCTION check_interests_similarity() RETURNS TRIGGER AS $$
    DECLARE
    interest RECORD;
    similarity_percent NUMERIC;
    distance FLOAT;
    summary jsonb := '{}'::jsonb;
    BEGIN
    for interest in select * from interests
    loop
    distance := NEW.embeds <=> interest.embeds;
    if (distance < 0.2) THEN
    similarity_percent := ((1.0 - distance) * 100)::numeric(10,2);
    summary := summary || jsonb_build_object(
    interest.content, similarity_percent
    );
    END IF;

    end loop;

    -- check if summary has any elements and persist it in the notifications table
    IF (summary != '{}'::jsonb) THEN
    insert into notifications (content, summary) values
    (NEW.content, summary);
    end if;
    RETURN NEW;
    END;
    $$ LANGUAGE plpgsql;


    CREATE TRIGGER check_interests_similarity_trigger
    AFTER INSERT OR UPDATE ON vecs
    FOR EACH ROW EXECUTE FUNCTION check_interests_similarity();


    INSERT INTO interests (content) VALUES
    ('Satisfied customer'),
    ('Unsatisfied customer'),
    ('Product return');

    insert into vecs (content) VALUES
    ('The bike looks good but I want to return it and get my money back'),
    ('Works perfectly for my running days'),
    ('This shoes are terrible, pay me back now!'),
    ('Shoes dont fit so I want to return it'),
    ('The bike is very good for my commuting days');

    table notifications;