Skip to content

Instantly share code, notes, and snippets.

@jexp
Created October 8, 2024 09:04
Show Gist options
  • Select an option

  • Save jexp/45e9902853feebba0a55b8736a59a7e6 to your computer and use it in GitHub Desktop.

Select an option

Save jexp/45e9902853feebba0a55b8736a59a7e6 to your computer and use it in GitHub Desktop.

Revisions

  1. jexp created this gist Oct 8, 2024.
    138 changes: 138 additions & 0 deletions devoxx24.cypher
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,138 @@
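    /*
    Sample response from the talks API that the import statement below consumes
    (a single element is shown):
    rows = [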
    {
    "id": 12124,
    "title": "Meet Chicory, exploit the power of WebAssembly on the server side!",
    "description": "WebAssembly is a rapidly emerging technology that enables the execution of code written in various languages while providing strong sandboxing and safety guarantees.<br>Initially developed for the web to enhance browser capabilities, developers soon recognized the potential of reusing Wasm modules in server-side applications. wazero, a native Go runtime for Wasm, played a pivotal role in showcasing the versatility and power of this solution. With its widespread adoption and integration into diverse applications, wazero demonstrated the value of using Wasm modules beyond the web environment.<br>Inspired by the goals of wazero, we launched Chicory, a pure Java interpreter, with zero dependencies, for Wasm. Chicory empowers developers to load and execute Wasm modules with fine-grained control over their interactions with the system and memory allocation. Notably, Chicory seamlessly integrates with barebone JVM runtimes, eliminating any system dependencies.<br>In this presentation, we will explore the exciting possibilities that Chicory offers for the JVM ecosystem. Through practical, real-world examples, we will showcase how Chicory can be seamlessly integrated into your application, enabling you to run Wasm programs within minutes. Additionally, we will discuss the various approaches to designing integrations, exploring the trade-offs associated with each option.",
    "summary": "WebAssembly\nWasm Modules\nChicory\nJVM Ecosystem",
    "afterVideoURL": null,
    "podcastURL": null,
    "audienceLevel": "INTERMEDIATE",
    "language": null,
    "totalFavourites": 42,
    "track": {
    "id": 2758,
    "name": "Mind the Geek",
    "description": "Developer candy: stuff we want to know about but dont (generally) at work, Robotics, biological computing, cybernetics, AI, new toys, tomorrows world",
    "imageURL": "https://devoxx-tracks.s3.eu-west-1.amazonaws.com/mind-the-geek.png"
    },
    "sessionType": {
    "id": 957,
    "name": "Tools-in-Action",
    "duration": 30,
    "pause": false,
    "description": "Half an hour sessions focused on demonstrating technical tools or solutions.",
    "cssColor": null
    },
    "speakers": [
    {
    "id": 5729,
    "firstName": "Andrea",
    "lastName": "Peruffo",
    "fullName": "Andrea Peruffo",
    "bio": "With nearly two decades of coding experience, I'm fueled by passion as I continue to type away daily.<br>As a Principal Software Engineer at Red Hat, I actively contribute to diverse Open Source projects, driven by both personal fulfillment and professional advancement. My not-so-secret passion lies in programming languages, developer tools, compilers, and beyond. Come and spot me on a project near you!",
    "anonymizedBio": null,
    "company": "Red Hat",
    "imageUrl": "https://devoxxian-image-thumbnails.s3-eu-west-1.amazonaws.com/profile-ff9843a3-a619-41ad-bcd0-33196cc60504.jpeg",
    "twitterHandle": "@and_prf",
    "linkedInUsername": null
    }
    ],
    "keywords": [
    {
    "name": "WebAssembly"
    },
    {
    "name": "JVM Ecosystem"
    },
    {
    "name": "wazero"
    },
    {
    "name": "Chicory"
    }
    ],
    "timeSlots": []
    }
    ]
    */
    // Adding uniqueness constraints on the keys used by the MERGE clauses of the import.
    // IF NOT EXISTS makes each statement a no-op when the constraint is already present,
    // so the whole script can be re-run without failing here.
    CREATE CONSTRAINT IF NOT EXISTS FOR (s:Session) REQUIRE s.id IS UNIQUE;
    CREATE CONSTRAINT IF NOT EXISTS FOR (t:Track) REQUIRE t.id IS UNIQUE;
    CREATE CONSTRAINT IF NOT EXISTS FOR (st:SessionType) REQUIRE st.id IS UNIQUE;
    CREATE CONSTRAINT IF NOT EXISTS FOR (sp:Speaker) REQUIRE sp.id IS UNIQUE;
    CREATE CONSTRAINT IF NOT EXISTS FOR (k:Keyword) REQUIRE k.name IS UNIQUE;

    // Load and import data.
    // Reads the talks JSON from the conference API (one row per talk) and upserts
    // Session, Track, SessionType, Speaker and Keyword nodes plus their relationships.
    // Every write uses MERGE, so re-running the import updates existing nodes instead
    // of duplicating them.
    CALL apoc.load.json("https://dvbe24.cfp.dev/api/public/talks?sort=name,asc") YIELD value AS row
    WITH row, row.track AS track, row.sessionType AS type

    MERGE (s:Session {id: row.id})
    SET s += row {.title, .description, .summary, .afterVideoURL, .podcastURL, .audienceLevel, .language, .totalFavourites}

    // MERGE raises an error for a null property value, which would abort the whole
    // import. The FOREACH/CASE guards below skip the track / session type of a talk
    // that does not have one instead of failing.
    FOREACH (ignored IN CASE WHEN track.id IS NOT NULL THEN [1] ELSE [] END |
      MERGE (t:Track {id: track.id})
      SET t += track {.name, .description, .imageURL}
      MERGE (s)-[:BELONGS_TO]->(t)
    )

    FOREACH (ignored IN CASE WHEN type.id IS NOT NULL THEN [1] ELSE [] END |
      MERGE (st:SessionType {id: type.id})
      SET st += type {.name, .duration, .pause, .description}
      MERGE (s)-[:HAS_TYPE]->(st)
    )

    // A missing speakers/keywords list is treated as empty, and entries without a
    // merge key are skipped for the same reason as above.
    FOREACH (speaker IN [candidate IN coalesce(row.speakers, []) WHERE candidate.id IS NOT NULL] |
      MERGE (sp:Speaker {id: speaker.id})
      SET sp += speaker {.firstName, .lastName, .fullName, .bio, .anonymizedBio, .company, .imageUrl, .twitterHandle, .linkedInUsername}
      MERGE (s)-[:HAS_SPEAKER]->(sp)
    )

    FOREACH (keyword IN [candidate IN coalesce(row.keywords, []) WHERE candidate.name IS NOT NULL] |
      MERGE (k:Keyword {name: keyword.name})
      MERGE (s)-[:HAS_KEYWORD]->(k)
    );

    // Create vector indexes over the `embedding` property of speakers and sessions.
    // `vector.dimensions` has to equal the length of the vectors stored in `embedding`
    // (NOTE(review): 1536 is presumably the output size of the embedding model used
    // further down in this script - confirm if the model is changed).
    CREATE VECTOR INDEX speakerEmbeddings IF NOT EXISTS
    FOR (s:Speaker)
    ON s.embedding
    OPTIONS { indexConfig: {
    `vector.dimensions`: 1536,
    `vector.similarity_function`: 'cosine'
    }};

    CREATE VECTOR INDEX sessionEmbeddings IF NOT EXISTS
    FOR (s:Session)
    ON s.embedding
    OPTIONS { indexConfig: {
    `vector.dimensions`: 1536,
    `vector.similarity_function`: 'cosine'
    }};

    // Text index on speaker names. IF NOT EXISTS keeps this statement re-runnable,
    // like the two vector index statements above.
    CREATE TEXT INDEX IF NOT EXISTS FOR (sp:Speaker) ON (sp.fullName);

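    // Before running the statements below, set the OpenAI API key as the query
    // parameter `token` (client command for Neo4j Browser / cypher-shell):
    // :param token => "sk-..."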

    // Compute and store embeddings for every node that does not have one yet.
    // Requires the `token` query parameter (OpenAI API key) to be set.

    // Sessions: title, summary and description joined by newlines form the text to encode.
    MATCH (session:Session)
    WHERE session.embedding IS NULL
    WITH session,
         genai.vector.encode(
           coalesce(session.title,"") + "\n" + coalesce(session.summary,"") + "\n" + coalesce(session.description,""),
           "OpenAI",
           {token:$token, model:"text-embedding-3-small"}
         ) AS vector
    CALL db.create.setNodeVectorProperty(session, 'embedding', vector);

    // Speakers: full name, company and bio joined by newlines form the text to encode.
    MATCH (speaker:Speaker)
    WHERE speaker.embedding IS NULL
    WITH speaker,
         genai.vector.encode(
           coalesce(speaker.fullName,"") + "\n" + coalesce(speaker.company,"") + "\n" + coalesce(speaker.bio,""),
           "OpenAI",
           {token:$token, model:"text-embedding-3-small"}
         ) AS vector
    CALL db.create.setNodeVectorProperty(speaker, 'embedding', vector);

    // Question answering, graph variant: encode the user question, fetch the 5 nearest
    // sessions from the `sessionEmbeddings` vector index, keep those scoring above 0.7
    // and return each keyword-session-speaker path together with the score.
    WITH "What's new about Valhalla?" AS userQuestion
    WITH genai.vector.encode(userQuestion, "OpenAI", {token:$token, model:"text-embedding-3-small"}) AS queryVector
    CALL db.index.vector.queryNodes('sessionEmbeddings', 5, queryVector)
      YIELD node AS session, score
    WHERE score > 0.7
    // Only the path as a whole is returned, so the end nodes need no variable names.
    MATCH path = ()<-[:HAS_KEYWORD]-(session)-[:HAS_SPEAKER]->()
    RETURN path, score;

    // Question answering, tabular variant: encode the user question, fetch the 5 nearest
    // sessions from the `sessionEmbeddings` vector index, keep those scoring above 0.7
    // and return one row per session with its speakers and keywords aggregated.
    WITH "Talks about Kubernetes and Openshift?" AS question
    WITH genai.vector.encode(question, "OpenAI", {token:$token, model:"text-embedding-3-small"}) AS questionEmbedding
    CALL db.index.vector.queryNodes('sessionEmbeddings', 5, questionEmbedding) YIELD node AS s, score
    WHERE score > 0.7
    MATCH path=(kw)<-[:HAS_KEYWORD]-(s)-[:HAS_SPEAKER]->(sp)
    RETURN score, s.title,
           // String concatenation with null yields null and collect() drops nulls, so a
           // speaker without a company would disappear from the list; coalesce() appends
           // the ", company" suffix only when a company is set.
           collect(DISTINCT sp.fullName + coalesce(", " + sp.company, "")) AS speakers,
           collect(DISTINCT kw.name) AS keywords, s.summary, s.description
    // Row order is not guaranteed after aggregation, so sort best match first explicitly.
    ORDER BY score DESC;
    // Alternative for a graph visualisation instead of a table:
    // return path, score;