Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Select an option

  • Save wang-zifu/c5e7daf5d26a4af61b13e7b8140dce4b to your computer and use it in GitHub Desktop.

Select an option

Save wang-zifu/c5e7daf5d26a4af61b13e7b8140dce4b to your computer and use it in GitHub Desktop.

Revisions

  1. @Felflare Felflare created this gist May 26, 2020.
    107 changes: 107 additions & 0 deletions sentence_similarity_mult.ipynb
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,107 @@
    {
    "cells": [
    {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
    "# Install sentence-transformers from source into the kernel's environment.\n",
    "# Each `!` command runs in its own subshell, so a `!cd` does not carry over to the\n",
    "# next line; install by path instead. `%pip` targets the running kernel's interpreter.\n",
    "!git clone https://github.com/UKPLab/sentence-transformers.git\n",
    "%pip install ./sentence-transformers"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
    "import numpy as np\n",
    "import scipy\n",
    "import scipy.spatial.distance\n",
    "from sentence_transformers import models, SentenceTransformer\n",
    "\n",
    "# Build the sentence encoder from the 'distiluse-base-multilingual-cased' model name\n",
    "# (presumably downloaded/cached by the library on first use -- confirm).\n",
    "model = SentenceTransformer('distiluse-base-multilingual-cased')"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 2,
    "metadata": {},
    "outputs": [],
    "source": [
    "# Example corpus. Several entries have been replaced by French (FR), German (DE)\n",
    "# or Italian (IT) versions; the English sentence is kept in the trailing comment.\n",
    "corpus = [\n",
    "    'Un homme mange de la nourriture.',  # FR: 'A man is eating food.'\n",
    "    'A man is eating a piece of bread.',\n",
    "    'Das Mädchen trägt ein Baby.',  # DE: 'The girl is carrying a baby.'\n",
    "    'A man is riding a horse.',\n",
    "    'An elderly man is enjoying dinner.',\n",
    "    'Amis partageant du vin dans un restaurant.',  # FR: 'Friends sharing wine at a restaurant.'\n",
    "    'A woman is playing violin.',\n",
    "    'A child is learning to play a base guitar.',\n",
    "    'Due uomini hanno spinto i carrelli attraverso i boschi.',  # IT: 'Two men pushed carts through the woods.'\n",
    "    'A man is riding a white horse on an enclosed ground.',\n",
    "    'Una scimmia suona la batteria.',  # IT: 'A monkey is playing drums.'\n",
    "    'A cheetah is running behind its prey.',\n",
    "]\n",
    "\n",
    "# Encode every corpus sentence with the model created in the previous cell.\n",
    "corpus_embeddings = model.encode(corpus)"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 3,
    "metadata": {},
    "outputs": [],
    "source": [
    "# Query sentences to match against the corpus; two of them are given in\n",
    "# Russian (RU) and German (DE), with the English version in the trailing comment.\n",
    "queries = [\n",
    "    'A man is eating pasta.',\n",
    "    'Кто-то в костюме гориллы играет на барабане',  # RU: 'Someone in a gorilla costume is playing a set of drums.'\n",
    "    'Ein Gepard jagt Beute über ein Feld.',  # DE: 'A cheetah chases prey across a field.'\n",
    "]\n",
    "\n",
    "# Encode the queries with the same model used for the corpus.\n",
    "query_embeddings = model.encode(queries)"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
    "# For each query, rank the corpus sentences by cosine similarity and print the best matches.\n",
    "closest_n = 3  # number of matches shown per query\n",
    "for query, query_embedding in zip(queries, query_embeddings):\n",
    "    # cdist is called with the \"cosine\" metric, i.e. it yields cosine distances;\n",
    "    # [0] picks the single row belonging to this query.\n",
    "    distances = scipy.spatial.distance.cdist([query_embedding], corpus_embeddings, \"cosine\")[0]\n",
    "\n",
    "    # (corpus index, distance) pairs, smallest distance (most similar) first.\n",
    "    ranked = sorted(enumerate(distances), key=lambda pair: pair[1])\n",
    "\n",
    "    print(\"\\n======================\\n\")\n",
    "    print(\"Query:\", query)\n",
    "    # Keep the header in sync with closest_n instead of hard-coding the count.\n",
    "    print(\"\\nTop %d most similar sentences in corpus:\" % closest_n)\n",
    "\n",
    "    for idx, distance in ranked[:closest_n]:\n",
    "        # The score is reported as 1 - cosine distance (the cosine similarity).\n",
    "        print(corpus[idx].strip(), \"(Score: %.4f)\" % (1 - distance))"
    ]
    }
    ],
    "metadata": {
    "kernelspec": {
    "display_name": "transformers",
    "language": "python",
    "name": "transformers"
    },
    "language_info": {
    "codemirror_mode": {
    "name": "ipython",
    "version": 3
    },
    "file_extension": ".py",
    "mimetype": "text/x-python",
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
    "version": "3.6.10"
    }
    },
    "nbformat": 4,
    "nbformat_minor": 4
    }