|
@@ -0,0 +1,74 @@
|
|
|
+{
|
|
|
+ "cells": [
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 1,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "import sqlite3\n",
|
|
|
+ "from sentence_transformers import SentenceTransformer\n",
|
|
|
+ "from usearch.index import Index\n",
|
|
|
+ "\n",
|
|
|
+ "LM_MODEL = \"all-MiniLM-L6-v2\"\n",
|
|
|
+ "DB_NAME = \"kuberian.db\"\n",
|
|
|
+ "\n",
|
|
|
+ "model = SentenceTransformer(LM_MODEL)\n",
|
|
|
+ "conn = sqlite3.connect(DB_NAME)\n",
|
|
|
+ "cur = conn.cursor()\n",
|
|
|
+ "\n",
|
|
|
+ "index = Index(\n",
|
|
|
+ " ndim=384, \n",
|
|
|
+ " metric='cos',\n",
|
|
|
+ " dtype='f16')"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 2,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "stdout",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ "35000\n"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "from IPython.display import clear_output\n",
|
|
|
+ "for i, row in enumerate(cur.execute(\"SELECT function_id, summary FROM function_analyses\")):\n",
|
|
|
+ " id, summary = row\n",
|
|
|
+ " vec = model.encode(summary)\n",
|
|
|
+ " index.add(id, vec)\n",
|
|
|
+ " if i%100 == 0:\n",
|
|
|
+ " print(i)\n",
|
|
|
+ " clear_output(wait=True)\n",
|
|
|
+ "index.save(\"./kuberian.usearch\")"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "metadata": {
|
|
|
+ "kernelspec": {
|
|
|
+ "display_name": ".venv",
|
|
|
+ "language": "python",
|
|
|
+ "name": "python3"
|
|
|
+ },
|
|
|
+ "language_info": {
|
|
|
+ "codemirror_mode": {
|
|
|
+ "name": "ipython",
|
|
|
+ "version": 3
|
|
|
+ },
|
|
|
+ "file_extension": ".py",
|
|
|
+ "mimetype": "text/x-python",
|
|
|
+ "name": "python",
|
|
|
+ "nbconvert_exporter": "python",
|
|
|
+ "pygments_lexer": "ipython3",
|
|
|
+ "version": "3.11.4"
|
|
|
+ },
|
|
|
+ "orig_nbformat": 4
|
|
|
+ },
|
|
|
+ "nbformat": 4,
|
|
|
+ "nbformat_minor": 2
|
|
|
+}
|