|
@@ -0,0 +1,143 @@
|
|
|
|
+{
|
|
|
|
+ "cells": [
|
|
|
|
+ {
|
|
|
|
+ "cell_type": "code",
|
|
|
|
+ "execution_count": 1,
|
|
|
|
+ "metadata": {},
|
|
|
|
+ "outputs": [
|
|
|
|
+ {
|
|
|
|
+ "name": "stdout",
|
|
|
|
+ "output_type": "stream",
|
|
|
|
+ "text": [
|
|
|
|
+ " % Total % Received % Xferd Average Speed Time Time Time Current\n",
|
|
|
|
+ " Dload Upload Total Spent Left Speed\n",
|
|
|
|
+ "100 20.8M 100 20.8M 0 0 10.6M 0 0:00:01 0:00:01 --:--:-- 10.6M\n"
|
|
|
|
+ ]
|
|
|
|
+ }
|
|
|
|
+ ],
|
|
|
|
+ "source": [
|
|
|
|
+ "RAW_DATA = \"https://s3.iwanhae.kr/kuberian/1_prod_kubernetes_v1_27_4.jsonl.gz\"\n",
|
|
|
|
+ "RAW_DATA_NAME = \"data.jsonl\"\n",
|
|
|
|
+ "DB_NAME = \"kuberian.db\"\n",
|
|
|
|
+ "!curl \"{RAW_DATA}\" -o \"{RAW_DATA_NAME}.gz\"\n",
|
|
|
|
+ "!rm \"{RAW_DATA_NAME}\" \"{DB_NAME}\"\n",
|
|
|
|
+ "!gunzip \"{RAW_DATA_NAME}.gz\""
|
|
|
|
+ ]
|
|
|
|
+ },
|
|
|
|
+ {
|
|
|
|
+ "cell_type": "code",
|
|
|
|
+ "execution_count": 2,
|
|
|
|
+ "metadata": {},
|
|
|
|
+ "outputs": [],
|
|
|
|
+ "source": [
|
|
|
|
+ "import sqlite3\n",
|
|
|
|
+ "tables = [\n",
|
|
|
|
+ " \"\"\"\n",
|
|
|
|
+ "CREATE TABLE functions (\n",
|
|
|
|
+ "\tid INTEGER PRIMARY KEY,\n",
|
|
|
|
+ " \tname TEXT NOT NULL,\n",
|
|
|
|
+ " signature TEXT NOT NULL,\n",
|
|
|
|
+ "\tfile TEXT NOT NULL,\n",
|
|
|
|
+ "\tcode TEXT NOT NULL,\n",
|
|
|
|
+ " line_start INTEGER NOT NULL,\n",
|
|
|
|
+ " line_end INTEGER NOT NULL\n",
|
|
|
|
+ ");\"\"\",\n",
|
|
|
|
+ " \"\"\"\n",
|
|
|
|
+ "CREATE TABLE function_analyses (\n",
|
|
|
|
+ "\tid INTEGER PRIMARY KEY,\n",
|
|
|
|
+ " function_id INTEGER,\n",
|
|
|
|
+ " summary TEXT NOT NULL,\n",
|
|
|
|
+ " background TEXT,\n",
|
|
|
|
+ "\tanalysis TEXT,\n",
|
|
|
|
+ "\tpurpose TEXT,\n",
|
|
|
|
+ " comment TEXT,\n",
|
|
|
|
+ " tldr TEXT\n",
|
|
|
|
+ ");\n",
|
|
|
|
+ "\"\"\"]\n",
|
|
|
|
+ "indexes = [\n",
|
|
|
|
+ " \"CREATE INDEX idx_function_analyses_function_id ON function_analyses (function_id);\"\n",
|
|
|
|
+ "]\n",
|
|
|
|
+ "\n",
|
|
|
|
+ "conn = sqlite3.connect(DB_NAME)\n",
|
|
|
|
+ "cur = conn.cursor()\n",
|
|
|
|
+ "for table in tables:\n",
|
|
|
|
+ " cur.execute(table)\n",
|
|
|
|
+ "conn.commit()"
|
|
|
|
+ ]
|
|
|
|
+ },
|
|
|
|
+ {
|
|
|
|
+ "cell_type": "code",
|
|
|
|
+ "execution_count": 3,
|
|
|
|
+ "metadata": {},
|
|
|
|
+ "outputs": [],
|
|
|
|
+ "source": [
|
|
|
|
+ "import json\n",
|
|
|
|
+ "\n",
|
|
|
|
+ "sql_function = \"\"\"\n",
|
|
|
|
+ "INSERT INTO functions(id,name,signature,file,code,line_start,line_end)\n",
|
|
|
|
+ " VALUES(?,?,?,?,?,?,?);\n",
|
|
|
|
+ "\"\"\"\n",
|
|
|
|
+ "\n",
|
|
|
|
+ "sql_function_analysis = \"\"\"\n",
|
|
|
|
+ "INSERT INTO function_analyses(function_id, summary, background, analysis, purpose, comment, tldr)\n",
|
|
|
|
+ " VALUES(?,?,?,?,?,?,?);\n",
|
|
|
|
+ "\"\"\"\n",
|
|
|
|
+ "\n",
|
|
|
|
+ "data = []\n",
|
|
|
|
+ "with open(\"data.jsonl\") as f:\n",
|
|
|
|
+ " for line in f:\n",
|
|
|
|
+ " d = json.loads(line)\n",
|
|
|
|
+ " cur.execute(sql_function, (\n",
|
|
|
|
+ " d['id'], d['name'], d['signature'], d['file'], d['code'], d['line']['from'], d['line']['to']\n",
|
|
|
|
+ " ))\n",
|
|
|
|
+ " if \"parsed\" in d:\n",
|
|
|
|
+ " parsed = d['parsed']\n",
|
|
|
|
+ " summary = d['result'].split(\n",
|
|
|
|
+ " \"In one sentence, this is a function that\")[1].strip()\n",
|
|
|
|
+ " cur.execute(sql_function_analysis, (d['id'],\n",
|
|
|
|
+ " summary,\n",
|
|
|
|
+ " parsed['background'] if \"background\" in parsed else None,\n",
|
|
|
|
+ " parsed['analysis'] if \"analysis\" in parsed else None,\n",
|
|
|
|
+ " parsed['purpose'] if \"purpose\" in parsed else None,\n",
|
|
|
|
+ " parsed['comment'] if \"comment\" in parsed else None,\n",
|
|
|
|
+ " parsed['tldr'] if \"tldr\" in parsed else None,\n",
|
|
|
|
+ " ))\n",
|
|
|
|
+ "conn.commit()"
|
|
|
|
+ ]
|
|
|
|
+ },
|
|
|
|
+ {
|
|
|
|
+ "cell_type": "code",
|
|
|
|
+ "execution_count": 4,
|
|
|
|
+ "metadata": {},
|
|
|
|
+ "outputs": [],
|
|
|
|
+ "source": [
|
|
|
|
+ "for index in indexes:\n",
|
|
|
|
+ " cur.execute(index)\n",
|
|
|
|
+ "conn.commit()\n",
|
|
|
|
+ "conn.close()"
|
|
|
|
+ ]
|
|
|
|
+ }
|
|
|
|
+ ],
|
|
|
|
+ "metadata": {
|
|
|
|
+ "kernelspec": {
|
|
|
|
+ "display_name": ".venv",
|
|
|
|
+ "language": "python",
|
|
|
|
+ "name": "python3"
|
|
|
|
+ },
|
|
|
|
+ "language_info": {
|
|
|
|
+ "codemirror_mode": {
|
|
|
|
+ "name": "ipython",
|
|
|
|
+ "version": 3
|
|
|
|
+ },
|
|
|
|
+ "file_extension": ".py",
|
|
|
|
+ "mimetype": "text/x-python",
|
|
|
|
+ "name": "python",
|
|
|
|
+ "nbconvert_exporter": "python",
|
|
|
|
+ "pygments_lexer": "ipython3",
|
|
|
|
+ "version": "3.11.4"
|
|
|
|
+ },
|
|
|
|
+ "orig_nbformat": 4
|
|
|
|
+ },
|
|
|
|
+ "nbformat": 4,
|
|
|
|
+ "nbformat_minor": 2
|
|
|
|
+}
|