ソースを参照

feat(searcher): support sqlite

iwanhae 1年前
コミット
3aae53ba44

+ 4 - 1
searcher/.gitignore

@@ -11,4 +11,7 @@ Cargo.lock
 **/*.rs.bk
 
 # MSVC Windows builds of rustc generate these, which store debugging information
-*.pdb
+*.pdb
+
+kuberian.db
+kuberian.usearch

+ 1 - 0
searcher/Cargo.toml

@@ -19,6 +19,7 @@ actix-web = "4"
 intel-mkl-src = { version = "0.8.1", features = [
     "mkl-static-lp64-iomp",
 ], optional = true }
+diesel = { version = "2.1.0", features = ["sqlite", "r2d2"] }
 
 
 [features]

+ 17 - 7
searcher/Dockerfile

@@ -1,4 +1,7 @@
 FROM rust:1.71-bullseye as builder
+WORKDIR /usr/src/kuberian
+
+# Install intel-mkl
 RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \
     | gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && \
     echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | \
@@ -6,22 +9,29 @@ RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRO
 RUN apt update
 RUN apt install -y intel-oneapi-mkl-devel libomp-dev
 
-WORKDIR /usr/src/kuberian
-ENV HF_HOME=/model
+# Make cache layers
 COPY Cargo.toml .
 COPY ci src/
 RUN cargo build -r -F mkl
 RUN rm -rf src
+
+# Build final binaries
 COPY . .
+# Make sure that cargo recompiles kuberian
+RUN touch src/* 
 RUN cargo install --path . -F mkl
-RUN kuberian --ci
 
-FROM debian:bullseye-slim
-RUN apt update && \ 
-    apt install -y libomp-dev && \
-    rm -rf /var/lib/apt/lists/*
+# Download the embedding models, etc.
 ENV HF_HOME=/model
+RUN kuberian --ci
+
+FROM gcr.io/distroless/cc-debian11
+WORKDIR /app
+COPY --from=builder /opt/intel/oneapi/compiler/latest/linux/compiler/lib/intel64_lin/libiomp5.so /usr/lib/
 COPY --from=builder /model /model
 COPY --from=builder /usr/local/cargo/bin/kuberian /usr/local/bin/kuberian
+ADD https://s3.iwanhae.kr/kuberian/kuberian.db ./kuberian.db
+ADD https://s3.iwanhae.kr/kuberian/kuberian.usearch ./kuberian.usearch
 EXPOSE 8080
+ENV HF_HOME=/model
 CMD ["kuberian"]

+ 3 - 0
searcher/Makefile

@@ -0,0 +1,3 @@
+prepare:
+	curl https://s3.iwanhae.kr/kuberian/kuberian.db -o kuberian.db
+	curl https://s3.iwanhae.kr/kuberian/kuberian.usearch -o kuberian.usearch

+ 15 - 0
searcher/src/database/mod.rs

@@ -0,0 +1,15 @@
+pub mod models;
+pub mod schema;
+
+use diesel::r2d2::ConnectionManager;
+use diesel::r2d2::Pool;
+use diesel::sqlite::SqliteConnection;
+use std::env;
+
+pub fn establish_connection() -> Pool<ConnectionManager<SqliteConnection>> {
+    let database_url = env::var("DATABASE_URL").unwrap_or_else(|_| String::from("./kuberian.db"));
+    let manger = ConnectionManager::<SqliteConnection>::new(database_url);
+    Pool::builder()
+        .build(manger)
+        .expect("Could not build connection pool")
+}

+ 28 - 0
searcher/src/database/models.rs

@@ -0,0 +1,28 @@
+use super::schema;
+use diesel::prelude::*;
+
+#[derive(Queryable, Selectable)]
+#[diesel(table_name = schema::function_analyses)]
+#[diesel(check_for_backend(diesel::sqlite::Sqlite))]
+pub struct FunctionAnalyses {
+    pub function_id: i32,
+    pub summary: String,
+    pub background: Option<String>,
+    pub analysis: Option<String>,
+    pub purpose: Option<String>,
+    pub comment: Option<String>,
+    pub tldr: Option<String>,
+}
+
+#[derive(Queryable, Selectable)]
+#[diesel(table_name = schema::functions)]
+#[diesel(check_for_backend(diesel::sqlite::Sqlite))]
+pub struct Functions {
+    pub id: i32,
+    pub name: String,
+    pub signature: String,
+    pub file: String,
+    pub code: String,
+    pub line_start: i32,
+    pub line_end: i32,
+}

+ 28 - 0
searcher/src/database/schema.rs

@@ -0,0 +1,28 @@
+// @generated automatically by Diesel CLI.
+
+diesel::table! {
+    function_analyses (id) {
+        id -> Integer,
+        function_id -> Integer,
+        summary -> Text,
+        background -> Nullable<Text>,
+        analysis -> Nullable<Text>,
+        purpose -> Nullable<Text>,
+        comment -> Nullable<Text>,
+        tldr -> Nullable<Text>,
+    }
+}
+
+diesel::table! {
+    functions (id) {
+        id -> Integer,
+        name -> Text,
+        signature -> Text,
+        file -> Text,
+        code -> Text,
+        line_start -> Integer,
+        line_end -> Integer,
+    }
+}
+
+diesel::allow_tables_to_appear_in_same_query!(function_analyses, functions,);

+ 39 - 4
searcher/src/main.rs

@@ -1,16 +1,41 @@
 #[cfg(feature = "mkl")]
 extern crate intel_mkl_src;
 mod args;
+mod database;
 mod embed;
 
 use actix_web::{get, web, App, HttpResponse, HttpServer, Responder};
 use args::Args;
 use clap::Parser;
+use diesel::prelude::*;
+use diesel::{
+    r2d2::{ConnectionManager, Pool},
+    SqliteConnection,
+};
 use embed::encoder;
+use serde_json::json;
 
 #[get("/")]
-async fn hello() -> impl Responder {
-    HttpResponse::Ok().body(":-)")
+async fn hello(pool: web::Data<Pool<ConnectionManager<SqliteConnection>>>) -> impl Responder {
+    let conn = &mut pool.get().unwrap();
+
+    use database::schema::function_analyses::dsl::*;
+    use database::schema::functions::dsl::*;
+
+    let count_functions = functions
+        .count()
+        .get_result::<i64>(conn)
+        .expect("can not get functions stats");
+
+    let count_analyses = function_analyses
+        .count()
+        .get_result::<i64>(conn)
+        .expect("can not get function_analyses stats");
+
+    HttpResponse::Ok().json(json!({
+        "total": count_functions,
+        "analyzed": count_analyses
+    }))
 }
 
 #[get("/q/{query}")]
@@ -23,16 +48,26 @@ async fn search(path: web::Path<String>, enc: web::Data<encoder::Encoder>) -> im
 #[actix_web::main]
 async fn main() -> std::io::Result<()> {
     let args = Args::parse();
+
+    // [BEGIN] INIT
+
+    // MODEL
     let (model, tokenizer) = args.build_model_and_tokenizer().unwrap();
     let enc = encoder::Encoder::new(model, tokenizer);
-    let mutexed_enc = web::Data::new(enc);
+    let app_data_encoder = web::Data::new(enc);
+
+    // DATABASE
+    let pool = database::establish_connection();
+    let app_data_pool = web::Data::new(pool);
 
+    // [END] INIT
     args.terminate_if_ci();
 
     println!("Listen on 0.0.0.0:8080");
     let result = HttpServer::new(move || {
         App::new()
-            .app_data(mutexed_enc.clone())
+            .app_data(app_data_encoder.clone())
+            .app_data(app_data_pool.clone())
             .service(hello)
             .service(search)
     })