calblueprint · carolynzhuang · Oct 10, 2025 · Oct 10, 2025 · Oct 10, 2025 · Oct 10, 2025
diff --git a/.gitignore b/.gitignore
@@ -34,3 +34,6 @@ yarn-error.log*
 # typescript
 *.tsbuildinfo
 next-env.d.ts
+
+# python
+/adopt-an-inmate-venv
+# compiled bytecode caches are build artifacts and should never be committed
+__pycache__/
+*.py[cod]
diff --git a/actions/embeddings/__init__.py b/actions/embeddings/__init__.py
diff --git a/actions/embeddings/__pycache__/clients.cpython-313.pyc b/actions/embeddings/__pycache__/clients.cpython-313.pyc
diff --git a/actions/embeddings/__pycache__/config.cpython-313.pyc b/actions/embeddings/__pycache__/config.cpython-313.pyc
diff --git a/actions/embeddings/clients.py b/actions/embeddings/clients.py
@@ -0,0 +1,25 @@
+import os
+from dotenv import load_dotenv
+import supabase
+import vecs
+from sentence_transformers import SentenceTransformer
+from config import MODEL_NAME, MODEL_DIMENSION, VECS_COLLECTION_NAME, SUPABASE_TABLE_NAME
+
+# Load settings from the repository-level .env.local (two directories above this file).
+load_dotenv(os.path.join(os.path.dirname(__file__), "../../.env.local"))
+
+
+def _require_env(name):
+  """Return environment variable `name`, raising RuntimeError if it is unset or empty."""
+  value = os.getenv(name)
+  if not value:
+    raise RuntimeError(f"Missing required environment variable: {name}")
+  return value
+
+
+def _fetch_all_rows(client, table_name, page_size=1000):
+  """
+  Return every row of `table_name` as a list of dictionaries.
+
+  A single select is subject to the API's maximum row count, so the table is
+  read page by page until an empty page comes back.
+
+  Args:
+      client: The Supabase client used to query the table.
+      table_name (str): Name of the table to read.
+      page_size (int): Number of rows requested per page.
+  """
+  rows = []
+  while True:
+    # Advance by the number of rows actually received so a server-side cap
+    # smaller than page_size cannot cause rows to be skipped.
+    # Ordering by "id" keeps the pages stable between requests.
+    page = (
+      client.table(table_name)
+      .select("*")
+      .order("id")
+      .range(len(rows), len(rows) + page_size - 1)
+      .execute()
+      .data
+    )
+    if not page:
+      return rows
+    rows.extend(page)
+
+
+# Initialize model
+model = SentenceTransformer(MODEL_NAME)
+
+# Initialize Supabase
+SUPABASE_URL = _require_env("NEXT_PUBLIC_SUPABASE_URL")
+SUPABASE_ANON_KEY = _require_env("NEXT_PUBLIC_SUPABASE_ANON_KEY")
+supabase_client = supabase.create_client(SUPABASE_URL, SUPABASE_ANON_KEY)
+adoptee_table = _fetch_all_rows(supabase_client, SUPABASE_TABLE_NAME)
+
+# Initialize Vecs
+DB_CONNECTION = _require_env("DATABASE_URL")
+vx = vecs.create_client(DB_CONNECTION)
+adoptee_vector_collection = vx.get_or_create_collection(
+  name=VECS_COLLECTION_NAME,
+  dimension=MODEL_DIMENSION
+)
diff --git a/actions/embeddings/config.py b/actions/embeddings/config.py
@@ -0,0 +1,11 @@
+# Embedding model: embedding dimension for each supported model name,
+# and the model that is currently selected.
+dimensions = {
+  "paraphrase-MiniLM-L3-v2": 384,
+}
+MODEL_NAME = "paraphrase-MiniLM-L3-v2"
+MODEL_DIMENSION = dimensions[MODEL_NAME]
+
+# Vector store: name of the collection
+VECS_COLLECTION_NAME = "adoptee_vector"
+
+# Supabase: name of the table
+SUPABASE_TABLE_NAME = "adoptee"
diff --git a/actions/embeddings/embed.py b/actions/embeddings/embed.py
@@ -0,0 +1,45 @@
+from tqdm import tqdm
+from clients import model, vx, adoptee_vector_collection, adoptee_table
+
+def upsert_data(model, database_table, vector_collection, batch_size=64):
+  """
+  Encodes and upserts data to a vector database in batches.
+
+  Each row's "bio" is embedded with `model` and the row is upserted as an
+  (id, embedding, metadata) record. A batch whose upsert fails is reported
+  and skipped so that the remaining batches are still attempted.
+
+  Args:
+      model: The embedding model; `encode(texts, show_progress_bar=...)` is called on it.
+      database_table (list): A list of dictionaries containing the data. Each
+          row is read for the keys "id", "bio", "gender", "age",
+          "veteran_status", "offense" and "state".
+      vector_collection: The vector collection to which to upsert records.
+      batch_size (int): The number of records to process per batch (at least 1).
+
+  Returns:
+      list: Start indices of the batches whose upsert failed (empty if none failed).
+
+  Raises:
+      ValueError: If `batch_size` is less than 1.
+  """
+  if batch_size < 1:
+    # range() rejects a zero step and yields nothing for a negative one.
+    raise ValueError(f"batch_size must be at least 1, got {batch_size}")
+
+  metadata_keys = ("bio", "gender", "age", "veteran_status", "offense", "state")
+  failed_batches = []
+
+  for i in tqdm(range(0, len(database_table), batch_size)):
+    batch = database_table[i:i + batch_size]
+
+    # NOTE(review): assumes "bio" may be NULL in the source table -- confirm against the schema.
+    # Such rows presumably cannot be embedded, so they are skipped rather than aborting the run.
+    rows = [row for row in batch if row["bio"] is not None]
+    if len(rows) < len(batch):
+      print(f"Skipped {len(batch) - len(rows)} row(s) without a bio in batch starting at index {i}")
+    if not rows:
+      continue
+
+    embeddings = model.encode([row["bio"] for row in rows], show_progress_bar=False).tolist()
+
+    # One (id, vector, metadata) tuple per row, pairing each row with its own embedding.
+    records = [
+      (row["id"], embedding, {key: row[key] for key in metadata_keys})
+      for row, embedding in zip(rows, embeddings)
+    ]
+
+    try:
+      vector_collection.upsert(records)
+    except Exception as e:
+      # Best effort: report the failure and continue with the next batch.
+      print(f"Upsert failed for batch starting at index {i}: {e}")
+      failed_batches.append(i)
+    else:
+      print(f"Successfully upserted batch starting at index {i}")
+
+  return failed_batches
+
+if __name__ == "__main__":
+  # Always release the vecs connection, even if encoding or upserting raises.
+  try:
+    upsert_data(model, adoptee_table, adoptee_vector_collection)
+  finally:
+    vx.disconnect()
diff --git a/eslint.config.mjs b/eslint.config.mjs
@@ -22,6 +22,7 @@ const eslintConfig = [
       'build',
       '.vscode',
       'next-env.d.ts',
+      'adopt-an-inmate-venv',
     ],
   },
   {

diff --git a/requirements.in b/requirements.in
@@ -0,0 +1,5 @@
+sentence-transformers
+supabase
+vecs
+python-dotenv
+tqdm
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,196 @@
+#
+# This file is autogenerated by pip-compile with Python 3.13
+# by the following command:
+#
+#    pip-compile --output-file=requirements.txt requirements.in
+#
+annotated-types==0.7.0
+    # via pydantic
+anyio==4.11.0
+    # via httpx
+certifi==2025.10.5
+    # via
+    #   httpcore
+    #   httpx
+    #   requests
+cffi==2.0.0
+    # via cryptography
+charset-normalizer==3.4.3
+    # via requests
+cryptography==46.0.2
+    # via pyjwt
+deprecated==1.2.18
+    # via vecs
+deprecation==2.1.0
+    # via
+    #   postgrest
+    #   storage3
+filelock==3.20.0
+    # via
+    #   huggingface-hub
+    #   torch
+    #   transformers
+flupy==1.2.3
+    # via vecs
+fsspec==2025.9.0
+    # via
+    #   huggingface-hub
+    #   torch
+h11==0.16.0
+    # via httpcore
+h2==4.3.0
+    # via httpx
+hf-xet==1.1.10
+    # via huggingface-hub
+hpack==4.1.0
+    # via h2
+httpcore==1.0.9
+    # via httpx
+httpx[http2]==0.28.1
+    # via
+    #   postgrest
+    #   storage3
+    #   supabase
+    #   supabase-auth
+    #   supabase-functions
+huggingface-hub==0.35.3
+    # via
+    #   sentence-transformers
+    #   tokenizers
+    #   transformers
+hyperframe==6.1.0
+    # via h2
+idna==3.10
+    # via
+    #   anyio
+    #   httpx
+    #   requests
+    #   yarl
+jinja2==3.1.6
+    # via torch
+joblib==1.5.2
+    # via scikit-learn
+markupsafe==3.0.3
+    # via jinja2
+mpmath==1.3.0
+    # via sympy
+multidict==6.7.0
+    # via yarl
+networkx==3.5
+    # via torch
+numpy==2.3.3
+    # via
+    #   pgvector
+    #   scikit-learn
+    #   scipy
+    #   transformers
+packaging==25.0
+    # via
+    #   deprecation
+    #   huggingface-hub
+    #   transformers
+pgvector==0.3.6
+    # via vecs
+pillow==11.3.0
+    # via sentence-transformers
+postgrest==2.22.0
+    # via supabase
+propcache==0.4.1
+    # via yarl
+psycopg2-binary==2.9.11
+    # via vecs
+pycparser==2.23
+    # via cffi
+pydantic==2.12.0
+    # via
+    #   postgrest
+    #   realtime
+    #   storage3
+    #   supabase-auth
+pydantic-core==2.41.1
+    # via pydantic
+pyjwt[crypto]==2.10.1
+    # via supabase-auth
+python-dotenv==1.1.1
+    # via -r requirements.in
+pyyaml==6.0.3
+    # via
+    #   huggingface-hub
+    #   transformers
+realtime==2.22.0
+    # via supabase
+regex==2025.9.18
+    # via transformers
+requests==2.32.5
+    # via
+    #   huggingface-hub
+    #   transformers
+safetensors==0.6.2
+    # via transformers
+scikit-learn==1.7.2
+    # via sentence-transformers
+scipy==1.16.2
+    # via
+    #   scikit-learn
+    #   sentence-transformers
+sentence-transformers==5.1.1
+    # via -r requirements.in
+sniffio==1.3.1
+    # via anyio
+sqlalchemy==2.0.44
+    # via vecs
+storage3==2.22.0
+    # via supabase
+strenum==0.4.15
+    # via supabase-functions
+supabase==2.22.0
+    # via -r requirements.in
+supabase-auth==2.22.0
+    # via supabase
+supabase-functions==2.22.0
+    # via supabase
+sympy==1.14.0
+    # via torch
+threadpoolctl==3.6.0
+    # via scikit-learn
+tokenizers==0.22.1
+    # via transformers
+torch==2.8.0
+    # via sentence-transformers
+tqdm==4.67.1
+    # via
+    #   -r requirements.in
+    #   huggingface-hub
+    #   sentence-transformers
+    #   transformers
+transformers==4.57.0
+    # via sentence-transformers
+typing-extensions==4.15.0
+    # via
+    #   flupy
+    #   huggingface-hub
+    #   pydantic
+    #   pydantic-core
+    #   realtime
+    #   sentence-transformers
+    #   sqlalchemy
+    #   torch
+    #   typing-inspection
+typing-inspection==0.4.2
+    # via pydantic
+urllib3==2.5.0
+    # via requests
+vecs==0.4.5
+    # via -r requirements.in
+websockets==15.0.1
+    # via realtime
+wrapt==1.17.3
+    # via deprecated
+yarl==1.22.0
+    # via
+    #   postgrest
+    #   storage3
+    #   supabase-functions
+
+# The following packages are considered to be unsafe in a requirements file:
+# setuptools