Initial commit: Egregore db service

PostgreSQL message storage API with asyncpg connection pooling and full-text search.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
egregore 2026-02-02 11:37:55 +00:00
commit 291d664051
5 changed files with 686 additions and 0 deletions

45
.gitignore vendored Normal file
View file

@ -0,0 +1,45 @@
# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
*.egg-info/
.eggs/
dist/
build/
# Environment
.env
.env.*
*.local
# IDE
.vscode/
.idea/
*.swp
*.swo
# Secrets - BE PARANOID
*.pem
*.key
*.crt
*.p12
credentials*
secrets*
tokens*
*_secret*
*_token*
*.credentials
# Logs and data
*.log
*.db
*.sqlite
*.sqlite3
*.backup
chat.db*
# OS
.DS_Store
Thumbs.db

28
__init__.py Normal file
View file

@ -0,0 +1,28 @@
"""
Egregore Database - Message storage and retrieval
This module handles all database operations for the chat system.
Currently SQLite, designed for easy migration to PostgreSQL later.
"""
from .messages import (
init_db,
save_message,
save_response_blocks,
get_messages,
get_conversation_history,
search_messages,
MESSAGE_PRIORITIES,
get_priority_for_type,
)
__all__ = [
"init_db",
"save_message",
"save_response_blocks",
"get_messages",
"get_conversation_history",
"search_messages",
"MESSAGE_PRIORITIES",
"get_priority_for_type",
]

116
main.py Normal file
View file

@ -0,0 +1,116 @@
#!/usr/bin/env python3
"""
Egregore DB Service - Message storage API
Provides HTTP API for message storage and retrieval.
Runs on port 8082.
"""
import os
from typing import Optional
from fastapi import FastAPI, Query
from pydantic import BaseModel
from messages import (
init_db,
save_message,
save_response_blocks,
get_messages,
get_conversation_history,
search_messages,
close_pool,
)
app = FastAPI(title="Egregore DB Service", docs_url="/docs")


@app.on_event("startup")
async def startup():
    # Create the asyncpg pool and ensure the schema/indexes exist before
    # the first request is served.
    # NOTE(review): @app.on_event is deprecated in newer FastAPI in favor of
    # lifespan handlers -- consider migrating when upgrading FastAPI.
    await init_db()


@app.on_event("shutdown")
async def shutdown():
    # Release all pooled PostgreSQL connections on graceful shutdown.
    await close_pool()
# Request models
class SaveMessageRequest(BaseModel):
    """Payload for POST /messages -- one message row."""
    role: str
    content: str
    msg_type: str = "text"
    group_id: Optional[str] = None
    metadata: Optional[dict] = None
    # None -> priority is derived server-side from msg_type/content.
    priority: Optional[int] = None


class SaveBlocksRequest(BaseModel):
    """Payload for POST /messages/blocks -- one assistant response turn."""
    # Claude-style content blocks (text / tool_use / tool_result dicts).
    blocks: list
    group_id: str
# Endpoints
@app.post("/messages")
async def api_save_message(req: SaveMessageRequest):
"""Save a single message"""
msg_id = await save_message(
role=req.role,
content=req.content,
msg_type=req.msg_type,
group_id=req.group_id,
metadata=req.metadata,
priority=req.priority
)
return {"id": msg_id}
@app.post("/messages/blocks")
async def api_save_blocks(req: SaveBlocksRequest):
"""Save multiple response blocks"""
saved = await save_response_blocks(req.blocks, req.group_id)
return {"messages": saved}
@app.get("/messages")
async def api_get_messages(
limit: int = 50,
before: Optional[int] = None,
msg_type: Optional[str] = Query(None, alias="type")
):
"""Get messages with pagination"""
messages, has_more = await get_messages(
limit=min(limit, 100),
before_id=before,
msg_type=msg_type
)
return {"messages": messages, "has_more": has_more}
@app.get("/messages/history")
async def api_get_history(limit: int = 100):
"""Get conversation history in Claude API format"""
history = await get_conversation_history(limit=limit)
return {"history": history}
@app.get("/messages/search")
async def api_search(
q: str,
limit: int = 20,
msg_type: Optional[str] = Query(None, alias="type")
):
"""Search messages by content"""
results = await search_messages(q, limit=limit, msg_type=msg_type)
return {"results": results}
@app.get("/health")
async def health():
"""Health check endpoint"""
return {"status": "ok", "service": "db"}
if __name__ == "__main__":
import uvicorn
uvicorn.run(app, host="127.0.0.1", port=8082)

371
messages.py Normal file
View file

@ -0,0 +1,371 @@
"""
Egregore Database - Message storage operations (PostgreSQL)
"""
import json
import os
import uuid
from datetime import datetime
from typing import Optional
import asyncpg
# Database connection URL - can be overridden via environment
# NOTE(review): the fallback embeds default credentials; acceptable for local
# dev, but production should always supply DATABASE_URL -- confirm deployment
# sets it.
DATABASE_URL = os.getenv("DATABASE_URL", "postgresql://egregore:egregore_db_pass@localhost/egregore")
# Connection pool (lazily created by get_pool, torn down by close_pool)
_pool: Optional[asyncpg.Pool] = None
def set_db_url(url: str):
    """Point the module at a different database.

    Only takes effect if called before get_pool()/init_db() first create
    the connection pool.
    """
    global DATABASE_URL
    DATABASE_URL = url
async def get_pool() -> asyncpg.Pool:
    """Return the shared connection pool, creating it on first use."""
    global _pool
    if _pool is not None:
        return _pool
    _pool = await asyncpg.create_pool(DATABASE_URL, min_size=2, max_size=10)
    return _pool
async def close_pool():
    """Dispose of the shared pool so a fresh one can be created later."""
    global _pool
    if not _pool:
        return
    await _pool.close()
    _pool = None
# Notification priority per message type: 0 = silent, higher = more urgent.
MESSAGE_PRIORITIES = {
    "text": 2,         # Regular messages - notify
    "tool_use": 0,     # Tool invocation - no notify
    "tool_result": 0,  # Tool output - no notify
    "question": 3,     # Questions to user - urgent notify
    "mode_change": 1,  # State transitions - silent
    "thinking": 0,     # Reasoning process - no notify
    "error": 2,        # Error messages - notify
}


def get_priority_for_type(msg_type: str, content: str = "") -> int:
    """Map a message type to its notification priority.

    Text messages that read as questions (trailing '?') are upgraded to
    the 'question' priority so the user gets an urgent notification.
    Unknown types default to 0 (silent).
    """
    if msg_type == "text" and content.strip().endswith("?"):
        return MESSAGE_PRIORITIES["question"]
    return MESSAGE_PRIORITIES.get(msg_type, 0)
async def init_db():
    """Create the messages table and its indexes if they do not exist."""
    pool = await get_pool()
    async with pool.acquire() as conn:
        await conn.execute("""
            CREATE TABLE IF NOT EXISTS messages (
                id SERIAL PRIMARY KEY,
                role TEXT NOT NULL,
                type TEXT NOT NULL DEFAULT 'text',
                content TEXT NOT NULL,
                group_id TEXT,
                metadata JSONB,
                priority INTEGER DEFAULT 0,
                timestamp TIMESTAMPTZ NOT NULL DEFAULT NOW()
            )
        """)
        # Single-column indexes backing the filters used by this module.
        for column in ("role", "type", "group_id", "priority", "timestamp"):
            await conn.execute(
                f"CREATE INDEX IF NOT EXISTS idx_messages_{column} ON messages({column})"
            )
        # GIN index powering full-text search over message content.
        await conn.execute("""
            CREATE INDEX IF NOT EXISTS idx_messages_content_search
            ON messages USING gin(to_tsvector('english', content))
        """)
async def save_message(
    role: str,
    content: str,
    msg_type: str = "text",
    group_id: Optional[str] = None,
    metadata: Optional[dict] = None,
    priority: Optional[int] = None
) -> int:
    """Insert one message row and return its id.

    When priority is None it is derived from the message type (and, for
    text, from whether the content looks like a question).
    """
    if priority is None:
        priority = get_priority_for_type(msg_type, content)
    meta_json = json.dumps(metadata) if metadata else None
    pool = await get_pool()
    async with pool.acquire() as conn:
        row = await conn.fetchrow(
            """INSERT INTO messages (role, type, content, group_id, metadata, priority)
            VALUES ($1, $2, $3, $4, $5, $6)
            RETURNING id""",
            role, msg_type, content, group_id, meta_json, priority
        )
        return row['id']
async def save_response_blocks(blocks: list, group_id: str) -> list:
    """
    Save each response block as a separate row.
    Returns list of saved message dicts with IDs for frontend.
    """
    saved_messages = []
    pool = await get_pool()
    async with pool.acquire() as conn:
        # One shared timestamp so the whole response turn sorts together.
        # NOTE(review): datetime.utcnow() is naive and deprecated in Python
        # 3.12+; the target column is TIMESTAMPTZ -- confirm asyncpg's
        # handling of naive datetimes before switching to aware ones, since
        # the isoformat string below is also returned to the frontend.
        timestamp = datetime.utcnow()
        for block in blocks:
            block_type = block.get("type", "text")
            content = ""
            metadata = None
            priority = MESSAGE_PRIORITIES.get(block_type, 0)
            if block_type == "text":
                content = block.get("content", "")
                # Question detection mirrors get_priority_for_type().
                if content.strip().endswith("?"):
                    priority = MESSAGE_PRIORITIES["question"]
            elif block_type == "tool_use":
                # Tool input is stored as JSON text; name/id go to metadata.
                content = json.dumps(block.get("input", {}))
                metadata = {"tool_name": block.get("name"), "tool_id": block.get("id")}
            elif block_type == "tool_result":
                content = block.get("content", "")
                metadata = {"tool_name": block.get("tool_name"), "tool_use_id": block.get("tool_use_id")}
            else:
                # Unknown block types are stored verbatim with priority 0.
                content = block.get("content", "")
            row = await conn.fetchrow(
                """INSERT INTO messages (role, type, content, group_id, metadata, priority, timestamp)
                VALUES ($1, $2, $3, $4, $5, $6, $7)
                RETURNING id""",
                "assistant", block_type, content, group_id,
                json.dumps(metadata) if metadata else None, priority, timestamp
            )
            msg_id = row['id']
            # Echo back the stored row so the frontend can render immediately.
            saved_messages.append({
                "id": msg_id,
                "role": "assistant",
                "type": block_type,
                "content": content,
                "group_id": group_id,
                "metadata": metadata,
                "priority": priority,
                "timestamp": timestamp.isoformat()
            })
    return saved_messages
async def get_messages(
    limit: int = 50,
    before_id: Optional[int] = None,
    msg_type: Optional[str] = None
) -> tuple[list[dict], bool]:
    """Get messages with pagination.

    Args:
        limit: Maximum number of rows to return.
        before_id: Keyset cursor -- only rows with id < before_id are returned.
        msg_type: Optional filter on the message type column.

    Returns:
        (messages, has_more): messages in ascending id order, plus a flag
        indicating whether older messages exist before the returned window.
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        params = []
        where_clauses = []
        param_idx = 1
        # `is not None` rather than truthiness so an id of 0 would still be
        # honored as a cursor (the original `if before_id:` silently ignored it).
        if before_id is not None:
            where_clauses.append(f"id < ${param_idx}")
            params.append(before_id)
            param_idx += 1
        if msg_type:
            where_clauses.append(f"type = ${param_idx}")
            params.append(msg_type)
            param_idx += 1
        where_sql = " AND ".join(where_clauses) if where_clauses else "TRUE"
        # Fetch one extra row to detect whether more history remains.
        params.append(limit + 1)
        rows = await conn.fetch(
            f"""SELECT id, role, type, content, group_id, metadata, priority, timestamp
            FROM messages
            WHERE {where_sql}
            ORDER BY id DESC
            LIMIT ${param_idx}""",
            *params
        )
        has_more = len(rows) > limit
        rows = rows[:limit]
        messages = []
        for row in rows:
            metadata = None
            if row['metadata']:
                # asyncpg may hand back JSONB as a str or an already-decoded
                # object depending on codec configuration; handle both.
                # Narrowed from a bare except so unrelated bugs surface.
                try:
                    metadata = json.loads(row['metadata']) if isinstance(row['metadata'], str) else row['metadata']
                except (TypeError, ValueError):
                    metadata = None
            messages.append({
                "id": row['id'],
                "role": row['role'],
                "type": row['type'],
                "content": row['content'],
                "group_id": row['group_id'],
                "metadata": metadata,
                "priority": row['priority'],
                "timestamp": row['timestamp'].isoformat() if row['timestamp'] else None
            })
        # Rows were fetched newest-first; present them oldest-first.
        return list(reversed(messages)), has_more
async def get_conversation_history(limit: int = 100) -> list[dict]:
    """
    Reconstruct Claude API message format from individual rows.
    Groups assistant messages by group_id to build proper content arrays.
    """
    messages, _ = await get_messages(limit)
    api_messages = []
    current_group = None            # group_id of the assistant turn being built
    current_assistant_content = []  # accumulated content blocks for that turn
    for msg in messages:
        if msg["role"] == "user":
            # A user message terminates any assistant turn in progress.
            if current_assistant_content:
                api_messages.append({
                    "role": "assistant",
                    "content": current_assistant_content
                })
                current_assistant_content = []
                current_group = None
            api_messages.append({
                "role": "user",
                "content": msg["content"]
            })
        elif msg["role"] == "assistant":
            # A new group_id marks a new assistant turn: flush the previous one.
            if msg["group_id"] != current_group:
                if current_assistant_content:
                    api_messages.append({
                        "role": "assistant",
                        "content": current_assistant_content
                    })
                current_assistant_content = []
                current_group = msg["group_id"]
            if msg["type"] == "text":
                current_assistant_content.append({
                    "type": "text",
                    "text": msg["content"]
                })
            elif msg["type"] == "tool_use":
                tool_input = {}
                try:
                    tool_input = json.loads(msg["content"])
                except:  # noqa: E722 -- best-effort decode; bad JSON yields {}
                    pass
                metadata = msg.get("metadata") or {}
                current_assistant_content.append({
                    "type": "tool_use",
                    # Fall back to a fresh UUID so the block stays valid even
                    # when the original tool id was not stored.
                    "id": metadata.get("tool_id", str(uuid.uuid4())),
                    "name": metadata.get("tool_name", "unknown"),
                    "input": tool_input
                })
            elif msg["type"] == "tool_result":
                # tool_result blocks must be delivered in a *user* message,
                # so the current assistant turn is flushed first.
                if current_assistant_content:
                    api_messages.append({
                        "role": "assistant",
                        "content": current_assistant_content
                    })
                    current_assistant_content = []
                metadata = msg.get("metadata") or {}
                api_messages.append({
                    "role": "user",
                    "content": [{
                        "type": "tool_result",
                        "tool_use_id": metadata.get("tool_use_id", ""),
                        "content": msg["content"]
                    }]
                })
    # Flush a trailing assistant turn.
    if current_assistant_content:
        api_messages.append({
            "role": "assistant",
            "content": current_assistant_content
        })
    return api_messages
async def search_messages(
    query: str,
    limit: int = 20,
    msg_type: Optional[str] = None
) -> list[dict]:
    """Search messages using PostgreSQL full-text search.

    Combines a stemmed tsvector match with an ILIKE substring fallback so
    short or partial words still hit.

    Args:
        query: Search text; queries shorter than 2 characters return [].
        limit: Maximum results, clamped to [1, 50].
        msg_type: Optional filter on the message type column.

    Returns:
        Matching rows (newest first), each with an extra "snippet" field in
        which matched terms are wrapped in ** markers.
    """
    if len(query) < 2:
        return []
    pool = await get_pool()
    async with pool.acquire() as conn:
        params = [query]
        param_idx = 2
        type_filter = ""
        if msg_type:
            type_filter = f"AND type = ${param_idx}"
            params.append(msg_type)
            param_idx += 1
        # Clamp to [1, 50]: a zero/negative LIMIT would error in PostgreSQL.
        params.append(max(1, min(limit, 50)))
        # NOTE: in the ILIKE fallback, % and _ inside `query` act as
        # wildcards. This only broadens matches and the query is always bound
        # as a parameter (never interpolated), so there is no SQL injection.
        rows = await conn.fetch(
            f"""SELECT id, role, type, content, group_id, metadata, priority, timestamp,
            ts_headline('english', content, plainto_tsquery('english', $1),
            'StartSel=**, StopSel=**, MaxWords=50, MinWords=20') as snippet
            FROM messages
            WHERE (to_tsvector('english', content) @@ plainto_tsquery('english', $1)
            OR content ILIKE '%' || $1 || '%')
            {type_filter}
            ORDER BY id DESC
            LIMIT ${param_idx}""",
            *params
        )
        results = []
        for row in rows:
            metadata = None
            if row['metadata']:
                # JSONB may arrive as str or decoded object depending on
                # codecs; narrowed from a bare except so real bugs surface.
                try:
                    metadata = json.loads(row['metadata']) if isinstance(row['metadata'], str) else row['metadata']
                except (TypeError, ValueError):
                    metadata = None
            # Prefer the ts_headline snippet; fall back to a raw prefix for
            # rows matched only by the ILIKE branch.
            snippet = row['snippet'] if row['snippet'] else row['content'][:100]
            results.append({
                "id": row['id'],
                "role": row['role'],
                "type": row['type'],
                "content": row['content'],
                "group_id": row['group_id'],
                "metadata": metadata,
                "priority": row['priority'],
                "timestamp": row['timestamp'].isoformat() if row['timestamp'] else None,
                "snippet": snippet
            })
        return results
# Legacy compatibility - for migration
def set_db_path(path: str):
    """No-op retained so SQLite-era callers keep working unchanged."""
    return None

126
migrate_to_postgres.py Normal file
View file

@ -0,0 +1,126 @@
#!/usr/bin/env python3
"""
Migration script: SQLite to PostgreSQL
Migrates data from the old SQLite database to PostgreSQL.
Run once after setting up PostgreSQL.
"""
import asyncio
import json
import os
import sqlite3
from datetime import datetime
import asyncpg
# Paths
# Source SQLite file (legacy schema) and target PostgreSQL DSN; the DSN
# default mirrors messages.py and should be overridden via DATABASE_URL.
SQLITE_DB = "/home/admin/services/db/chat.db"
POSTGRES_URL = os.getenv("DATABASE_URL", "postgresql://egregore:egregore_db_pass@localhost/egregore")
async def migrate():
    """Copy all rows from the legacy SQLite messages_v2 table into PostgreSQL.

    Plain-INSERTs every SQLite row, so running it twice would duplicate
    data -- the final os.rename of the SQLite file is what guards against
    accidental re-runs.
    """
    print(f"Connecting to PostgreSQL...")
    pool = await asyncpg.create_pool(POSTGRES_URL)
    async with pool.acquire() as conn:
        # Create table if not exists (schema mirrors messages.init_db)
        print("Creating messages table...")
        await conn.execute("""
            CREATE TABLE IF NOT EXISTS messages (
                id SERIAL PRIMARY KEY,
                role TEXT NOT NULL,
                type TEXT NOT NULL DEFAULT 'text',
                content TEXT NOT NULL,
                group_id TEXT,
                metadata JSONB,
                priority INTEGER DEFAULT 0,
                timestamp TIMESTAMPTZ NOT NULL DEFAULT NOW()
            )
        """)
        # Create indexes
        await conn.execute("CREATE INDEX IF NOT EXISTS idx_messages_role ON messages(role)")
        await conn.execute("CREATE INDEX IF NOT EXISTS idx_messages_type ON messages(type)")
        await conn.execute("CREATE INDEX IF NOT EXISTS idx_messages_group_id ON messages(group_id)")
        await conn.execute("CREATE INDEX IF NOT EXISTS idx_messages_priority ON messages(priority)")
        await conn.execute("CREATE INDEX IF NOT EXISTS idx_messages_timestamp ON messages(timestamp)")
        await conn.execute("""
            CREATE INDEX IF NOT EXISTS idx_messages_content_search
            ON messages USING gin(to_tsvector('english', content))
        """)
    # Check if SQLite database exists
    if not os.path.exists(SQLITE_DB):
        print(f"No SQLite database found at {SQLITE_DB}, nothing to migrate.")
        await pool.close()
        return
    # Connect to SQLite
    print(f"Reading from SQLite: {SQLITE_DB}")
    sqlite_conn = sqlite3.connect(SQLITE_DB)
    sqlite_conn.row_factory = sqlite3.Row
    cursor = sqlite_conn.cursor()
    # Check if messages_v2 table exists
    cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='messages_v2'")
    if not cursor.fetchone():
        print("No messages_v2 table in SQLite, nothing to migrate.")
        sqlite_conn.close()
        await pool.close()
        return
    # Fetch all messages from SQLite
    cursor.execute("""
        SELECT id, role, type, content, group_id, metadata, priority, timestamp
        FROM messages_v2
        ORDER BY id ASC
    """)
    rows = cursor.fetchall()
    print(f"Found {len(rows)} messages to migrate")
    if not rows:
        print("No messages to migrate.")
        sqlite_conn.close()
        await pool.close()
        return
    # Insert into PostgreSQL
    async with pool.acquire() as conn:
        migrated = 0
        for row in rows:
            # NOTE(review): fromisoformat yields a naive datetime here while
            # the target column is TIMESTAMPTZ -- confirm the SQLite
            # timestamps were stored as UTC before trusting migrated values.
            timestamp = datetime.fromisoformat(row['timestamp']) if row['timestamp'] else datetime.utcnow()
            await conn.execute(
                """INSERT INTO messages (role, type, content, group_id, metadata, priority, timestamp)
                VALUES ($1, $2, $3, $4, $5, $6, $7)""",
                row['role'],
                row['type'],
                row['content'],
                row['group_id'],
                row['metadata'],  # Already JSON string, PostgreSQL will handle it
                row['priority'],
                timestamp
            )
            migrated += 1
        print(f"Migrated {migrated} messages to PostgreSQL")
    # Verify
    async with pool.acquire() as conn:
        count = await conn.fetchval("SELECT COUNT(*) FROM messages")
        print(f"PostgreSQL now has {count} messages")
    sqlite_conn.close()
    await pool.close()
    # Backup SQLite database
    backup_path = SQLITE_DB + ".backup"
    print(f"Backing up SQLite database to {backup_path}")
    os.rename(SQLITE_DB, backup_path)
    print("Migration complete!")


if __name__ == "__main__":
    asyncio.run(migrate())