Add database setup scripts: MongoDB index creation and data-migration (user dedup, userId type conversion, entryDate backfill)
This commit is contained in:
1
backend/scripts/__init__.py
Normal file
1
backend/scripts/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""Database migration and setup scripts for Grateful Journal."""
|
||||
136
backend/scripts/create_indexes.py
Normal file
136
backend/scripts/create_indexes.py
Normal file
@@ -0,0 +1,136 @@
|
||||
"""
|
||||
MongoDB Index Creation Script
|
||||
|
||||
Creates all necessary indexes for optimized queries.
|
||||
Run this script after migration to ensure indexes are in place.
|
||||
|
||||
Usage:
|
||||
python backend/scripts/create_indexes.py
|
||||
"""
|
||||
|
||||
from pymongo import MongoClient
|
||||
from config import get_settings
|
||||
from typing import Dict, List, Tuple
|
||||
|
||||
|
||||
def _create_index(db, collection_name, keys, index_name, success_msg, error_label,
                  indexes_created, unique=False):
    """Create one index on db[collection_name], best-effort.

    On success the (collection, index name) pair is appended to
    ``indexes_created`` and a ✓ line is printed; any failure (e.g. the index
    already exists with different options) is printed as a ⚠ warning instead
    of aborting, so the script stays safe to re-run after a partial setup.
    """
    try:
        kwargs = {"name": index_name}
        if unique:
            kwargs["unique"] = True
        db[collection_name].create_index(keys, **kwargs)
        indexes_created.append((collection_name, index_name))
        print(f"  ✓ {success_msg}")
    except Exception as e:
        print(f"  ⚠ {error_label}: {e}")


def create_indexes():
    """Create all required MongoDB indexes for the users and entries collections.

    Connects using the project settings, creates each index (best-effort,
    re-runnable), then prints a summary of every index present. Side effects:
    network I/O against MongoDB and console output only.
    """
    settings = get_settings()
    client = MongoClient(settings.mongodb_uri)
    db = client[settings.mongodb_db_name]

    print(f"✓ Connected to MongoDB: {settings.mongodb_db_name}\n")

    indexes_created = []

    # ========== USERS COLLECTION INDEXES ==========
    print("Creating indexes for 'users' collection...")

    # Unique index on email (enforces one account per address)
    _create_index(db, "users", [("email", 1)], "email_unique",
                  "Created unique index on email", "Email index",
                  indexes_created, unique=True)

    # Index on createdAt for sorting
    _create_index(db, "users", [("createdAt", -1)], "createdAt_desc",
                  "Created index on createdAt", "createdAt index",
                  indexes_created)

    # ========== ENTRIES COLLECTION INDEXES ==========
    print("\nCreating indexes for 'entries' collection...")

    # Compound index: userId + createdAt (for history pagination)
    _create_index(db, "entries", [("userId", 1), ("createdAt", -1)], "userId_createdAt",
                  "Created compound index on (userId, createdAt)", "userId_createdAt index",
                  indexes_created)

    # Compound index: userId + entryDate (for calendar queries)
    _create_index(db, "entries", [("userId", 1), ("entryDate", 1)], "userId_entryDate",
                  "Created compound index on (userId, entryDate)", "userId_entryDate index",
                  indexes_created)

    # Index on tags for searching (optional, for future)
    _create_index(db, "entries", [("tags", 1)], "tags",
                  "Created index on tags", "tags index",
                  indexes_created)

    # Index on entryDate for range queries (for calendar)
    _create_index(db, "entries", [("entryDate", -1)], "entryDate_desc",
                  "Created index on entryDate", "entryDate index",
                  indexes_created)

    # TTL index intentionally omitted. Uncomment to auto-delete entries after
    # 2 years:
    # db.entries.create_index(
    #     [("createdAt", 1)],
    #     expireAfterSeconds=63072000,  # 2 years
    #     name="createdAt_ttl",
    # )

    # ========== SUMMARY ==========
    print(f"\n{'='*60}")
    print("✓ Index Creation Complete")
    print(f"{'='*60}")
    print(f"Total indexes created: {len(indexes_created)}")
    for collection, index_name in indexes_created:
        print(f"  • {collection}.{index_name}")

    # Print every index now present on each collection for verification
    print(f"\n{'='*60}")
    print("All Indexes Summary")
    print(f"{'='*60}")

    for collection_name in ["users", "entries"]:
        print(f"\n{collection_name}:")
        for index_info in db[collection_name].list_indexes():
            print(f"  • {index_info['name']}")

    client.close()
    print("\n✓ Disconnected from MongoDB")
|
||||
|
||||
|
||||
# Script entry point: python backend/scripts/create_indexes.py
if __name__ == "__main__":
    create_indexes()
|
||||
248
backend/scripts/migrate_data.py
Normal file
248
backend/scripts/migrate_data.py
Normal file
@@ -0,0 +1,248 @@
|
||||
"""
|
||||
MongoDB Data Migration Script
|
||||
|
||||
Migrates data from the old schema to the new refactored schema.
|
||||
|
||||
Changes performed:
|
||||
1. Deduplicate users by email (keep oldest)
|
||||
2. Convert entries.userId from string to ObjectId
|
||||
3. Add entryDate field to entries (defaults to createdAt)
|
||||
4. Add encryption metadata to entries
|
||||
5. Create compound indexes
|
||||
|
||||
Usage:
|
||||
python backend/scripts/migrate_data.py
|
||||
|
||||
IMPORTANT: Backup your database before running this script!
|
||||
mongodump --db grateful_journal_old --out ./backup
|
||||
"""
|
||||
|
||||
from pymongo import MongoClient
|
||||
from bson import ObjectId
|
||||
from datetime import datetime
|
||||
from config import get_settings
|
||||
from typing import Dict, List, Set
|
||||
import sys
|
||||
|
||||
|
||||
def migrate_data():
    """Perform the complete data migration.

    Steps:
      1. Deduplicate users by email, keeping the oldest account and deleting
         the rest.
      2. Convert ``entries.userId`` from string to ObjectId, remapping any
         entry that pointed at a removed duplicate user to the canonical user.
      3. Backfill ``entryDate`` (start-of-day of ``createdAt``) and the
         ``encryption`` metadata sub-document on entries that lack them.
      4. Verify referential integrity (orphaned entries) and print a summary.

    Side effects: reads and writes the MongoDB database named in the project
    settings, and prints progress to the console. Per-entry failures are
    collected and reported rather than aborting the whole run.
    """
    settings = get_settings()
    client = MongoClient(settings.mongodb_uri)
    db = client[settings.mongodb_db_name]

    print(f"✓ Connected to MongoDB: {settings.mongodb_db_name}\n")

    # ========== STEP 1: DEDUPLICATE USERS ==========
    print("=" * 70)
    print("STEP 1: Deduplicating Users (keeping oldest)")
    print("=" * 70)

    duplicate_count = 0
    user_mapping = {}  # str(duplicate _id) -> canonical ObjectId

    # Group users by email
    email_groups = {}
    for user in db.users.find():
        email_groups.setdefault(user["email"], []).append(user)

    # Process each email group
    for email, users in email_groups.items():
        if len(users) > 1:
            # Sort by createdAt, keep oldest
            users.sort(key=lambda u: u["createdAt"])
            canonical_user = users[0]
            canonical_id = canonical_user["_id"]

            print(f"\n📧 Email: {email}")
            print(f"   Found {len(users)} duplicate users")
            print(f"   Keeping (earliest): {canonical_id}")

            # Map all other users to canonical, then delete them
            for dup_user in users[1:]:
                dup_id = dup_user["_id"]
                user_mapping[str(dup_id)] = canonical_id
                duplicate_count += 1
                print(f"   Deleting (later): {dup_id}")

            for dup_user in users[1:]:
                db.users.delete_one({"_id": dup_user["_id"]})

    if duplicate_count == 0:
        print("\n✓ No duplicate users found")
    else:
        print(f"\n✓ Removed {duplicate_count} duplicate users")

    # ========== STEP 2: MIGRATE ENTRIES ==========
    print("\n" + "=" * 70)
    print("STEP 2: Migrating Entries (userId string → ObjectId, add entryDate)")
    print("=" * 70)

    total_entries = db.entries.count_documents({})
    entries_updated = 0
    entries_with_issues = []

    print(f"\nTotal entries to process: {total_entries}\n")

    for entry in db.entries.find():
        try:
            entry_id = entry["_id"]
            old_user_id = entry.get("userId", "")
            update_data = {}

            # Remap entries owned by a deleted duplicate user to the
            # canonical user. BUG FIX: the remap must also apply when userId
            # is already an ObjectId — the previous version only remapped
            # string userIds, silently orphaning ObjectId-typed entries of
            # deleted duplicates.
            mapping_key = old_user_id if isinstance(old_user_id, str) else str(old_user_id)
            if mapping_key in user_mapping:
                new_user_id = user_mapping[mapping_key]
                print(f"  → Entry {entry_id}: userId mapped {mapping_key[:8]}... → {str(new_user_id)[:8]}...")
                update_data["userId"] = new_user_id
            elif isinstance(old_user_id, str):
                # Convert userId: string → ObjectId (raises on malformed ids;
                # caught below and recorded as an issue)
                update_data["userId"] = ObjectId(old_user_id)

            # Add entryDate if missing (default to createdAt, truncated to
            # start of day so calendar queries group correctly)
            if "entryDate" not in entry:
                entry_date = entry.get("createdAt", datetime.utcnow())
                entry_date = entry_date.replace(hour=0, minute=0, second=0, microsecond=0)
                update_data["entryDate"] = entry_date

            # Add encryption metadata if missing (entries predate encryption)
            if "encryption" not in entry:
                update_data["encryption"] = {
                    "encrypted": False,
                    "iv": None,
                    "algorithm": None,
                }

            # Perform update only when something changed — keeps re-runs cheap
            # and idempotent
            if update_data:
                update_data["updatedAt"] = datetime.utcnow()
                db.entries.update_one(
                    {"_id": entry_id},
                    {"$set": update_data}
                )
                entries_updated += 1

                if entries_updated % 100 == 0:
                    print(f"  ✓ Processed {entries_updated}/{total_entries} entries")

        except Exception as e:
            entries_with_issues.append({
                "entry_id": str(entry_id),
                "error": str(e)
            })
            print(f"  ⚠ Error processing entry {entry_id}: {e}")

    print(f"\n✓ Updated {entries_updated}/{total_entries} entries")

    if entries_with_issues:
        print(f"\n⚠ {len(entries_with_issues)} entries had issues:")
        for issue in entries_with_issues[:5]:  # Show first 5 only
            print(f"  - {issue['entry_id']}: {issue['error']}")

    # ========== STEP 3: VERIFY DATA INTEGRITY ==========
    print("\n" + "=" * 70)
    print("STEP 3: Verifying Data Integrity")
    print("=" * 70)

    # Check for orphaned entries (userId doesn't exist in users)
    orphaned_count = 0
    users_ids = {str(u["_id"]) for u in db.users.find({}, {"_id": 1})}

    for entry in db.entries.find({}, {"userId": 1}):
        user_id = entry.get("userId")
        if isinstance(user_id, ObjectId):
            user_id = str(user_id)
        if user_id not in users_ids:
            orphaned_count += 1

    print(f"\nUsers collection: {db.users.count_documents({})}")
    print(f"Entries collection: {db.entries.count_documents({})}")

    if orphaned_count > 0:
        print(f"\n⚠ WARNING: Found {orphaned_count} orphaned entries (no corresponding user)")
    else:
        print("✓ All entries have valid user references")

    # Sample entry check — spot-check one document's final shape
    sample_entry = db.entries.find_one()
    if sample_entry:
        print("\nSample entry structure:")
        print(f"  _id (entry): {sample_entry['_id']} (ObjectId: {isinstance(sample_entry['_id'], ObjectId)})")
        print(f"  userId: {sample_entry.get('userId')} (ObjectId: {isinstance(sample_entry.get('userId'), ObjectId)})")
        print(f"  entryDate present: {'entryDate' in sample_entry}")
        print(f"  encryption present: {'encryption' in sample_entry}")
        if "entryDate" in sample_entry:
            print(f"    → entryDate: {sample_entry['entryDate'].isoformat()}")
        if "encryption" in sample_entry:
            print(f"    → encryption: {sample_entry['encryption']}")

    # ========== SUMMARY ==========
    print(f"\n{'='*70}")
    print("✓ Migration Complete")
    print(f"{'='*70}")
    print(f"Duplicate users removed: {duplicate_count}")
    print(f"Entries migrated: {entries_updated}")
    print(f"Orphaned entries found: {orphaned_count}")

    if orphaned_count == 0:
        print("\n✓ Data integrity verified successfully!")
    else:
        print(f"\n⚠ Please review {orphaned_count} orphaned entries")

    client.close()
    print("\n✓ Disconnected from MongoDB")
|
||||
|
||||
|
||||
def rollback_warning():
    """Print the pre-migration safety checklist to the console."""
    banner = "!" * 70
    print("\n" + banner)
    print("⚠ IMPORTANT REMINDERS")
    print(banner)
    print("""
This script modifies your MongoDB database. Before running:

1. BACKUP YOUR DATABASE:
   mongodump --db grateful_journal --out ./backup-$(date +%Y%m%d)

2. TEST IN DEVELOPMENT first

3. This migration includes:
   - Removing duplicate users
   - Converting userId field types
   - Adding new entryDate field
   - Adding encryption metadata

4. All changes are permanent unless you restore from backup

5. This script is idempotent for most operations (safe to run multiple times)
   but the deduplication will only work on the first run.
""")
|
||||
|
||||
|
||||
# Script entry point: show the safety checklist, require explicit opt-in,
# then run the migration. Any failure exits non-zero for shell scripting.
if __name__ == "__main__":
    rollback_warning()

    answer = input("\nDo you want to proceed with migration? (yes/no): ").strip().lower()
    if answer != "yes":
        print("Migration cancelled.")
        sys.exit(0)

    try:
        migrate_data()
    except Exception as exc:
        print("\n✗ Migration failed with error:")
        print(f"  {exc}")
        sys.exit(1)
|
||||
Reference in New Issue
Block a user