eventstore: index the tag letter together with the value.

This commit is contained in:
fiatjaf
2025-06-10 13:46:29 -03:00
parent 287d8c23c7
commit e9a08d669e
8 changed files with 250 additions and 98 deletions

View File

@@ -99,14 +99,16 @@ func (b *LMDBBackend) getIndexKeysForEvent(evt nostr.Event) iter.Seq[key] {
// not indexable
continue
}
firstIndex := slices.IndexFunc(evt.Tags, func(t nostr.Tag) bool { return len(t) >= 2 && t[1] == tag[1] })
firstIndex := slices.IndexFunc(evt.Tags, func(t nostr.Tag) bool {
return len(t) >= 2 && t[0] == tag[0] && t[1] == tag[1]
})
if firstIndex != i {
// duplicate
continue
}
// get key prefix (with full length) and offset where to write the created_at
dbi, k, offset := b.getTagIndexPrefix(tag[1])
dbi, k, offset := b.getTagIndexPrefix(tag[0], tag[1])
binary.BigEndian.PutUint32(k[offset:], uint32(evt.CreatedAt))
if !yield(key{dbi: dbi, key: k}) {
return
@@ -136,47 +138,53 @@ func (b *LMDBBackend) getIndexKeysForEvent(evt nostr.Event) iter.Seq[key] {
}
}
func (b *LMDBBackend) getTagIndexPrefix(tagValue string) (lmdb.DBI, []byte, int) {
func (b *LMDBBackend) getTagIndexPrefix(tagName string, tagValue string) (lmdb.DBI, []byte, int) {
var k []byte // the key with full length for created_at and idx at the end, but not filled with these
var offset int // the offset -- i.e. where the prefix ends and the created_at and idx would start
var dbi lmdb.DBI
letterPrefix := byte(int(tagName[0]) % 256)
// if it's 32 bytes as hex, save it as bytes
if len(tagValue) == 64 {
// but we actually only use the first 8 bytes
k = make([]byte, 8+4)
if _, err := hex.Decode(k[0:8], []byte(tagValue[0:8*2])); err == nil {
offset = 8
// but we actually only use the first 8 bytes, with tag name prefix
k = make([]byte, 1+8+4)
if _, err := hex.Decode(k[1:1+8], []byte(tagValue[0:8*2])); err == nil {
k[0] = letterPrefix
offset = 1 + 8
dbi = b.indexTag32
return dbi, k[0 : 8+4], offset
return dbi, k[0 : 1+8+4], offset
}
}
// if it looks like an "a" tag, index it in this special format
// if it looks like an "a" tag, index it in this special format (no tag name prefix for special indexes)
spl := strings.Split(tagValue, ":")
if len(spl) == 3 && len(spl[1]) == 64 {
k = make([]byte, 2+8+30)
if _, err := hex.Decode(k[2:2+8], []byte(tagValue[0:8*2])); err == nil {
k = make([]byte, 1+2+8+30)
if _, err := hex.Decode(k[1+2:1+2+8], []byte(spl[1][0:8*2])); err == nil {
if kind, err := strconv.ParseUint(spl[0], 10, 16); err == nil {
k[0] = byte(kind >> 8)
k[1] = byte(kind)
k[0] = letterPrefix
k[1] = byte(kind >> 8)
k[2] = byte(kind)
// limit "d" identifier to 30 bytes (so we don't have to grow our byte slice)
n := copy(k[2+8:2+8+30], spl[2])
offset = 2 + 8 + n
n := copy(k[1+2+8:1+2+8+30], spl[2])
offset = 1 + 2 + 8 + n
dbi = b.indexTagAddr
return dbi, k[0 : offset+4], offset
}
}
}
// index whatever else as a md5 hash of the contents
// index whatever else as a md5 hash of the contents with tag name prefix
h := md5.New()
h.Write([]byte(tagValue))
k = make([]byte, 0, 16+4)
k = make([]byte, 1, 1+16+4)
k[0] = letterPrefix
k = h.Sum(k)
offset = 16
offset = 1 + 16
dbi = b.indexTag
return dbi, k[0 : 16+4], offset
return dbi, k[0 : 1+16+4], offset
}
func (b *LMDBBackend) dbiName(dbi lmdb.DBI) string {

View File

@@ -3,7 +3,10 @@ package lmdb
import (
"encoding/binary"
"fmt"
"log"
"fiatjaf.com/nostr"
"fiatjaf.com/nostr/eventstore/codec/betterbinary"
"github.com/PowerDNS/lmdb-go/lmdb"
)
@@ -13,36 +16,91 @@ const (
func (b *LMDBBackend) runMigrations() error {
return b.lmdbEnv.Update(func(txn *lmdb.Txn) error {
var version uint16
v, err := txn.Get(b.settingsStore, []byte{DB_VERSION})
if err != nil {
if lmdb.IsNotFound(err) {
version = 0
} else if v == nil {
return fmt.Errorf("failed to read database version: %w", err)
}
} else {
version = binary.BigEndian.Uint16(v)
val, err := txn.Get(b.settingsStore, []byte("version"))
if err != nil && !lmdb.IsNotFound(err) {
return fmt.Errorf("failed to get db version: %w", err)
}
var version uint16 = 0
if err == nil {
version = binary.BigEndian.Uint16(val)
}
// do the migrations in increasing steps (there is no rollback)
//
// this is when we reindex everything
if version < 1 {
}
if version < 9 {
log.Println("[lmdb] migration 9: reindex everything")
// bump version
// if err := b.setVersion(txn, 1); err != nil {
// return err
// }
if err := txn.Drop(b.indexId, false); err != nil {
return err
}
if err := txn.Drop(b.indexKind, false); err != nil {
return err
}
if err := txn.Drop(b.indexPubkey, false); err != nil {
return err
}
if err := txn.Drop(b.indexPubkeyKind, false); err != nil {
return err
}
if err := txn.Drop(b.indexTag, false); err != nil {
return err
}
if err := txn.Drop(b.indexTag32, false); err != nil {
return err
}
if err := txn.Drop(b.indexTagAddr, false); err != nil {
return err
}
if err := txn.Drop(b.indexPTagKind, false); err != nil {
return err
}
cursor, err := txn.OpenCursor(b.rawEventStore)
if err != nil {
return fmt.Errorf("failed to open cursor in migration 9: %w", err)
}
defer cursor.Close()
var idx, val []byte
var evt nostr.Event
for {
idx, val, err = cursor.Get(nil, nil, lmdb.Next)
if lmdb.IsNotFound(err) {
break
}
if err != nil {
return fmt.Errorf("failed to get next in migration 9: %w", err)
}
if err := betterbinary.Unmarshal(val, &evt); err != nil {
log.Printf("failed to unmarshal event %x, skipping: %s", idx, err)
continue
}
for key := range b.getIndexKeysForEvent(evt) {
if err := txn.Put(key.dbi, key.key, idx, 0); err != nil {
return fmt.Errorf("failed to save index %s for event %s (%v) on migration 9: %w",
b.keyName(key), evt.ID, idx, err)
}
}
}
// bump version
if err := b.setVersion(txn, 9); err != nil {
return err
}
}
return nil
})
}
func (b *LMDBBackend) setVersion(txn *lmdb.Txn, version uint16) error {
buf, err := txn.PutReserve(b.settingsStore, []byte{DB_VERSION}, 4, 0)
binary.BigEndian.PutUint16(buf, version)
return err
func (b *LMDBBackend) setVersion(txn *lmdb.Txn, v uint16) error {
var newVersion [2]byte
binary.BigEndian.PutUint16(newVersion[:], v)
return txn.Put(b.settingsStore, []byte("version"), newVersion[:], 0)
}

View File

@@ -118,11 +118,10 @@ func (b *LMDBBackend) prepareQueries(filter nostr.Filter) (
queries = make([]query, len(tagValues))
for i, value := range tagValues {
// get key prefix (with full length) and offset where to write the created_at
dbi, k, offset := b.getTagIndexPrefix(value)
dbi, k, offset := b.getTagIndexPrefix(tagKey, value)
// remove the last parts part to get just the prefix we want here
prefix := k[0:offset]
queries[i] = query{i: i, dbi: dbi, prefix: prefix, keySize: len(prefix) + 4, timestampSize: 4}
i++
}
// add an extra kind filter if available (only do this on plain tag index, not on ptag-kind index)