From 4cbf64ecd25b5ebcd1b9f79f80c49732e34dd03a Mon Sep 17 00:00:00 2001 From: Alcor Date: Thu, 12 Mar 2026 17:49:46 +0100 Subject: [PATCH 1/3] Tree-based autocompletion (v2) Store completions in a treap (tree-heap) structure, thus making lookups & searches O(log n) --- Makefile | 3 +- chat.c | 1 + chat.h | 4 +- complete.c | 243 +++++++++++++++++--------------- treap.c | 401 +++++++++++++++++++++++++++++++++++++++++++++++++++++ treap.h | 65 +++++++++ 6 files changed, 606 insertions(+), 111 deletions(-) create mode 100644 treap.c create mode 100644 treap.h diff --git a/Makefile b/Makefile index e08e8e3..0a4d7ac 100644 --- a/Makefile +++ b/Makefile @@ -27,6 +27,7 @@ OBJS += handle.o OBJS += input.o OBJS += irc.o OBJS += log.o +OBJS += treap.o OBJS += ui.o OBJS += url.o OBJS += window.o @@ -34,7 +35,7 @@ OBJS += xdg.o OBJS.sandman = sandman.o -TESTS += edit.t +TESTS += edit.t treap.t dev: tags all check diff --git a/chat.c b/chat.c index 755bd24..daefcec 100644 --- a/chat.c +++ b/chat.c @@ -351,6 +351,7 @@ int main(int argc, char *argv[]) { self.plainPass = readpassphrase("Account password: ", buf, 512, 0); if (!self.plainPass) errx(1, "unable to read passphrase"); } + srand(time(NULL)); /* seed rand(), which complete.c draws treap priorities from */ // Modes defined in RFC 1459: set(&network.chanTypes, "#&"); diff --git a/chat.h b/chat.h index bd69b5e..329b199 100644 --- a/chat.h +++ b/chat.h @@ -408,9 +408,11 @@ int bufferReflow( struct Buffer *buffer, int cols, enum Heat thresh, size_t tail ); +struct treap; /* opaque node type; its layout is defined in treap.c */ struct Cursor { uint gen; - struct Node *node; + struct treap *node; + uint n; /* next ID for completeEachID to test */ }; void completePush(uint id, const char *str, enum Color color); void completePull(uint id, const char *str, enum Color color); diff --git a/complete.c b/complete.c index d7108e6..7c24acc 100644 --- a/complete.c +++ b/complete.c @@ -28,178 +28,203 @@ #include #include #include +#include +#include #include "chat.h" +#include "treap.h" -struct Node { - uint id; +struct Entry { /* one completion string plus the set of IDs it is enabled for */ char *str; enum Color color; uint bits; - struct Node *prev; - struct 
Node *next; + unsigned char on[IDCap / CHAR_BIT]; /* bitset indexed by ID; NOTE(review): the division rounds down, so this assumes IDCap is a multiple of CHAR_BIT -- confirm */ }; static uint gen; -static struct Node *head; -static struct Node *tail; - -static struct Node *alloc(uint id, const char *str, enum Color color) { - struct Node *node = calloc(1, sizeof(*node)); - if (!node) err(1, "calloc"); - node->id = id; - node->str = strdup(str); - if (!node->str) err(1, "strdup"); - node->color = color; - node->bits = 0; - return node; -} - -static struct Node *detach(struct Node *node) { - if (node->prev) node->prev->next = node->next; - if (node->next) node->next->prev = node->prev; - if (head == node) head = node->next; - if (tail == node) tail = node->prev; - node->prev = NULL; - node->next = NULL; - return node; -} - -static struct Node *prepend(struct Node *node) { - node->prev = NULL; - node->next = head; - if (head) head->prev = node; - head = node; - tail = (tail ?: node); - return node; -} - -static struct Node *append(struct Node *node) { - node->next = NULL; - node->prev = tail; - if (tail) tail->next = node; - tail = node; - head = (head ?: node); - return node; -} - -static struct Node *find(uint id, const char *str) { - for (struct Node *node = head; node; node = node->next) { - if (node->id == id && !strcmp(node->str, str)) return node; +static treap *root; /* root of the treap holding every Entry, ordered by entryCmp */ + +static int entryCmp(const void *a, const void *b) { /* order entries case-insensitively by their string */ + const struct Entry *ea = a, *eb = b; + return strcasecmp(ea->str, eb->str); +} + +static bool entryInUse(const struct Entry *e) { /* true while at least one ID bit is set */ + for (size_t i = 0; i < ARRAY_LEN(e->on); ++i) { + if (e->on[i]) return true; } - return NULL; + return false; +} + +static void entrySetID(struct Entry *e, uint id) { /* mark the entry as enabled for id */ + assert(id < IDCap); + e->on[id / CHAR_BIT] |= (unsigned char)(1 << (id % CHAR_BIT)); +} + +static bool entryContains(const struct Entry *e, uint id) { /* test whether the entry is enabled for id */ + assert(id < IDCap); + return (e->on[id / CHAR_BIT] & ((unsigned char)(1 << (id % CHAR_BIT)))) != 0; +} + +static void deleteEntry(struct Entry *e, uint id) { /* clear id from e and free the entry once no ID is left; id 0 frees it unconditionally; gen is bumped either way */ + assert(id < IDCap); + if (id) { + e->on[id / CHAR_BIT] &= 
~((unsigned char)(1 << (id % CHAR_BIT))); + if (entryInUse(e)) goto invalidate; + } + free(e->str); + treaprem(&root, (treap *)treapcontainer(e)); +invalidate: + ++gen; } void completePush(uint id, const char *str, enum Color color) { - struct Node *node = find(id, str); + struct Entry entry = { .str = (char *)str }; /* lookup key: entryCmp only reads str */ + const struct treap *temp = root; + const struct treap *node = treapget(&temp, entryCmp, &entry); if (node) { - if (color != Default) node->color = color; + struct Entry *data = (struct Entry *)treapdata(node); + if (color != Default) data->color = color; /* as in completePull: Default means keep the stored color */ + entrySetID(data, id); + if (strcmp(str, data->str)) { /* equal ignoring case but spelled differently: adopt the new spelling */ + free(data->str); + data->str = strdup(str); + if (!data->str) err(1, "strdup"); + } } else { - append(alloc(id, str, color)); + struct Entry data = { .str = strdup(str), .color = color }; + if (!data.str) err(1, "strdup"); + entrySetID(&data, id); + treap *t = treapush(&root, entryCmp, -rand(), sizeof(data), &data); + if (!t) err(1, "treapush"); } } void completePull(uint id, const char *str, enum Color color) { - struct Node *node = find(id, str); + struct Entry entry = { .str = (char *)str }; + const treap *temp = root; + const treap *node = treapget(&temp, entryCmp, &entry); if (node) { - if (color != Default) node->color = color; - prepend(detach(node)); + struct Entry *data = (struct Entry *)treapdata(node); + if (color != Default) data->color = color; + entrySetID(data, id); } else { - prepend(alloc(id, str, color)); + struct Entry data = { .str = strdup(str), .color = color }; + if (!data.str) err(1, "strdup"); + entrySetID(&data, id); + treap *t = treapushat(&root, (treap *)temp, entryCmp, -rand(), sizeof(data), &data); /* temp is where the failed lookup stopped, so the descent resumes there */ + if (!t) err(1, "treapushat"); } } void completeReplace(const char *old, const char *new) { - struct Node *next = NULL; - for (struct Node *node = head; node; node = next) { - next = node->next; - if (strcmp(node->str, old)) continue; - free(node->str); - node->str = strdup(new); - if (!node->str) err(1, "strdup"); - prepend(detach(node)); 
- } + struct Entry entry = { .str = (char *)old }; + const treap *temp = root; + const treap *node = treapget(&temp, entryCmp, &entry); + if (!node) return; + /* Copy and compare before freeing anything, in case old points into the entry. */ + char *str = strdup(new); + if (!str) err(1, "strdup"); + bool rekey = strcasecmp(old, new) != 0; + struct Entry *e = (struct Entry *)treapdata(node); + free(e->str); + e->str = str; + /* A change of case only leaves the sort key as it was, so the node can stay where it is. */ + if (!rekey) return; + /* Otherwise the tree is ordered by a key that just changed: take the entry out and insert it again so that lookups keep finding it. */ + struct Entry moved = *e; + treaprem(&root, (treap *)node); + ++gen; /* cursors may still point at the node just freed */ + entry.str = moved.str; + temp = root; + node = treapget(&temp, entryCmp, &entry); + if (node) { + /* The new name already has an entry: merge the IDs into it instead of storing a duplicate key. */ + e = (struct Entry *)treapdata(node); + for (size_t i = 0; i < ARRAY_LEN(e->on); ++i) e->on[i] |= moved.on[i]; + if (moved.color != Default) e->color = moved.color; + free(moved.str); + } else { + treap *t = treapushat(&root, (treap *)temp, entryCmp, -rand(), sizeof(moved), &moved); + if (!t) err(1, "treapushat"); + } } void completeRemove(uint id, const char *str) { - struct Node *next = NULL; - for (struct Node *node = head; node; node = next) { - next = node->next; - if (id && node->id != id) continue; - if (str && strcmp(node->str, str)) continue; - detach(node); - free(node->str); - free(node); + if (str) { + struct Entry entry = { .str = (char *)str }; + const treap *temp = root; + const treap *node = treapget(&temp, entryCmp, &entry); + if (!node) return; + deleteEntry((struct Entry *)treapdata(node), id); + } else { + treap *t = (treap *)treapiter(root); + while (t) { + struct Entry *data = (struct Entry *)treapdata(t); + t = (treap *)treapnext(t); /* advance first: deleteEntry may free the node data lives in */ + deleteEntry(data, id); + } } - gen++; } enum Color completeColor(uint id, const char *str) { - struct Node *node = find(id, str); - return (node ? node->color : Default); + struct Entry entry = { .str = (char *)str }; + const treap *temp = root; + const treap *node = treapget(&temp, entryCmp, &entry); + if (node) { + struct Entry *data = (struct Entry *)treapdata(node); + if (entryContains(data, id)) return data->color; + } + return Default; } uint *completeBits(uint id, const char *str) { - struct Node *node = find(id, str); - return (node ? &node->bits : NULL); + struct Entry entry = { .str = (char *)str }; + const treap *temp = root; + const treap *node = treapget(&temp, entryCmp, &entry); + if (node) { + struct Entry *data = (struct Entry *)treapdata(node); + if (entryContains(data, id)) return &data->bits; + } + return NULL; } const char *completePrefix(struct Cursor *curs, uint id, const char *prefix) { size_t len = strlen(prefix); if (curs->gen != gen) curs->node = NULL; - for ( - curs->gen = gen, curs->node = (curs->node ? 
curs->node->next : head); - curs->node; - curs->node = curs->node->next - ) { - if (curs->node->id && curs->node->id != id) continue; - if (!strncasecmp(curs->node->str, prefix, len)) return curs->node->str; + curs->gen = gen; + struct Entry query = { .str = (char *)prefix }; + /* A fresh cursor starts at the first key that sorts at or after the prefix; a live one resumes after its last result. */ + curs->node = curs->node ? (treap *)treapnext(curs->node) : (treap *)treapfirst(root, entryCmp, &query); + for (; curs->node; curs->node = (treap *)treapnext(curs->node)) { + const struct Entry *data = treapdata(curs->node); + /* Keys sharing the prefix are contiguous in sort order, so the first mismatch ends the search; clear the cursor exactly as running off the end of the tree would. */ + if (strncasecmp(data->str, prefix, len)) { + curs->node = NULL; + break; + } + if (entryContains(data, None) || entryContains(data, id)) return data->str; } return NULL; } const char *completeSubstr(struct Cursor *curs, uint id, const char *substr) { if (curs->gen != gen) curs->node = NULL; - for ( - curs->gen = gen, curs->node = (curs->node ? curs->node->next : head); - curs->node; - curs->node = curs->node->next - ) { - if (curs->node->id && curs->node->id != id) continue; - if (strstr(curs->node->str, substr)) return curs->node->str; + curs->gen = gen; + /* A substring can match anywhere in the ordering, so this walks every entry in order. */ + curs->node = curs->node ? (treap *)treapnext(curs->node) : (treap *)treapiter(root); + for (; curs->node; curs->node = (treap *)treapnext(curs->node)) { + const struct Entry *data = treapdata(curs->node); + if (!entryContains(data, None) && !entryContains(data, id)) continue; + if (strstr(data->str, substr)) return data->str; } return NULL; } const char *completeEach(struct Cursor *curs, uint id) { if (curs->gen != gen) curs->node = NULL; - for ( - curs->gen = gen, curs->node = (curs->node ? curs->node->next : head); - curs->node; - curs->node = curs->node->next - ) { - if (curs->node->id == id) return curs->node->str; + curs->gen = gen; + curs->node = curs->node ? 
(treap *)treapnext(curs->node) : (treap *)treapiter(root); + for (; curs->node; curs->node = (treap *)treapnext(curs->node)) { + const struct Entry *data = treapdata(curs->node); + if (entryContains(data, id)) return data->str; } return NULL; } uint completeEachID(struct Cursor *curs, const char *str) { if (curs->gen != gen) curs->node = NULL; - for ( - curs->gen = gen, curs->node = (curs->node ? curs->node->next : head); - curs->node; - curs->node = curs->node->next - ) { - if (!curs->node->id) continue; - if (!strcmp(curs->node->str, str)) return curs->node->id; + curs->gen = gen; + if (!curs->node) { /* fresh or invalidated cursor: look the string up once */ + curs->n = 1; /* first ID to test */ + struct Entry entry = { .str = (char *)str }; + const treap *temp = root; + curs->node = (treap *)treapget(&temp, entryCmp, &entry); + } + if (curs->node) { + struct Entry *data = (struct Entry *)treapdata(curs->node); + for (; curs->n < IDCap; ++curs->n) + if (entryContains(data, curs->n)) return curs->n++; /* post-increment so the next call resumes after this ID */ + curs->node = NULL; /* every ID has been reported: reset for the next walk */ } return None; } void completeAccept(struct Cursor *curs) { - if (curs->gen == gen && curs->node) { - prepend(detach(curs->node)); - } + if (curs->gen == gen && curs->node) + treaprio(&root, curs->node, entryCmp, rand()); /* new priority is rand() >= 0, whereas insertions use -rand() <= 0 */ curs->node = NULL; } diff --git a/treap.c b/treap.c new file mode 100644 index 0000000..b852413 --- /dev/null +++ b/treap.c @@ -0,0 +1,401 @@ +/* Copyright (C) 2026 Alcor + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. 
+ * + * Additional permission under GNU GPL version 3 section 7: + * + * If you modify this Program, or any covered work, by linking or + * combining it with OpenSSL (or a modified version of that library), + * containing parts covered by the terms of the OpenSSL License and the + * original SSLeay license, the licensors of this Program grant you + * additional permission to convey the resulting work. Corresponding + * Source for a non-source form of such a combination shall include the + * source code for the parts of OpenSSL used as well as that of the + * covered work. + */ + +#include "treap.h" +#include <limits.h> +#include <stdlib.h> +#include <string.h> + +/* Node header: heap-ordered by prio, search-ordered by the caller's comparator. The payload bytes follow the header. NOTE(review): data is a char array, so payloads are assumed not to need stricter alignment than the pointers before it -- confirm. */ +struct treap { + int prio; + struct treap *parent, *left, *right; + char data[]; +}; + +/* Rotate node above its parent, keeping the in-order sequence and fixing the link from the grandparent (or the root pointer *t). node must have a parent. */ +static void treaprotate(treap **t, treap *node) +{ + treap *p = node->parent; + treap *pp = p->parent; + treap **link = pp == NULL ? t : (pp->left == p ? &pp->left : &pp->right); + + if (p->left == node) { + p->left = node->right; + if (node->right != NULL) node->right->parent = p; + node->right = p; + } else { + p->right = node->left; + if (node->left != NULL) node->left->parent = p; + node->left = p; + } + + node->parent = pp; + p->parent = node; + *link = node; +} + +/* Rotate node upwards while its parent has a strictly lower priority. */ +static void siftup(treap **t, treap *node) +{ + while (node->parent != NULL && node->parent->prio < node->prio) + treaprotate(t, node); +} + +/* Rotate the higher-priority child above node until no child outranks it. */ +static void siftdown(treap **t, treap *node) +{ + for (;;) { + treap *child = NULL; + + if (node->left != NULL && node->right != NULL) + child = node->left->prio > node->right->prio ? node->left : node->right; + else + child = node->left != NULL ? 
node->left : node->right; + + if (child == NULL || child->prio <= node->prio) + break; + + treaprotate(t, child); + } +} + +void treaprio(treap **t, treap *node, treapcmpfn cmp, int prio) /* give node the priority prio and rotate it up or down until heap order holds again */ +{ + if (prio > node->prio) { + if (node->parent != NULL + && node->parent->right == node + && cmp(node->data, node->parent->data) == 0 + && prio > node->parent->prio) prio = node->parent->prio; /* clamp, so that siftup does not rotate node above a parent holding an equal key */ + node->prio = prio; + siftup(t, node); + } else { + node->prio = prio; + siftdown(t, node); + } +} + +treap *treapushat(treap **t, treap *parent, treapcmpfn cmp, int prio, size_t count, const void *data) /* insert a copy of count bytes of data; the descent starts at parent, or at the root when parent is NULL; returns the new node, or NULL if calloc fails */ +{ + treap *node = parent == NULL ? *t : parent; + int c = 0; + + while (node != NULL) { + parent = node; + c = cmp(data, node->data); + if (c == 0 && prio > node->prio) + prio = node->prio; /* never outrank an equal key met on the way down */ + node = c < 0 ? node->left : node->right; /* equal keys descend to the right */ + } + + node = calloc(1, sizeof(treap) + count); + if (node == NULL) + return NULL; + node->prio = prio; + node->parent = parent; + memcpy(node->data, data, count); + + if (parent == NULL) { /* empty tree: the new node becomes the root */ + *t = node; + return node; + } + + if (c < 0) + parent->left = node; + else + parent->right = node; + + siftup(t, node); + + return node; +} + +treap *treapush(treap **t, treapcmpfn cmp, int prio, size_t count, const void *data) /* treapushat with the descent starting at the root */ +{ + return treapushat(t, NULL, cmp, prio, count, data); +} + +void treaprem(treap **t, treap *node) /* unlink node from the tree rooted at *t and free it */ +{ + node->prio = INT_MIN; /* lowest possible priority, so siftdown keeps rotating node downwards */ + siftdown(t, node); + + treap *child = node->left != NULL ? node->left : node->right; + treap *p = node->parent; + treap **link = p == NULL ? t : (p->left == node ? 
&p->left : &p->right); + + if (child != NULL) child->parent = p; + *link = child; + + free(node); +} + +/* Look up the node comparing equal to data, starting at *t. *t is advanced to each node visited, so on a miss it is left at the last node reached (unchanged if the tree is empty). Returns NULL on a miss. */ +const treap *treapget(const treap **t, treapcmpfn cmp, const void *data) +{ + const treap *node = *t; + + while (node != NULL) { + *t = node; + int c = cmp(data, node->data); + if (c < 0) + node = node->left; + else if (c > 0) + node = node->right; + else + break; + } + + return node; +} + +/* Leftmost node of the subtree, i.e. the first node in order; NULL for an empty tree. */ +const treap *treapiter(const treap *node) +{ + if (node == NULL) + return NULL; + + while (node->left != NULL) + node = node->left; + + return node; +} + +/* In-order successor of node, or NULL after the last one. */ +const treap *treapnext(const treap *node) +{ + if (node == NULL) + return NULL; + + if (node->right != NULL) { + node = node->right; + while (node->left != NULL) + node = node->left; + } else { + while (node->parent != NULL && node == node->parent->right) + node = node->parent; + node = node->parent; + } + + return node; +} + +/* Recover the node header from a payload pointer obtained through treapdata. */ +const treap *treapcontainer(const void *data) +{ + return (const treap *)(((const char *)data) - offsetof(treap, data)); +} + +const void *treapdata(const treap *t) +{ + return t->data; +} + +int treapriov(const treap *t) +{ + return t->prio; +} + +treap *treaplt(const treap *t) +{ + return t->left; +} + +treap *treapgteq(const treap *t) +{ + return t->right; +} + +/* First node in order for which cmp(data, node) <= 0, or NULL if there is none. */ +const treap *treapfirst(const treap *t, treapcmpfn cmp, const void *data) { + const treap *first = NULL; + while (t != NULL) { + if (cmp(data, t->data) <= 0) { + first = t; + t = t->left; + } else { + t = t->right; + } + } + return first; +} + +#ifdef TEST +#undef NDEBUG +#include <assert.h> +#include <stdbool.h> +#include <stdio.h> +#include <strings.h> +static const char *strs[] = { + "a", + "b", + "b", + "b", + "b", + "b", + "b", + "b", + "b", + "bat", + "bb", + "c", + "c", + "c", + "c", + "c", + "cat", + "clam", + "fire", + "fun", + "marisa", + "punycode", + "purr", + "puzzle", + "scroll", + "slime", + "spider", + NULL +}; + +static int treapstrcasecmp(const void *a, const void *b) +{ + const char *const *s1 = a; + const char *const *s2 = b; + return strcasecmp(*s1, *s2); +} + +static bool 
isbintree(treap *t, treapcmpfn cmp) +{ + return t == NULL + || ((treaplt(t) == NULL + || (cmp(treapdata(treaplt(t)), treapdata(t)) < 0 + && isbintree(treaplt(t), cmp))) + && (treapgteq(t) == NULL + || (cmp(treapdata(t), treapdata(treapgteq(t))) <= 0 + && isbintree(treapgteq(t), cmp)))); +} + +/* Heap order has to hold at every node, not just at the root, so both subtrees are checked recursively. */ +static bool isheap(treap *t) +{ + return t == NULL + || ((treaplt(t) == NULL + || (treapriov(t) >= treapriov(treaplt(t)) + && isheap(treaplt(t)))) + && (treapgteq(t) == NULL + || (treapriov(t) >= treapriov(treapgteq(t)) + && isheap(treapgteq(t))))); +} + +/* Print one record per node, in order, with its label, priority and L/R edges; missing children are drawn as numbered nil nodes. */ +static void printtreap(const treap *t) +{ + int cnt = 0; + for (const treap *it = treapiter(t); it != NULL; it = treapnext(it)) { + printf("\"%p\" [label=\"%s\\n<%d>\"];\n", it, *(const char **)treapdata(it), treapriov(it)); + if (treaplt(it) == NULL) { + printf("\"nil%d\" [label=\"∅\" shape=plaintext];\n", cnt); + printf("\"%p\" -> \"nil%d\" [label=L];\n", it, cnt++); + } else + printf("\"%p\" -> \"%p\" [label=L];\n", it, treaplt(it)); + if (treapgteq(it) == NULL) { + printf("\"nil%d\" [label=\"∅\" shape=plaintext];\n", cnt); + printf("\"%p\" -> \"nil%d\" [label=R];\n", it, cnt++); + } else + printf("\"%p\" -> \"%p\" [label=R];\n", it, treapgteq(it)); + } +} + +int main(void) +{ + srand(12); + + treap *t = NULL; + + for (int i = 0; strs[i] != NULL; i++) + treapush(&t, treapstrcasecmp, -rand(), sizeof(const char *), &strs[i]); + + printtreap(t); + + assert((isbintree(t, treapstrcasecmp))); + assert(isheap(t)); + + /* In-order iteration must give back the strings in the order they were pushed. */ + int i = 0; + for (const treap *it = treapiter(t); it != NULL; it = treapnext(it)) { + assert(*(const char **)treapdata(it) == strs[i++]); + } + + printf("\n"); + + const treap *parent = t; + const treap *child = treapget(&parent, treapstrcasecmp, &strs[0]); + assert(child != NULL); + assert(parent != NULL); + assert(child == parent); + + treaprem(&t, (treap *)child); + printtreap(t); + assert((isbintree(t, treapstrcasecmp))); + assert(isheap(t)); + + parent = t; + child = treapget(&parent, treapstrcasecmp, &strs[0]); + assert(child == NULL); + assert(parent != NULL); + + 
parent = t; + child = treapget(&parent, treapstrcasecmp, &strs[4]); + assert(child != NULL); + assert(parent != NULL); + + treaprio(&t, (treap *)child, treapstrcasecmp, 1); /* every pushed priority is -rand() <= 0, so the asserts below expect this node at the root */ + assert(*(const char **)treapdata(t) == strs[4]); + assert(treapriov(t) == 1); + + const char *s0 = "bbb"; + const treap *tt = t; + const treap *cc = treapget(&tt, treapstrcasecmp, &s0); /* a miss: tt is left at the last node visited and serves as the insertion hint below */ + assert(cc == NULL); + treapushat(&t, (treap *)tt, treapstrcasecmp, -rand(), sizeof(const char *), &s0); + treapushat(&t, (treap *)tt, treapstrcasecmp, -rand(), sizeof(const char *), &s0); /* pushed twice on purpose, to exercise duplicate keys */ + assert((isbintree(t, treapstrcasecmp))); + assert(isheap(t)); + + const treap *first = treapfirst(t, treapstrcasecmp, &s0); + assert(first != NULL); + assert(*(const char **)treapdata(first) == s0); + first = treapnext(first); + assert(first != NULL); + assert(*(const char **)treapdata(first) == s0); + first = treapnext(first); + assert(first != NULL); + assert(*(const char **)treapdata(first) != s0); + + const char *k = "pu"; /* not stored itself: treapfirst must land on the first key sorting at or after it */ + first = treapfirst(t, treapstrcasecmp, &k); + assert(*(const char **)treapdata(first) == strs[21]); + first = treapnext(first); + assert(*(const char **)treapdata(first) == strs[22]); + first = treapnext(first); + assert(*(const char **)treapdata(first) == strs[23]); + + + printf("\n"); + printtreap(t); + + while (t != NULL) treaprem(&t, t); /* free every node by removing the root until the tree is empty */ + + return 0; +} + +#endif /* #ifdef TEST */ diff --git a/treap.h b/treap.h new file mode 100644 index 0000000..232561b --- /dev/null +++ b/treap.h @@ -0,0 +1,65 @@ +/* Copyright (C) 2026 Alcor + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + * + * Additional permission under GNU GPL version 3 section 7: + * + * If you modify this Program, or any covered work, by linking or + * combining it with OpenSSL (or a modified version of that library), + * containing parts covered by the terms of the OpenSSL License and the + * original SSLeay license, the licensors of this Program grant you + * additional permission to convey the resulting work. Corresponding + * Source for a non-source form of such a combination shall include the + * source code for the parts of OpenSSL used as well as that of the + * covered work. + */ + +#ifndef TREAP_H +#define TREAP_H +#include <stddef.h> + +/** + * Opaque type + */ +typedef struct treap treap; + +/** + * Comparator function for treap data: returns a negative, zero or positive value when the first argument sorts before, equal to or after the second + */ +typedef int (*treapcmpfn)(const void *, const void *); + +/* Mutators */ + +/** Insert a copy of count bytes of data with priority prio, descending from parent (or from the root when parent is NULL). The priority is lowered to that of any equal key met on the way down. Returns the new node, or NULL if allocation fails. */ +treap *treapushat(treap **t, treap *parent, treapcmpfn cmp, int prio, size_t count, const void *data); +/** Same as treapushat with the descent starting at the root. */ +treap *treapush (treap **t, treapcmpfn cmp, int prio, size_t count, const void *data); +/** Unlink node from the tree rooted at *t and free it. */ +void treaprem (treap **t, treap *node); +/** Change the priority of node and rotate it until heap order holds again. */ +void treaprio (treap **t, treap *node, treapcmpfn cmp, int prio); + +/* Observers */ + +/** Find the node comparing equal to data, starting at *t; *t is left at the last node visited. Returns NULL if there is no such node. */ +const treap *treapget (const treap **t, treapcmpfn cmp, const void *data); +/** Priority of t. */ +int treapriov (const treap *t); +/** Left child of t, or NULL. */ +treap *treaplt (const treap *t); +/** Right child of t, or NULL. */ +treap *treapgteq (const treap *t); +/** First node in order of the subtree rooted at t, or NULL if t is NULL. */ +const treap *treapiter (const treap *t); +/** In-order successor of t, or NULL after the last node. */ +const treap *treapnext (const treap *t); +/** Pointer to the payload stored in t. */ +const void *treapdata (const treap *t); +/** First node in order for which cmp(data, node) <= 0, or NULL if there is none. */ +const treap *treapfirst(const treap *t, treapcmpfn cmp, const void *data); + +/** + * treapcontainer returns the treap node pointer from a data pointer + */ +const treap *treapcontainer(const void *data); + +#endif /* #ifndef TREAP_H */ -- 2.47.3