Sun May 26 17:00:07 EDT 2024
From: Ori Bernstein <ori@eigenstate.org>
Date: Mon, 27 May 2024 00:17:35 +0000
Subject: [PATCH] gefs: improve heuristic for arena selection

---
diff 80ed3922c3c6b9e2b24a07a96bf7d9267fce7d2e 90d8e4d9442cd3219211f12a2cc66ee5418c6b57
--- a/sys/src/cmd/gefs/blk.c
+++ b/sys/src/cmd/gefs/blk.c
@@ -134,14 +134,14 @@
 static Arena*
 pickarena(uint ty, uint hint, int tries)
 {
- uint n;
+ uint n, r;

- n = hint + tries + ainc(&fs->roundrobin)/1024;
+ r = ainc(&fs->roundrobin)/2048;
	if(ty == Tdat)
- n++;
- if(hint % fs->narena == 0)
- n++;
- return &fs->arenas[n%fs->narena];
+ n = hint % (fs->narena - 1) + r + 1;
+ else
+ n = r;
+ return &fs->arenas[(n + tries) % fs->narena];
 }

 Arena*
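
A rough standalone restatement of the new selection math (a toy, not the
gefs code; Narena and the fixed round-robin value r are made-up inputs
here): for a given round-robin value, metadata lands in one arena while
data blocks spread across the remaining narena-1 arenas, and each retry
steps to the next arena, so data and metadata stop contending for the
same arena.

#include <stdio.h>

enum { Tdat = 1, Tleaf = 2, Narena = 4 };

unsigned
pick(unsigned ty, unsigned hint, int tries, unsigned r)
{
	unsigned n;

	if(ty == Tdat)
		n = hint % (Narena - 1) + r + 1;	/* data: hint picks a home arena, skipping slot r */
	else
		n = r;					/* metadata: pure round robin */
	return (n + tries) % Narena;			/* a retry walks to the next arena */
}

int
main(void)
{
	unsigned hint;

	/* with r fixed at 0, data spreads over arenas 1..3, metadata sits in arena 0 */
	for(hint = 0; hint < 6; hint++)
		printf("hint %u -> arena %u\n", hint, pick(Tdat, hint, 0, 0));
	printf("metadata -> arena %u\n", pick(Tleaf, 0, 0, 0));
	return 0;
}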


Sun May 26 16:45:47 EDT 2024
From: Ori Bernstein <ori@eigenstate.org>
Date: Mon, 27 May 2024 00:42:36 +0000
Subject: [PATCH] gefs: tweak block address heuristic to sequentialize allocation


When churning through data blocks and writing out of order, we would
copy blocks and release them often, leading to gaps at the back of
the file.  Retrying an allocation could also lead to allocating a block
in the same arena as the metadata.

This would leave gaps in the allocated files, which would cause seeks
when accessing the data later.

This set of changes allocates blocks that are expected to be accessed
sequentially from the start of the arena, and blocks that are expected
to be accessed out of order at the end of the arena.

For full data blocks (i.e., where the write reaches the end of the block),
we assume that the accesses will be sequential.  For partial data blocks,
where the write offset stops short of the end of the data block, we assume
that future appends are inbound, and we allocate the block in the
non-sequential part of the arena.  As a result, when we eventually fill the
block, the rewritten block is allocated from the sequential part.

This doesn't help us much if writes to several files are interleaved, or
if we overwrite the same section of a file over and over, but it's better
than nothing.
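
As a toy model of the idea (not the gefs code; the real blkalloc_lk below
walks an AVL tree of Arange extents under the arena lock, and Blksz here
is just a stand-in): sequential allocations carve from the front of a free
extent, out-of-order ones from the back, so the two kinds of traffic stay
at opposite ends of the arena.

#include <stdio.h>

enum { Blksz = 16*1024 };	/* stand-in block size, not necessarily gefs's */

typedef struct Extent Extent;
struct Extent {
	long long off;	/* start of the free range */
	long long len;	/* bytes free */
};

/* seq != 0: carve from the front; seq == 0: carve from the back */
long long
alloc(Extent *r, int seq)
{
	long long b;

	if(r->len < Blksz)
		return -1;
	if(seq){
		b = r->off;
		r->len -= Blksz;
		r->off += Blksz;
	}else{
		r->len -= Blksz;
		b = r->off + r->len;
	}
	return b;
}

int
main(void)
{
	Extent e = {0, 8*Blksz};

	printf("seq: %lld\n", alloc(&e, 1));	/* 0: packs at the start */
	printf("seq: %lld\n", alloc(&e, 1));	/* Blksz: contiguous with the last one */
	printf("ooo: %lld\n", alloc(&e, 0));	/* 7*Blksz: out of the way at the end */
	return 0;
}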
---
diff 90d8e4d9442cd3219211f12a2cc66ee5418c6b57 843b2168c6a41b5fd91a7e9993d14dc0c186d4ca
--- a/sys/src/cmd/gefs/blk.c
+++ b/sys/src/cmd/gefs/blk.c
@@ -7,8 +7,8 @@
 #include "fns.h"
 #include "atomic.h"

-static vlong blkalloc_lk(Arena*);
-static vlong blkalloc(int, uint);
+static vlong blkalloc_lk(Arena*, int);
+static vlong blkalloc(int, uint, int);
 static void blkdealloc_lk(Arena*, vlong);
 static Blk* initblk(Blk*, vlong, vlong, int);

@@ -286,7 +286,7 @@
	 * and chaining.
	 */
	if(lb == nil || lb->logsz >= Logspc - Logslop){
- o = blkalloc_lk(a);
+ o = blkalloc_lk(a, 0);
		if(o == -1)
			error(Efull);
		nl = mklogblk(a, o);
@@ -430,7 +430,7 @@
		nexterror();
	}
	for(i = 0; i < nblks; i++){
- blks[i] = blkalloc_lk(a);
+ blks[i] = blkalloc_lk(a, 1);
		if(blks[i] == -1)
			error(Efull);
	}
@@ -495,14 +495,15 @@
  * the alloc log.
  */
 static vlong
-blkalloc_lk(Arena *a)
+blkalloc_lk(Arena *a, int seq)
 {
- Avltree *t;
	Arange *r;
	vlong b;

- t = a->free;
- r = (Arange*)t->root;
+ if(seq)
+ r = (Arange*)avlmin(a->free);
+ else
+ r = (Arange*)avlmax(a->free);
	if(!usereserve && a->size - a->used <= a->reserve)
		return -1;
	if(r == nil)
@@ -515,11 +516,16 @@
	 * the sort order because the tree
	 * covers disjoint ranges
	 */
- b = r->off;
- r->len -= Blksz;
- r->off += Blksz;
+ if(seq){
+ b = r->off;
+ r->len -= Blksz;
+ r->off += Blksz;
+ }else{
+ r->len -= Blksz;
+ b = r->off + r->len;
+ }
	if(r->len == 0){
- avldelete(t, r);
+ avldelete(a->free, r);
		free(r);
	}
	a->used += Blksz;
@@ -548,7 +554,7 @@
 }

 static vlong
-blkalloc(int ty, uint hint)
+blkalloc(int ty, uint hint, int seq)
 {
	Arena *a;
	vlong b;
@@ -575,7 +581,7 @@
		qunlock(a);
		nexterror();
	}
- b = blkalloc_lk(a);
+ b = blkalloc_lk(a, seq);
	if(b == -1){
		qunlock(a);
		poperror();
@@ -633,13 +639,28 @@
 }

 Blk*
-newblk(Tree *t, int ty, vlong hint)
+newdblk(Tree *t, vlong hint, int seq)
 {
	vlong bp;
	Blk *b;

- bp = blkalloc(ty, hint);
+ bp = blkalloc(Tdat, hint, seq);
	b = cachepluck();
+ initblk(b, bp, t->memgen, Tdat);
+ b->alloced = getcallerpc(&t);
+ tracex("newblk" , b->bp, Tdat, -1);
+ return b;
+
+}
+
+Blk*
+newblk(Tree *t, int ty)
+{
+ vlong bp;
+ Blk *b;
+
+ bp = blkalloc(ty, 0, 0);
+ b = cachepluck();
	initblk(b, bp, t->memgen, ty);
	b->alloced = getcallerpc(&t);
	tracex("newblk" , b->bp, ty, -1);
@@ -651,7 +672,7 @@
 {
	Blk *r;

- if((r = newblk(t, b->type, 0)) == nil)
+ if((r = newblk(t, b->type)) == nil)
		return nil;

	tracex("dup" , b->bp, b->type, t->gen);
--- a/sys/src/cmd/gefs/fns.h
+++ b/sys/src/cmd/gefs/fns.h
@@ -35,8 +35,9 @@

 void* emalloc(usize, int);

-Blk* newblk(Tree *, int, vlong);
-Blk* dupblk(Tree *, Blk*);
+Blk* newdblk(Tree*, vlong, int);
+Blk* newblk(Tree*, int);
+Blk* dupblk(Tree*, Blk*);
 Blk* getroot(Tree*, int*);
 Blk* getblk(Bptr, int);
 Blk* holdblk(Blk*);
--- a/sys/src/cmd/gefs/fs.c
+++ b/sys/src/cmd/gefs/fs.c
@@ -471,6 +471,7 @@
	char buf[Kvmax];
	vlong fb, fo;
	Blk *b, *t;
+ int seq;
	Tree *r;
	Bptr bp;
	Kvp kv;
@@ -482,7 +483,11 @@
	PACK64(m->k+1, f->qpath);
	PACK64(m->k+9, fb);

- b = newblk(f->mnt->root, Tdat, f->qpath);
+ if(fo+n >= Blksz)
+ seq = 1;
+ else
+ seq = 0;
+ b = newdblk(f->mnt->root, f->qpath, seq);
	t = nil;
	r = f->mnt->root;
	if(btlookup(r, m, &kv, buf, sizeof(buf))){
--- a/sys/src/cmd/gefs/ream.c
+++ b/sys/src/cmd/gefs/ream.c
@@ -286,7 +286,7 @@
		loadlog(a, a->loghd);
	}

- if((mb = newblk(mnt->root, Tleaf, 0)) == nil)
+ if((mb = newblk(mnt->root, Tleaf)) == nil)
		sysfatal("ream: allocate root: %r");
	holdblk(mb);
	initroot(mb);
@@ -296,9 +296,9 @@
	mnt->root->ht = 1;
	mnt->root->bp = mb->bp;

- if((ab = newblk(adm->root, Tleaf, 0)) == nil)
+ if((ab = newblk(adm->root, Tleaf)) == nil)
		sysfatal("ream: allocate root: %r");
- if((ub = newblk(adm->root, Tdat, 0)) == nil)
+ if((ub = newdblk(adm->root, 0, 1)) == nil)
		sysfatal("ream: allocate root: %r");
	holdblk(ab);
	holdblk(ub);
@@ -322,7 +322,7 @@
	 * a single snap block that the tree will insert
	 * into, and take a snapshot as the initial state.
	 */
- if((tb = newblk(mnt->root, Tleaf, 0)) == nil)
+ if((tb = newblk(mnt->root, Tleaf)) == nil)
		sysfatal("ream: allocate snaps: %r");
	holdblk(tb);
	initsnap(tb, mb, ab);
--- a/sys/src/cmd/gefs/snap.c
+++ b/sys/src/cmd/gefs/snap.c
@@ -596,7 +596,7 @@
		nexterror();
	}
	if(dl->ins == nil || Logspc - dl->ins->logsz < Logslop){
- b = newblk(&fs->snap, Tdlist, 0);
+ b = newblk(&fs->snap, Tdlist);
		if(dl->ins != nil){
			enqueue(dl->ins);
			dropblk(dl->ins);
--- a/sys/src/cmd/gefs/tree.c
+++ b/sys/src/cmd/gefs/tree.c
@@ -482,7 +482,7 @@
	 */
	full = 0;
	spc = Leafspc - blkfill(b);
- n = newblk(t, b->type, 0);
+ n = newblk(t, b->type);
	assert(i >= 0 && j >= 0);
	while(i < b->nval || j < up->hi){
		if(i >= b->nval)
@@ -573,7 +573,7 @@
	Msg m, u;

	b = p->b;
- n = newblk(t, b->type, 0);
+ n = newblk(t, b->type);
	for(i = 0; i < b->nval; i++){
		if(pp != nil && i == p->midx){
			copyup(n, pp, nil);
@@ -657,8 +657,8 @@
		efreeblk(t, r);
		nexterror();
	}
- l = newblk(t, b->type, 0);
- r = newblk(t, b->type, 0);
+ l = newblk(t, b->type);
+ r = newblk(t, b->type);

	d = l;
	i = 0;
@@ -770,8 +770,8 @@
		efreeblk(t, r);
		nexterror();
	}
- l = newblk(t, b->type, 0);
- r = newblk(t, b->type, 0);
+ l = newblk(t, b->type);
+ r = newblk(t, b->type);
	d = l;
	copied = 0;
	halfsz = (2*b->nval + b->valsz)/2;
@@ -820,7 +820,7 @@
	Msg m;
	int i;

- d = newblk(t, a->type, 0);
+ d = newblk(t, a->type);
	for(i = 0; i < a->nval; i++){
		getval(a, i, &m);
		setval(d, &m);
@@ -904,8 +904,8 @@
		efreeblk(t, r);
		nexterror();
	}
- l = newblk(t, a->type, 0);
- r = newblk(t, a->type, 0);
+ l = newblk(t, a->type);
+ r = newblk(t, a->type);
	d = l;
	cp = 0;
	sp = -1;
@@ -1088,7 +1088,7 @@
	}
	if(pp->nl != nil && pp->nr != nil){
		rp = &path[0];
- rp->nl = newblk(t, Tpivot, 0);
+ rp->nl = newblk(t, Tpivot);
		rp->npull = pp->npull;
		rp->pullsz = pp->pullsz;
		copyup(rp->nl, pp, nil);
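
The fs.c side of this boils down to the one predicate in the hunk above;
a simplified, hedged restatement (fo is the write's offset within the data
block, n the byte count, as in the hunk; Blksz is a stand-in value):

#include <stdio.h>

enum { Blksz = 16*1024 };	/* stand-in block size */

/* a write that reaches the end of its block is treated as part of a
 * sequential stream; anything shorter is expected to be appended to */
int
seqwrite(long long fo, long long n)
{
	return fo + n >= Blksz;
}

int
main(void)
{
	printf("%d\n", seqwrite(0, Blksz));		/* 1: full block write */
	printf("%d\n", seqwrite(Blksz-512, 512));	/* 1: fills in the tail */
	printf("%d\n", seqwrite(0, 4096));		/* 0: partial, appends likely inbound */
	return 0;
}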


Sun May 26 14:53:00 EDT 2024
diff 8a2efea90ce8ced888e0138b9f33e7f6179ae949 uncommitted
--- a/sys/src/cmd/gefs/fs.c
+++ b/sys/src/cmd/gefs/fs.c
@@ -471,6 +471,7 @@
	char buf[Kvmax];
	vlong fb, fo;
	Blk *b, *t;
+ int seq;
	Tree *r;
	Bptr bp;
	Kvp kv;
@@ -482,7 +483,11 @@
	PACK64(m->k+1, f->qpath);
	PACK64(m->k+9, fb);

- b = newblk(f->mnt->root, Tdat, f->qpath);
+ if((fo+n) % Blksz == 0)
+ seq = 1;
+ else
+ seq = 0;
+ b = newblk(f->mnt->root, Tdat, f->qpath, seq);
	t = nil;
	r = f->mnt->root;
	if(btlookup(r, m, &kv, buf, sizeof(buf))){


Sun May 26 14:52:27 EDT 2024
 static vlong
-blkalloc_lk(Arena *a)
+blkalloc_lk(Arena *a, int seq)
 {
	Avltree *t;
	Arange *r;
@@ -502,7 +504,10 @@
	vlong b;

	t = a->free;
- r = (Arange*)t->root;
+ if(seq)
+ r = (Arange*)avlmin(t);
+ else
+ r = (Arange*)avlmax(t);
	if(!usereserve && a->size - a->used <= a->reserve)
		return -1;
	if(r == nil)
@@ -515,9 +520,14 @@
	 * the sort order because the tree
	 * covers disjoint ranges
	 */
- b = r->off;
- r->len -= Blksz;
- r->off += Blksz;
+ if(seq){
+ b = r->off;
+ r->len -= Blksz;
+ r->off += Blksz;
+ }else{
+ r->len -= Blksz;
+ b = r->off+r->len;
+ }
	if(r->len == 0){
		avldelete(t, r);
		free(r);
@@ -548,7 +558,7 @@
 }


Sun May 26 14:18:34 EDT 2024
diff 8a2efea90ce8ced888e0138b9f33e7f6179ae949 uncommitted
--- a/sys/src/cmd/tar.c
+++ b/sys/src/cmd/tar.c
@@ -56,8 +56,7 @@
	Binsize = 0x80, /* flag in size[0], from gnu: positive binary size */
	Binnegsz = 0xff, /* flag in size[0]: negative binary size */

- Nblock = 40, /* maximum blocksize */
- Dblock = 20, /* default blocksize */
+ Dblock = IOUNIT/Tblock, /* blocksize */
	Debug = 0,
 };



Sun May 26 14:14:39 EDT 2024
diff ded308e5762d26ed79d26d540dfba8d336f20390 uncommitted
--- a/sys/src/cmd/gefs/blk.c
+++ b/sys/src/cmd/gefs/blk.c
@@ -50,11 +50,16 @@
 void
 syncblk(Blk *b)
 {
+ vlong t;
+
	assert(checkflag(b, Bfinal));
	assert(b->bp.addr >= 0);
	clrflag(b, Bdirty);
+
+ t = nsec();
	if(pwrite(fs->fd, b->buf, Blksz, b->bp.addr) == -1)
		broke("%B %s: %r", b->bp, Eio);
+ fprint(2, "%d: syncblk %.16llux %x %lld\n", getpid(), b->bp.addr, b->type, nsec() - t);
 }

 static Blk*
@@ -134,14 +139,14 @@
 static Arena*
 pickarena(uint ty, uint hint, int tries)
 {
- uint n;
+ uint n, r;

- n = hint + tries + ainc(&fs->roundrobin)/1024;
+if(tries) fprint(2, "pickarena %ux %ux %d\n", ty, hint, tries);
+ r = ainc(&fs->roundrobin)/16384;
+ n = (hint+tries) % (fs->narena-1);
	if(ty == Tdat)
		n++;
- if(hint % fs->narena == 0)
- n++;
- return &fs->arenas[n%fs->narena];
+ return &fs->arenas[(n+r)%fs->narena];
 }

 Arena*
@@ -502,7 +507,8 @@
	vlong b;

	t = a->free;
- r = (Arange*)t->root;
+ //r = (Arange*)t->root;
+ r = (Arange*)avlmin(t);
	if(!usereserve && a->size - a->used <= a->reserve)
		return -1;
	if(r == nil)
@@ -1074,8 +1080,12 @@
			break;
		case Qwrite:
			tracex("qsyncb", qe.bp, qe.qgen, -1);
- if(checkflag(qe.b, Bfreed) == 0)
+ if(checkflag(qe.b, Bfreed) == 0){
				syncblk(qe.b);
+ } else {
+ fprint(2, "%d: skipblk %.16llux %x\n",
+ getpid(), qe.b->bp.addr, qe.b->type);
+ }
			dropblk(qe.b);
			break;
		default:
--- a/sys/src/cmd/gefs/cons.c
+++ b/sys/src/cmd/gefs/cons.c
@@ -338,6 +338,24 @@
 }

 static void
+showfree(int fd, char **, int)
+{
+ Arange *r;
+ Arena *a;
+ int i;
+
+ for(i = 0; i < fs->narena; i++){
+ a = &fs->arenas[i];
+ qlock(a);
+ fprint(fd, "arena %d %llx+%llx{\n", i, a->h0->bp.addr, a->size);
+ for(r = (Arange*)avlmin(a->free); r != nil; r = (Arange*)avlnext(r))
+ fprint(fd, "\t%llx..%llx (%llx)\n", r->off, r->off+r->len, r->len);
+ fprint(fd, "}\n");
+ qunlock(a);
+ }
+}
+
+static void
 unreserve(int fd, char **ap, int)
 {
	if(strcmp(ap[0], "on") == 0)
@@ -387,6 +405,7 @@
	{.name="show", .sub="tree", .minarg=0, .maxarg=1, .fn=showtree},
	{.name="show", .sub="users", .minarg=0, .maxarg=0, .fn=showusers},
	{.name="show", .sub="bstate", .minarg=0, .maxarg=0, .fn=showbstate},
+ {.name="show", .sub="free", .minarg=0, .maxarg=0, .fn=showfree},
	{.name="debug", .sub=nil, .minarg=0, .maxarg=1, .fn=setdbg},
	{.name="save", .sub="trace", .minarg=0, .maxarg=1, .fn=savetrace},
	{.name=nil, .sub=nil},
--- a/sys/src/cmd/gefs/fs.c
+++ b/sys/src/cmd/gefs/fs.c
@@ -168,8 +168,8 @@
	 * Pass 4: clean up the old snap tree's deadlist
	 */
	tracem("snapdl");
- wrbarrier();
	qunlock(&fs->mutlk);
+ wrbarrier();
	freedl(&dl, 1);
	qunlock(&fs->synclk);
	tracem("synced");
--- a/sys/src/cmd/gefs/main.c
+++ b/sys/src/cmd/gefs/main.c
@@ -397,7 +397,7 @@
	loadfs(dev);
	fs->wrchan = mkchan(32);
	fs->admchan = mkchan(32);
- fs->nsyncers = nproc/2;
+ fs->nsyncers = 1;
	fs->nreaders = nproc/2;
	if(fs->nsyncers > fs->narena)
		fs->nsyncers = fs->narena;

