Summary
Ruby extension for GNU dbm (gdbm) – a simple database engine for storing key-value pairs on disk.
Description
GNU dbm is a library for simple databases. A database is a file that stores key-value pairs. Gdbm allows the user to store, retrieve, and delete data by key. It furthermore allows a non-sorted traversal of all key-value pairs. A gdbm database thus provides the same functionality as a hash. As with objects of the Hash class, elements can be accessed with []. Furthermore, GDBM mixes in the Enumerable module, thus providing convenient methods such as find, collect, map, etc.
A process is allowed to open several different databases at the same time. A process can open a database as a “reader” or a “writer”. Whereas a reader has only read-access to the database, a writer has read- and write-access. A database can be accessed either by any number of readers or by exactly one writer at the same time.
Examples
-
Opening/creating a database, and filling it with some entries:
require 'gdbm'
gdbm = GDBM.new("fruitstore.db")
gdbm["ananas"] = "3"
gdbm["banana"] = "8"
gdbm["cranberry"] = "4909"
gdbm.close
-
Reading out a database:
require 'gdbm'
gdbm = GDBM.new("fruitstore.db")
gdbm.each_pair do |key, value|
  print "#{key}: #{value}\n"
end
gdbm.close
produces
banana: 8
ananas: 3
cranberry: 4909
Links
open database as a reader
open database as a writer
open database as a writer; if the database does not exist, create a new one
open database as a writer; overwrite any existing databases
version of the gdbm library
/*
 * Opens the gdbm file named by the first argument and attaches the handle
 * to obj.
 *
 * Arguments are scanned with "12": file, optional mode, optional flags.
 *   - only file given:  mode is 0666
 *   - mode given as nil: mode is -1, and Qnil is returned if nothing opens
 *   - otherwise:         mode is NUM2INT(vmode)
 * flags, when non-nil, is NUM2INT(vflags).
 *
 * Returns obj on success, or Qnil when mode is -1 and no handle could be
 * opened.  Any other failure raises: a system error (via rb_sys_fail_str)
 * for the open/reader/writer gdbm errors, rb_eGDBMError for everything else.
 */
static VALUE
fgdbm_initialize(int argc, VALUE *argv, VALUE obj)
{
    VALUE file, vmode, vflags;
    GDBM_FILE db = 0;
    struct dbmdata *data;
    int mode;
    int flags = 0;
    int nargs = rb_scan_args(argc, argv, "12", &file, &vmode, &vflags);

    if (nargs == 1) {
        mode = 0666;            /* default value */
    }
    else if (NIL_P(vmode)) {
        mode = -1;              /* return nil if DB does not exist */
    }
    else {
        mode = NUM2INT(vmode);
    }

    if (!NIL_P(vflags)) {
        flags = NUM2INT(vflags);
    }

    FilePathValue(file);

#ifdef GDBM_CLOEXEC
    /* GDBM_CLOEXEC is available since gdbm 1.10. */
    flags |= GDBM_CLOEXEC;
#endif

    if (flags & RUBY_GDBM_RW_BIT) {
        /* An explicit open mode was requested: strip the marker bit and
         * make exactly one attempt with the caller's flags. */
        flags &= ~RUBY_GDBM_RW_BIT;
        db = gdbm_open(RSTRING_PTR(file), MY_BLOCK_SIZE,
                       flags, mode, MY_FATAL_FUNC);
    }
    else {
        /* No explicit open mode: try writer+create (only when a file mode
         * is available), then plain writer, then reader. */
        if (mode >= 0) {
            db = gdbm_open(RSTRING_PTR(file), MY_BLOCK_SIZE,
                           GDBM_WRCREAT|flags, mode, MY_FATAL_FUNC);
        }
        if (!db) {
            db = gdbm_open(RSTRING_PTR(file), MY_BLOCK_SIZE,
                           GDBM_WRITER|flags, 0, MY_FATAL_FUNC);
        }
        if (!db) {
            db = gdbm_open(RSTRING_PTR(file), MY_BLOCK_SIZE,
                           GDBM_READER|flags, 0, MY_FATAL_FUNC);
        }
    }

    if (db) {
        rb_fd_fix_cloexec(gdbm_fdesc(db));
    }
    else {
        if (mode == -1) return Qnil;

        if (gdbm_errno == GDBM_FILE_OPEN_ERROR ||
            gdbm_errno == GDBM_CANT_BE_READER ||
            gdbm_errno == GDBM_CANT_BE_WRITER) {
            rb_sys_fail_str(file);
        }
        else {
            rb_raise(rb_eGDBMError, "%s", gdbm_strerror(gdbm_errno));
        }
    }

    /* Allocate the new wrapper before releasing whatever obj held before. */
    data = ALLOC(struct dbmdata);
    free_dbm(DATA_PTR(obj));
    DATA_PTR(obj) = data;
    data->di_dbm = db;
    data->di_size = -1;

    return obj;
}
Creates a new GDBM instance by opening a gdbm file named filename. If the file does not exist, a new file with file mode mode will be created. flags may be one of the following:
-
READER - open as a reader
-
WRITER - open as a writer
-
WRCREAT - open as a writer; if the database does not exist, create a new one
-
NEWDB - open as a writer; overwrite any existing databases
The values WRITER, WRCREAT and NEWDB may be combined with the following values by bitwise or:
-
SYNC - cause all database operations to be synchronized to the disk
-
NOLOCK - do not lock the database file
If no flags are specified, the GDBM object will try to open the database file as a writer and will create it if it does not already exist (cf. flag WRCREAT). If this fails (for instance, if another process has already opened the database as a reader), it will try to open the database file as a reader (cf. flag READER).
/*
 * Allocates a new instance of klass and initializes it with the given
 * arguments.
 *
 * Returns Qnil if fgdbm_initialize returned nil.  With a block, yields the
 * instance and returns the block's result, running fgdbm_close on the
 * instance afterwards via rb_ensure.  Without a block, returns the
 * instance.
 */
static VALUE
fgdbm_s_open(int argc, VALUE *argv, VALUE klass)
{
    VALUE db = fgdbm_s_alloc(klass);
    VALUE opened = fgdbm_initialize(argc, argv, db);

    if (NIL_P(opened)) {
        return Qnil;
    }
    if (!rb_block_given_p()) {
        return db;
    }
    return rb_ensure(rb_yield, db, fgdbm_close, db);
}
If called without a block, this is synonymous to GDBM::new. If a block is given, the new GDBM instance will be passed to the block as a parameter, and the corresponding database file will be closed after the execution of the block code has been finished.
Example for an open call with a block:
require 'gdbm'
GDBM.open("fruitstore.db") do |gdbm|
  gdbm.each_pair do |key, value|
    print "#{key}: #{value}\n"
  end
end
/*
 * Looks up keystr in the database wrapped by obj and returns whatever
 * rb_gdbm_fetch3 yields for it.
 */
static VALUE
fgdbm_aref(VALUE obj, VALUE keystr)
{
    VALUE found = rb_gdbm_fetch3(obj, keystr);

    return found;
}
Retrieves the value corresponding to key.
/*
 * Stores valstr under keystr, replacing any existing entry
 * (GDBM_REPLACE).
 *
 * Both arguments are converted with StringValue before their buffers are
 * read.  The cached entry count (di_size) is reset to -1.
 *
 * Returns valstr.  If gdbm_store reports failure, raises a system error
 * when errno is EPERM and rb_eGDBMError otherwise.
 */
static VALUE
fgdbm_store(VALUE obj, VALUE keystr, VALUE valstr)
{
    struct dbmdata *data;
    GDBM_FILE db;
    datum k, v;

    rb_gdbm_modify(obj);

    /* Convert both arguments first; only then take pointers into them. */
    StringValue(keystr);
    StringValue(valstr);

    k.dptr = RSTRING_PTR(keystr);
    k.dsize = RSTRING_LENINT(keystr);
    v.dptr = RSTRING_PTR(valstr);
    v.dsize = RSTRING_LENINT(valstr);

    GetDBM2(obj, data, db);
    data->di_size = -1;

    if (gdbm_store(db, k, v, GDBM_REPLACE) != 0) {
        if (errno == EPERM) {
            rb_sys_fail(0);
        }
        rb_raise(rb_eGDBMError, "%s", gdbm_strerror(gdbm_errno));
    }

    return valstr;
}
Associates the value value with the specified key.
/*
 * Sets the GDBM_CACHESIZE option of the open database to FIX2INT(val).
 *
 * Returns val; raises rb_eGDBMError if gdbm_setopt returns -1.
 */
static VALUE
fgdbm_set_cachesize(VALUE obj, VALUE val)
{
    struct dbmdata *data;
    GDBM_FILE db;
    int cache_size;
    int rc;

    GetDBM2(obj, data, db);
    cache_size = FIX2INT(val);

    rc = gdbm_setopt(db, GDBM_CACHESIZE, &cache_size, sizeof(cache_size));
    if (rc == -1) {
        rb_raise(rb_eGDBMError, "%s", gdbm_strerror(gdbm_errno));
    }
    return val;
}
Sets the size of the internal bucket cache to size.
/*
 * Deletes every entry from the database wrapped by obj.
 *
 * The outer loop restarts from gdbm_firstkey after each full pass and only
 * stops once no first key remains.  Within a pass, the successor of each
 * key is fetched before that key is deleted.
 *
 * Sets the cached entry count (di_size) to 0 on success and returns obj.
 * Raises rb_eGDBMError if gdbm_delete fails; the key buffers held at that
 * point are freed first.
 */
static VALUE
fgdbm_clear(VALUE obj)
{
    struct dbmdata *data;
    GDBM_FILE db;
    datum cur, following;

    rb_gdbm_modify(obj);
    GetDBM2(obj, data, db);
    data->di_size = -1;

    for (;;) {
        cur = gdbm_firstkey(db);
        if (!cur.dptr) break;

        while (cur.dptr) {
            following = gdbm_nextkey(db, cur);
            if (gdbm_delete(db, cur)) {
                free(cur.dptr);
                if (following.dptr) free(following.dptr);
                rb_raise(rb_eGDBMError, "%s", gdbm_strerror(gdbm_errno));
            }
            free(cur.dptr);
            cur = following;
        }
    }

    data->di_size = 0;
    return obj;
}
Removes all the key-value pairs within gdbm.
/*
 * Closes the gdbm handle held by obj and clears the stored handle so that
 * later checks see it as closed.  Always returns Qnil.
 */
static VALUE
fgdbm_close(VALUE obj)
{
    struct dbmdata *data;

    GetDBM(obj, data);

    gdbm_close(data->di_dbm);
    data->di_dbm = 0;   /* marks the wrapper as closed (see fgdbm_closed) */

    return Qnil;
}
Closes the associated database file.
/*
 * Returns Qtrue when obj has no wrapper data or its gdbm handle is null,
 * Qfalse otherwise.
 */
static VALUE
fgdbm_closed(VALUE obj)
{
    struct dbmdata *data;

    TypedData_Get_Struct(obj, struct dbmdata, &dbm_type, data);

    return (data == 0 || data->di_dbm == 0) ? Qtrue : Qfalse;
}
Returns true if the associated database file has been closed.
/*
 * Fetches the current value for keystr (with Qnil as the fallback), then
 * deletes the entry through rb_gdbm_delete.  Returns the fetched value.
 */
static VALUE
fgdbm_delete(VALUE obj, VALUE keystr)
{
    VALUE previous = fgdbm_fetch(obj, keystr, Qnil);

    rb_gdbm_delete(obj, keystr);
    return previous;
}
Removes the key-value-pair with the specified key from this database and returns the corresponding value. Returns nil if there is no entry for the specified key.
/*
 * Yields a [key, value] pair for every entry and deletes the entries for
 * which the block returned a truthy value.
 *
 * Keys to delete are collected during the traversal and removed afterwards.
 * The block runs under rb_protect: if it exits non-locally, traversal
 * stops, the keys collected so far are still deleted, and the pending jump
 * is then resumed with rb_jump_tag.
 *
 * Returns obj.
 */
static VALUE
fgdbm_delete_if(VALUE obj)
{
    struct dbmdata *data;
    GDBM_FILE db;
    VALUE keystr, valstr, verdict;
    VALUE doomed = rb_ary_tmp_new(0);
    long idx;
    int state = 0;
    int cached;

    rb_gdbm_modify(obj);
    GetDBM2(obj, data, db);
    cached = data->di_size;
    data->di_size = -1;

    keystr = rb_gdbm_firstkey(db);
    while (RTEST(keystr)) {
        OBJ_FREEZE(keystr);
        valstr = rb_gdbm_fetch2(db, keystr);
        /* The block receives a copy of the key, not the frozen original. */
        verdict = rb_protect(rb_yield,
                             rb_assoc_new(rb_str_dup(keystr), valstr),
                             &state);
        if (state != 0) break;
        if (RTEST(verdict)) rb_ary_push(doomed, keystr);

        /* Re-read the handle after the block has run. */
        GetDBM2(obj, data, db);
        keystr = rb_gdbm_nextkey(db, keystr);
    }

    for (idx = 0; idx < RARRAY_LEN(doomed); idx++) {
        rb_gdbm_delete(obj, RARRAY_AREF(doomed, idx));
    }

    if (state) rb_jump_tag(state);

    /* Only restore a cached count that was positive before we started. */
    if (cached > 0) data->di_size = cached - (int)RARRAY_LEN(doomed);
    rb_ary_clear(doomed);

    return obj;
}
Deletes every key-value pair from gdbm for which block evaluates to true.
/*
 * Yields a [key, value] pair for each entry in the database.
 * Goes through RETURN_ENUMERATOR first (the usual enumerator-when-no-block
 * idiom).  Returns obj.
 */
static VALUE
fgdbm_each_pair(VALUE obj)
{
    struct dbmdata *data;
    GDBM_FILE db;
    VALUE keystr;

    RETURN_ENUMERATOR(obj, 0, 0);

    GetDBM2(obj, data, db);
    keystr = rb_gdbm_firstkey(db);
    while (RTEST(keystr)) {
        rb_yield(rb_assoc_new(keystr, rb_gdbm_fetch2(db, keystr)));

        /* Re-read the handle after the block has run. */
        GetDBM2(obj, data, db);
        keystr = rb_gdbm_nextkey(db, keystr);
    }
    return obj;
}
Executes block for each key in the database, passing the key and the corresponding value as a parameter.
/*
 * Yields each key in the database.
 * Goes through RETURN_ENUMERATOR first (the usual enumerator-when-no-block
 * idiom).  Returns obj.
 */
static VALUE
fgdbm_each_key(VALUE obj)
{
    struct dbmdata *data;
    GDBM_FILE db;
    VALUE keystr;

    RETURN_ENUMERATOR(obj, 0, 0);

    GetDBM2(obj, data, db);
    keystr = rb_gdbm_firstkey(db);
    while (RTEST(keystr)) {
        rb_yield(keystr);

        /* Re-read the handle after the block has run. */
        GetDBM2(obj, data, db);
        keystr = rb_gdbm_nextkey(db, keystr);
    }
    return obj;
}
Executes block for each key in the database, passing the key as a parameter.
/*
 * Yields the value of each entry in the database.
 * Goes through RETURN_ENUMERATOR first (the usual enumerator-when-no-block
 * idiom).  Returns obj.
 */
static VALUE
fgdbm_each_value(VALUE obj)
{
    struct dbmdata *data;
    GDBM_FILE db;
    VALUE keystr;

    RETURN_ENUMERATOR(obj, 0, 0);

    GetDBM2(obj, data, db);
    keystr = rb_gdbm_firstkey(db);
    while (RTEST(keystr)) {
        rb_yield(rb_gdbm_fetch2(db, keystr));

        /* Re-read the handle after the block has run. */
        GetDBM2(obj, data, db);
        keystr = rb_gdbm_nextkey(db, keystr);
    }
    return obj;
}
Executes block for each key in the database, passing the corresponding value as a parameter.
/*
 * Returns Qtrue if the database has no entries, Qfalse otherwise.
 *
 * A non-negative cached count (di_size) is trusted as-is.  A negative
 * count means the answer is found by asking gdbm for a first key.
 */
static VALUE
fgdbm_empty_p(VALUE obj)
{
    struct dbmdata *data;
    datum first;

    GetDBM(obj, data);

    if (data->di_size >= 0) {
        return data->di_size == 0 ? Qtrue : Qfalse;
    }

    first = gdbm_firstkey(data->di_dbm);
    if (!first.dptr) {
        return Qtrue;
    }
    free(first.dptr);
    return Qfalse;
}
Returns true if the database is empty.
/*
 * Sets the GDBM_FASTMODE option to 1 for a truthy val and 0 otherwise.
 *
 * Returns val; raises rb_eGDBMError if gdbm_setopt returns -1.
 */
static VALUE
fgdbm_set_fastmode(VALUE obj, VALUE val)
{
    struct dbmdata *data;
    GDBM_FILE db;
    int enabled;

    GetDBM2(obj, data, db);
    enabled = RTEST(val) ? 1 : 0;

    if (gdbm_setopt(db, GDBM_FASTMODE, &enabled, sizeof(enabled)) == -1) {
        rb_raise(rb_eGDBMError, "%s", gdbm_strerror(gdbm_errno));
    }
    return val;
}
Turns the database’s fast mode on or off. If fast mode is turned on, gdbm does not wait for writes to be flushed to the disk before continuing.
This option is obsolete for gdbm >= 1.8 since fast mode is turned on by default. See also: syncmode=
/*
 * Fetches the value for a key, with an optional fallback argument
 * (arguments scanned with "11": key, optional default).
 *
 * Returns the result of fgdbm_fetch.  Raises rb_eIndexError when that
 * result is nil, only the key was passed, and no block was given.
 */
static VALUE
fgdbm_fetch_m(int argc, VALUE *argv, VALUE obj)
{
    VALUE keystr, ifnone, result;
    int missing;

    rb_scan_args(argc, argv, "11", &keystr, &ifnone);
    result = fgdbm_fetch(obj, keystr, ifnone);

    missing = (argc == 1) && !rb_block_given_p() && NIL_P(result);
    if (missing) {
        rb_raise(rb_eIndexError, "key not found");
    }
    return result;
}
Retrieves the value corresponding to key. If there is no value associated with key, default will be returned instead.
/*
 * Returns Qtrue if any entry's value is byte-for-byte equal to valstr
 * (after StringValue conversion), Qfalse otherwise.
 * Walks every key and compares the fetched value by length, then memcmp.
 */
static VALUE
fgdbm_has_value(VALUE obj, VALUE valstr)
{
    struct dbmdata *data;
    GDBM_FILE db;
    VALUE keystr, candidate;

    StringValue(valstr);
    GetDBM2(obj, data, db);

    for (keystr = rb_gdbm_firstkey(db); RTEST(keystr);
         keystr = rb_gdbm_nextkey(db, keystr)) {

        candidate = rb_gdbm_fetch2(db, keystr);
        if (NIL_P(candidate)) continue;
        if ((int)RSTRING_LEN(valstr) != (int)RSTRING_LEN(candidate)) continue;
        if (memcmp(RSTRING_PTR(valstr), RSTRING_PTR(candidate),
                   (int)RSTRING_LEN(valstr)) != 0) continue;

        return Qtrue;
    }
    return Qfalse;
}
Returns true if the given value v exists within the database. Returns false otherwise.
/*
 * Returns Qtrue if gdbm_exists reports an entry for keystr (after
 * StringValue conversion), Qfalse otherwise.  A key whose length trips the
 * TOO_LONG check is reported as absent without consulting the database.
 */
static VALUE
fgdbm_has_key(VALUE obj, VALUE keystr)
{
    struct dbmdata *data;
    GDBM_FILE db;
    datum k;
    long klen;

    StringValue(keystr);
    klen = RSTRING_LENINT(keystr);
    if (TOO_LONG(klen)) {
        return Qfalse;
    }

    k.dptr = RSTRING_PTR(keystr);
    k.dsize = (int)klen;

    GetDBM2(obj, data, db);
    return gdbm_exists(db, k) ? Qtrue : Qfalse;
}
Returns true if the given key k exists within the database. Returns false otherwise.
/*
 * Builds and returns a new Ruby hash that maps each entry's value to its
 * key.  Later entries overwrite earlier ones that share a value
 * (rb_hash_aset semantics).
 */
static VALUE
fgdbm_invert(VALUE obj)
{
    struct dbmdata *data;
    GDBM_FILE db;
    VALUE keystr;
    VALUE inverted = rb_hash_new();

    GetDBM2(obj, data, db);

    keystr = rb_gdbm_firstkey(db);
    while (RTEST(keystr)) {
        rb_hash_aset(inverted, rb_gdbm_fetch2(db, keystr), keystr);
        keystr = rb_gdbm_nextkey(db, keystr);
    }
    return inverted;
}
Returns a hash created by using gdbm’s values as keys, and the keys as values.
/*
 * Returns the first key encountered whose value is byte-for-byte equal to
 * valstr (after StringValue conversion), or Qnil if there is none.
 * Walks every key and compares the fetched value by length, then memcmp.
 */
static VALUE
fgdbm_key(VALUE obj, VALUE valstr)
{
    struct dbmdata *data;
    GDBM_FILE db;
    VALUE keystr, candidate;

    StringValue(valstr);
    GetDBM2(obj, data, db);

    for (keystr = rb_gdbm_firstkey(db); RTEST(keystr);
         keystr = rb_gdbm_nextkey(db, keystr)) {

        candidate = rb_gdbm_fetch2(db, keystr);
        if (NIL_P(candidate)) continue;
        if ((int)RSTRING_LEN(valstr) != (int)RSTRING_LEN(candidate)) continue;
        if (memcmp(RSTRING_PTR(valstr), RSTRING_PTR(candidate),
                   (int)RSTRING_LEN(valstr)) != 0) continue;

        return keystr;
    }
    return Qnil;
}
Returns the key for a given value. If several keys may map to the same value, the key that is found first will be returned.
/*
 * Returns a new Ruby array holding every key, in traversal order.
 */
static VALUE
fgdbm_keys(VALUE obj)
{
    struct dbmdata *data;
    GDBM_FILE db;
    VALUE keystr, result;

    GetDBM2(obj, data, db);
    result = rb_ary_new();

    keystr = rb_gdbm_firstkey(db);
    while (RTEST(keystr)) {
        rb_ary_push(result, keystr);
        keystr = rb_gdbm_nextkey(db, keystr);
    }
    return result;
}
Returns an array of all keys of this database.
/*
 * Returns the number of entries as a Fixnum.
 *
 * A positive cached count (di_size) is returned directly.  Otherwise the
 * keys are walked and counted, and the result is stored back in di_size.
 */
static VALUE
fgdbm_length(VALUE obj)
{
    struct dbmdata *data;
    GDBM_FILE db;
    datum cur, following;
    int count = 0;

    GetDBM2(obj, data, db);
    if (data->di_size > 0) {
        return INT2FIX(data->di_size);
    }

    cur = gdbm_firstkey(db);
    while (cur.dptr) {
        /* Fetch the successor before releasing the current key buffer. */
        following = gdbm_nextkey(db, cur);
        free(cur.dptr);
        count++;
        cur = following;
    }

    data->di_size = count;
    return INT2FIX(count);
}
Returns the number of key-value pairs in this database.
/*
 * Converts the database to a Ruby hash with fgdbm_to_hash and runs
 * rb_hash_delete_if on that hash.  Returns the result; the database itself
 * is not modified by this function.
 */
static VALUE
fgdbm_reject(VALUE obj)
{
    VALUE copy = fgdbm_to_hash(obj);

    return rb_hash_delete_if(copy);
}
Returns a hash copy of gdbm where all key-value pairs from gdbm for which block evaluates to true are removed. See also: delete_if
/*
 * Runs gdbm_reorganize on the open database and returns obj.
 */
static VALUE
fgdbm_reorganize(VALUE obj)
{
    struct dbmdata *data;
    GDBM_FILE db;

    rb_gdbm_modify(obj);
    GetDBM2(obj, data, db);

    gdbm_reorganize(db);
    /* Re-apply close-on-exec to the descriptor gdbm reports afterwards
     * (presumably because reorganizing can swap the underlying file). */
    rb_fd_fix_cloexec(gdbm_fdesc(db));

    return obj;
}
Reorganizes the database file. This operation removes reserved space of elements that have already been deleted. It is only useful after a lot of deletions in the database.
/*
 * Empties the database, then calls other.each_pair with update_i as the
 * block function and obj as its extra argument.  Returns obj.
 */
static VALUE
fgdbm_replace(VALUE obj, VALUE other)
{
    ID each_pair;

    fgdbm_clear(obj);

    each_pair = rb_intern("each_pair");
    rb_block_call(other, each_pair, 0, 0, update_i, obj);

    return obj;
}
Replaces the content of gdbm with the key-value pairs of other. other must have an each_pair method.
/*
 * Yields a [key, value] pair for each entry and returns a new array of the
 * pairs for which the block returned a truthy value.
 */
static VALUE
fgdbm_select(VALUE obj)
{
    struct dbmdata *data;
    GDBM_FILE db;
    VALUE keystr, pair;
    VALUE selected = rb_ary_new();

    GetDBM2(obj, data, db);

    keystr = rb_gdbm_firstkey(db);
    while (RTEST(keystr)) {
        pair = rb_assoc_new(keystr, rb_gdbm_fetch2(db, keystr));
        if (RTEST(rb_yield(pair))) {
            rb_ary_push(selected, pair);
        }

        /* Re-read the handle after the block has run. */
        GetDBM2(obj, data, db);
        keystr = rb_gdbm_nextkey(db, keystr);
    }
    return selected;
}
Returns a new array of all key-value pairs of the database for which block evaluates to true.
/*
 * Removes the entry for the first key gdbm reports and returns it as a
 * [key, value] pair.  Returns Qnil when there is no first key.
 */
static VALUE
fgdbm_shift(VALUE obj)
{
    struct dbmdata *data;
    GDBM_FILE db;
    VALUE first, value;

    rb_gdbm_modify(obj);
    GetDBM2(obj, data, db);

    first = rb_gdbm_firstkey(db);
    if (!NIL_P(first)) {
        /* Read the value before the entry disappears. */
        value = rb_gdbm_fetch2(db, first);
        rb_gdbm_delete(obj, first);
        return rb_assoc_new(first, value);
    }
    return Qnil;
}
Removes a key-value-pair from this database and returns it as a two-item array [ key, value ]. Returns nil if the database is empty.
/*
 * Calls gdbm_sync on the open database and returns obj.
 */
static VALUE
fgdbm_sync(VALUE obj)
{
    struct dbmdata *data;
    GDBM_FILE db;

    rb_gdbm_modify(obj);
    GetDBM2(obj, data, db);

    gdbm_sync(db);

    return obj;
}
Unless the gdbm object has been opened with the SYNC flag, it is not guaranteed that database modification operations are immediately applied to the database file. This method ensures that all recent modifications to the database are written to the file. Blocks until all writing operations to the disk have been finished.
/*
 * Turns synchronization mode on (truthy val) or off (falsy val).
 *
 * When the gdbm headers do not define GDBM_SYNCMODE, the request is
 * forwarded to fgdbm_set_fastmode with the boolean inverted.  Otherwise
 * the GDBM_SYNCMODE option is set through gdbm_setopt.
 *
 * Returns val; raises rb_eGDBMError if gdbm_setopt returns -1.
 */
static VALUE
fgdbm_set_syncmode(VALUE obj, VALUE val)
{
#if !defined(GDBM_SYNCMODE)
    fgdbm_set_fastmode(obj, RTEST(val) ? Qfalse : Qtrue);
    return val;
#else
    struct dbmdata *dbmp;
    GDBM_FILE dbm;
    int optval;

    GetDBM2(obj, dbmp, dbm);
    optval = RTEST(val) ? 1 : 0;

    /* This must be GDBM_SYNCMODE.  Passing GDBM_FASTMODE here would switch
     * fast mode ON for a truthy val, which is the opposite of what the
     * caller asked for. */
    if (gdbm_setopt(dbm, GDBM_SYNCMODE, &optval, sizeof(optval)) == -1) {
        rb_raise(rb_eGDBMError, "%s", gdbm_strerror(gdbm_errno));
    }
    return val;
#endif
}
Turns the database’s synchronization mode on or off. If the synchronization mode is turned on, the database’s in-memory state will be synchronized to disk after every database modification operation. If the synchronization mode is turned off, GDBM does not wait for writes to be flushed to the disk before continuing.
This option is only available for gdbm >= 1.8 where syncmode is turned off by default. See also: fastmode=
/*
 * Returns a new Ruby array of [key, value] pairs, one per entry, in
 * traversal order.
 */
static VALUE
fgdbm_to_a(VALUE obj)
{
    struct dbmdata *data;
    GDBM_FILE db;
    VALUE keystr, pairs;

    GetDBM2(obj, data, db);
    pairs = rb_ary_new();

    keystr = rb_gdbm_firstkey(db);
    while (RTEST(keystr)) {
        rb_ary_push(pairs, rb_assoc_new(keystr, rb_gdbm_fetch2(db, keystr)));
        keystr = rb_gdbm_nextkey(db, keystr);
    }
    return pairs;
}
Returns an array of all key-value pairs contained in the database.
/*
 * Returns a new Ruby hash that maps every key in the database to its
 * value.
 */
static VALUE
fgdbm_to_hash(VALUE obj)
{
    struct dbmdata *data;
    GDBM_FILE db;
    VALUE keystr, result;

    GetDBM2(obj, data, db);
    result = rb_hash_new();

    keystr = rb_gdbm_firstkey(db);
    while (RTEST(keystr)) {
        rb_hash_aset(result, keystr, rb_gdbm_fetch2(db, keystr));
        keystr = rb_gdbm_nextkey(db, keystr);
    }
    return result;
}
Returns a hash of all key-value pairs contained in the database.
/*
 * Calls other.each_pair with update_i as the block function and obj as its
 * extra argument.  Existing entries are not cleared first.  Returns obj.
 */
static VALUE
fgdbm_update(VALUE obj, VALUE other)
{
    ID each_pair = rb_intern("each_pair");

    rb_block_call(other, each_pair, 0, 0, update_i, obj);
    return obj;
}
Adds the key-value pairs of other to gdbm, overwriting entries with duplicate keys with those from other. other must have an each_pair method.
/*
 * Returns a new Ruby array holding the value of every entry, in traversal
 * order.  Works on raw gdbm datum keys and frees each key buffer once its
 * successor and value have been fetched.
 */
static VALUE
fgdbm_values(VALUE obj)
{
    struct dbmdata *data;
    GDBM_FILE db;
    datum cur, following;
    VALUE value, result;

    GetDBM2(obj, data, db);
    result = rb_ary_new();

    cur = gdbm_firstkey(db);
    while (cur.dptr) {
        following = gdbm_nextkey(db, cur);
        value = rb_gdbm_fetch(db, cur);
        /* Release the key buffer before the array push. */
        free(cur.dptr);
        rb_ary_push(result, value);
        cur = following;
    }
    return result;
}
Returns an array of all values of this database.
/*
 * Looks up each of the argc keys in argv with rb_gdbm_fetch3 and returns
 * the results as a new Ruby array, in argument order.
 */
static VALUE
fgdbm_values_at(int argc, VALUE *argv, VALUE obj)
{
    VALUE found = rb_ary_new2(argc);
    int idx = 0;

    while (idx < argc) {
        rb_ary_push(found, rb_gdbm_fetch3(obj, argv[idx]));
        idx++;
    }
    return found;
}
Returns an array of the values associated with each specified key.