~ehmry/xapian-nim

ecd5ee5eb09c794cac8400ff976e74173e03f0d8 — Emery Hemingway 2 years ago trunk
Initial commit
2 files changed, 292 insertions(+), 0 deletions(-)

A src/xapian.nim
A xapian.nimble
A  => src/xapian.nim +280 -0
@@ 1,280 @@
# SPDX-FileCopyrightText: 2022 Emery Hemingway
#
# SPDX-License-Identifier: GPL-2.0-or-later

# Compiler and linker flags are obtained by running pkg-config at compile time.
{.passC: staticExec("pkg-config --cflags xapian-core").}
{.passL: staticExec("pkg-config --libs xapian-core").}

# Shorthand pragmas used by the declarations below:
# - `xapianHeader` attaches the "xapian.h" header to a symbol;
# - `importXapian` additionally maps the Nim name to `Xapian::<name>`;
# - `importGetter` maps a proc named `foo` to the call `<self>.get_foo()`
#   (not referenced in the visible part of this file).
{.pragma: xapianHeader, header: "xapian.h".}
{.pragma: importXapian, xapianHeader, importcpp: "Xapian::$1".}
{.pragma: importGetter, importcpp: "#.get_$1()".}

# Minimal binding of C++ `std::string`, used to pass byte strings to and
# from the imported C++ functions.
type CppString {.importcpp: "std::string", header: "<string>", byRef.} = object
# Bound to the C++ member function `data`; returned as an untyped pointer.
proc data(s: CppString): pointer {.importcpp.}
# Bound to the C++ member function `length`.
proc length(s: CppString): csize_t {.importcpp.}

# Construct a C++ string from the pointer `str` and the byte count `len`.
proc basic_string(str: ptr char; len: int): CppString {.
  constructor, importcpp: "std::basic_string(@)".}

proc toCpp(s: string): CppString =
  ## Copy the bytes of `s` into a new C++ string.
  ##
  ## The length is passed explicitly, so embedded NUL bytes are preserved.
  # `cstring(s)` yields a valid pointer even for an empty string, whereas
  # indexing `s[0]` fails the bounds check when `s.len == 0`.
  basic_string(cast[ptr char](cstring(s)), s.len)

proc `$`(cpp: CppString): string =
  ## Copy the bytes of the C++ string `cpp` into a new Nim string.
  ## An empty C++ string yields an empty Nim string.
  let byteCount = cpp.length
  if byteCount == 0:
    return
  result.setLen(byteCount)
  copyMem(addr result[0], cpp.data, result.len)

# Integer constants intended for the `flags` parameters of the database
# constructors declared later in this file.
# NOTE(review): the values presumably mirror the `Xapian::DB_*` constants of
# the C++ headers — confirm against the installed xapian-core version.
const
  # 0x00..0x03 are consecutive values rather than independent bits.
  DB_CREATE_OR_OPEN* = 0x00
  DB_CREATE_OR_OVERWRITE* = 0x01
  DB_CREATE* = 0x02
  DB_OPEN* = 0x03
  # Single-bit values from 0x04 to 0x40.
  DB_NO_SYNC* = 0x04
  DB_FULL_SYNC* = 0x08
  DB_DANGEROUS* = 0x10
  DB_NO_TERMLIST* = 0x20
  DB_RETRY_LOCK* = 0x40
  # Backend selectors occupy the 0x100..0x500 range.
  DB_BACKEND_GLASS* = 0x100
  DB_BACKEND_CHERT* = 0x200
  DB_BACKEND_STUB* = 0x300
  DB_BACKEND_INMEMORY* = 0x400
  DB_BACKEND_HONEY* = 0x500

# Integer widths are chosen at compile time by the defines
# `enable_64bit_docid`, `enable_64bit_termcount` and `enable_64bit_termpos`;
# without them the 32-bit variants are used.
# NOTE(review): these presumably have to match how xapian-core itself was
# configured — confirm.
when defined(enable_64bit_docid):
  type
    DocId* = distinct uint64
    DocCount* = uint64
else:
  type
    DocId* = distinct uint32
    DocCount* = uint32

when defined(enable_64bit_termcount):
  type TermCount* = uint64
else:
  type TermCount* = uint32

when defined(enable_64bit_termpos):
  type TermPos* = uint64
else:
  type TermPos* = uint32

# `DocId` is a distinct type, so stringification and equality are borrowed
# from its underlying integer type.
proc `$`*(id: DocId): string {.borrow.}
proc `==`*(a, b: DocId): bool {.borrow.}

type
  # NOTE(review): the corresponding C++ typedefs (`Xapian::rev`,
  # `Xapian::totallength`) are believed to be unsigned — confirm whether
  # signed 64-bit integers are intended here.
  Rev* = int64
  TotalLength* = int64

  # Imported exception classes; each Nim type maps to `Xapian::<name>` and
  # the `object of` clauses declare the inheritance tree rooted at `Error`.
  Error* {.importXapian, inheritable.} = object
  LogicError* {.importXapian.} = object of Error
  RuntimeError* {.importXapian.} = object of Error
  AssertionError* {.importXapian.} = object of LogicError
  InvalidArgumentError* {.importXapian.} = object of LogicError
  InvalidOperationError* {.importXapian.} = object of LogicError
  UnimplementedError* {.importXapian.} = object of LogicError
  DatabaseError* {.importXapian.} = object of RuntimeError
  DatabaseCorruptError* {.importXapian.} = object of DatabaseError
  DatabaseCreateError* {.importXapian.} = object of DatabaseError
  DatabaseLockError* {.importXapian.} = object of DatabaseError
  DatabaseModifiedError* {.importXapian.} = object of DatabaseError
  DatabaseOpeningError* {.importXapian.} = object of DatabaseError
  DatabaseVersionError* {.importXapian.} = object of DatabaseOpeningError
  DocNotFoundError* {.importXapian.} = object of RuntimeError
  FeatureUnavailableError* {.importXapian.} = object of RuntimeError
  InternalError* {.importXapian.} = object of RuntimeError
  NetworkError* {.importXapian.} = object of RuntimeError
  NetworkTimeoutError* {.importXapian.} = object of NetworkError
  QueryParserError* {.importXapian.} = object of RuntimeError
  SerialisationError* {.importXapian.} = object of RuntimeError
  RangeError* {.importXapian.} = object of RuntimeError
  WildcardError* {.importXapian.} = object of RuntimeError
  DatabaseNotFoundError* {.importXapian.} = object of DatabaseOpeningError
  DatabaseClosedError* {.importXapian.} = object of DatabaseError

  # Imported API classes. They are opaque on the Nim side (no fields are
  # declared) and carry the `byRef` pragma.
  Database* {.importXapian, byRef.} = object
  WritableDatabase* {.importXapian, byRef.} = object
  Document* {.importXapian, byRef.} = object
  Enquire* {.importXapian, byRef.} = object
  MSet* {.importXapian, byRef.} = object
  # Not exported: used internally to walk an `MSet`.
  MSetIterator {.importXapian, byRef.} = object
  Query* {.importXapian, byRef.} = object
  QueryParser* {.importXapian, byRef.} = object
  Stem* {.importXapian, byRef.} = object
  TermGenerator* {.importXapian, byRef.} = object

# Bindings of the free functions `Xapian::sortable_serialise` and
# `Xapian::sortable_unserialise`, which convert between a C double and a
# C++ string. They are used below to store numbers in document value slots.
proc sortable_serialise(n: cdouble): CppString {.importcpp: "Xapian::$1(@)".}
proc sortable_unserialise(s: CppString): cdouble {.importcpp: "Xapian::$1(@)".}

# Database

# Construct a `Database` by calling the C++ constructor with `path` and
# `flags`.
proc initDatabase*(path: cstring; flags = cint 0): Database {.
  constructor, importcpp: "Xapian::Database(@)".}

# The procs below that use a bare `importcpp` are bound to the C++ member
# function of the same name, called on `db`.
proc reopen*(db: Database): bool {.importcpp.}
proc close*(db: Database) {.importcpp.}
proc has_positions*(db: Database): bool {.importcpp.}
proc get_doccount*(db: Database): DocCount {.importcpp.}
proc get_lastdocid*(db: Database): DocId {.importcpp.}
proc get_average_length*(db: Database): float64 {.importcpp.}
proc get_total_length*(db: Database): TotalLength {.importcpp.}
proc get_document*(db: Database; id: DocId; flags = cint 0): Document {.importcpp.}
# Private: returns the UUID as a C++ string; `uuid` is the public wrapper.
proc get_uuid(db: Database): CppString {.importcpp.}
# The result of `get_uuid`, copied into a Nim string.
proc uuid*(db: Database): string = $get_uuid(db)
proc locked*(db: Database): bool {.importcpp.}
proc lock*(db: Database; flags = cint 0): WritableDatabase {.importcpp.}
proc unlock*(db: Database|WritableDatabase): Database {.importcpp.}

# Construct a `WritableDatabase` by calling the C++ constructor with `path`,
# `flags` and `blockSize`.
proc initWritableDatabase*(path: cstring; flags = cint 0; blockSize = cint 0): WritableDatabase {.
  constructor, importcpp: "Xapian::WritableDatabase(@)".}

# Bound to the C++ member function `add_document`; returns a `DocId`.
proc add_document*(db: WritableDatabase; doc: Document): DocId {.importcpp.}


# Document

# Private: bound to the C++ member function `get_data`, which returns a
# C++ string.
proc get_data(doc: Document): CppString {.importcpp.}

# The document data, copied into a Nim string.
proc data*(doc: Document): string = $get_data(doc)

# Private: call the C++ member function `set_data` with a temporary C++
# string built from the pointer `data` and the byte count `len`.
proc set_data(doc: Document; data: ptr char; len: int) {.
  importcpp: "#.set_data(std::basic_string(@))".}

proc `data=`*(doc: Document; data: string) =
  ## Replace the data of `doc` with the bytes of `data`.
  ##
  ## The length is passed explicitly, so embedded NUL bytes are preserved,
  ## and an empty string is accepted.
  # `cstring(data)` yields a valid pointer even for an empty string, whereas
  # indexing `data[0]` fails the bounds check when `data.len == 0`.
  doc.set_data(cast[ptr char](cstring(data)), len data)

# Value-slot primitives, each bound to the C++ member function of the same
# name. The first three are private and are wrapped by `[]`, `[]=`, `value`
# and `del` below.
proc get_value(doc: Document; slot: Natural): CppString {.importcpp.}
proc add_value(doc: Document; slot: Natural; s: CppString) {.importcpp.}
proc remove_value(doc: Document; slot: Natural) {.importcpp.}
proc clear_values*(doc: Document) {.importcpp.}
proc values_count*(doc: Document): cuint {.importcpp.}

proc `[]`*(doc: Document; slot: Natural): string =
  ## The value stored in `slot` of `doc`, copied into a Nim string.
  $doc.get_value(slot)

proc `[]=`*(doc: Document; slot: Natural; val: string) =
  ## Store the bytes of `val` in `slot` of `doc`.
  doc.add_value(slot, toCpp(val))

proc `[]=`*(doc: Document; slot: Natural; val: SomeNumber) =
  ## Store the number `val` in `slot` of `doc`. The number is converted to
  ## `cdouble` and encoded with `sortable_serialise`.
  doc.add_value(slot, sortable_serialise(val.cdouble))

proc value*[T : SomeNumber](doc: Document; slot: Natural; default: T): T =
  ## Read `slot` of `doc` as a number of type `T`.
  ##
  ## Returns `default` when the stored value is empty; otherwise the value
  ## is decoded with `sortable_unserialise` and converted to `T`.
  var raw = doc.get_value(slot)
  if raw.length != 0:
    T(sortable_unserialise(raw))
  else:
    default

proc `del`*(doc: Document; slot: Natural) =
  ## Remove the value stored in `slot` of `doc`.
  doc.remove_value(slot)

# Enquire

# Construct an `Enquire` by calling the C++ constructor with `db`.
proc initEnquire*(db: Database): Enquire {.
  constructor, importcpp: "Xapian::Enquire(@)".}

# Property-style getter, bound to the C++ member function `get_query`.
proc `query`*(e: Enquire): Query {.
  importcpp: "#.get_query(@)".}

# Property-style setter, bound to the C++ member function `set_query`.
proc `query=`*(e: Enquire; q: Query) {.
  importcpp: "#.set_query(@)".}

# Direct binding of `set_query` that also exposes the `query_length`
# argument.
proc `set_query`*(e: Enquire; q: Query; query_length = TermCount 0) {.
  importcpp.}

# Bound to the C++ member function `get_mset`; returns an `MSet`.
proc get_mset*(e: Enquire;
    first, maxItems: DocCount;
    checkAtLeast = DocCount 0): MSet {.
  importcpp.}

# MSet
# Counters of an `MSet`, each bound to the C++ member function of the same
# name and returned as `DocCount`.
proc size*(m: MSet): DocCount {.importcpp.}
proc get_matches_estimated*(m: MSet): DocCount {.importcpp.}
proc get_matches_upper_bound*(m: MSet): DocCount {.importcpp.}
proc get_matches_lower_bound*(m: MSet): DocCount {.importcpp.}
proc get_uncollapsed_matches_lower_bound*(m: MSet): DocCount {.importcpp.}
proc get_uncollapsed_matches_estimated*(m: MSet): DocCount {.importcpp.}
proc get_uncollapsed_matches_upper_bound*(m: MSet): DocCount {.importcpp.}
# Weight bounds of an `MSet`. The C++ member functions return these weights
# as `double`, so they are bound as `float64`; an integer return type would
# silently truncate the fractional part of the weight.
proc get_max_attained*(m: MSet): float64 {.importcpp.}
proc get_max_possible*(m: MSet): float64 {.importcpp.}
# Bound to the C++ member function `get_firstitem`.
proc get_firstitem*(m: MSet): DocCount {.importcpp.}

# Private iterator primitives, bound to the C++ `begin` and `end` members.
proc begin(m: MSet): MSetIterator {.importcpp.}
proc `end`(m: MSet): MSetIterator {.importcpp.}
# Bound to the C++ subscript operator of `MSet`; yields an iterator.
proc `[]`*(m: MSet; i: int|DocCount): MSetIterator {.importcpp: "#[#]".}

# Advance the iterator with the C++ pre-increment operator.
proc next(iter: MSetIterator) {.importcpp: "++#".}

# Compare two iterators with the C++ equality operator.
proc `==`(a, b: MSetIterator): bool {.importcpp: "# == #".}

# Accessors for the entry an `MSetIterator` points at. Apart from
# `get_docid`, each is bound to the C++ member function of the same name.
proc get_docid*(iter: MSetIterator): DocId {.importcpp: "*#".} # the docid is read by dereferencing the iterator
proc get_rank(iter: MSetIterator): DocCount {.importcpp.}
proc get_document*(iter: MSetIterator): Document {.importcpp.}
proc get_weight*(iter: MSetIterator): float {.importcpp.}
# NOTE(review): this proc and `get_sort_key` are exported but return the
# non-exported `CppString` type — confirm that this is intended.
proc get_collapse_key*(iter: MSetIterator): CppString {.importcpp.}
proc get_collapse_count*(iter: MSetIterator): DocCount {.importcpp.}
proc get_sort_key*(iter: MSetIterator): CppString {.importcpp.}
proc get_percent*(iter: MSetIterator): int {.importcpp.}

# A snapshot of one `MSet` entry, filled in by `initMSetItem` and yielded by
# the `items` iterator.
type MSetItem* = object
  # Private: the originating `MSet` and its `get_firstitem()` value.
  mset: MSet
  firstItem: DocCount
  # Public fields, copied from the iterator accessors of the same name.
  docid*: DocId
  weight*: float64
  rank*: DocCount
  percent*: int
  collapseKey*: string
  collapseCount*: DocCount

proc initMSetItem(iter: MSetIterator; mset: MSet): MSetItem =
  ## Capture the entry at `iter` as an `MSetItem`.
  ##
  ## `mset` is stored together with its `get_firstitem()` value; all other
  ## fields are copied from the iterator accessors.
  result.mset = mset
  result.firstItem = mset.get_firstitem()
  result.docid = iter.get_docid()
  result.weight = iter.get_weight()
  result.rank = iter.get_rank()
  result.percent = iter.get_percent()
  result.collapseKey = $iter.get_collapse_key()
  result.collapseCount = iter.get_collapse_count()

proc document*(item: MSetItem): Document =
  ## Fetch the document that `item` refers to from its originating `MSet`.
  # `rank` counts from the start of the whole match (the first entry of an
  # `MSet` has rank `get_firstitem()`), while `MSet` is indexed from its own
  # first entry. The stored `firstItem` offset is therefore removed;
  # indexing with the raw rank is wrong when the `MSet` starts past 0.
  item.mset[item.rank - item.firstItem].get_document()

iterator items*(mset: MSet): MSetItem =
  ## Yield one `MSetItem` for every entry of `mset`, walking from `begin`
  ## until the `end` iterator is reached.
  let stop = mset.`end`()
  var cursor = mset.begin()
  while not (cursor == stop):
    yield initMSetItem(cursor, mset)
    cursor.next()

# Query

# QueryParser

# Imported C++ enum `Xapian::QueryParser::stem_strategy`, accepted by
# `set_stemming_strategy`.
type StemStrategy* {.importcpp: "Xapian::QueryParser::stem_strategy".} = enum
  STEM_NONE, STEM_SOME, STEM_ALL, STEM_ALL_Z, STEM_SOME_FULL_POS

# QueryParser configuration, each bound to the C++ member function of the
# same name.
proc set_stemmer*(qp: QueryParser; s: Stem) {.importcpp.}
proc set_stemming_strategy*(qp: QueryParser; s: StemStrategy) {.importcpp.}
proc set_database*(qp: QueryParser; db: Database) {.importcpp.}

# Bound to the C++ member function `parse_query`; returns a `Query`.
proc parse_query*(qp: QueryParser; query: cstring): Query {.importcpp.}

# Stem

# Construct a `Stem` by calling the C++ constructor with `language` and
# `fallback`.
proc initStem*(language: cstring; fallback = false): Stem {.
  constructor, importcpp: "Xapian::Stem(@)".}

# TermGenerator

# Property-style getter, bound to the C++ member function `get_document`.
# That function takes no arguments and returns the document, so the proc
# takes only the generator and returns a `Document`, in the same way as the
# `query` getter of `Enquire`.
proc `document`*(tg: TermGenerator): Document {.
  importcpp: "#.get_document(@)".}

# Property-style setter, bound to the C++ member function `set_document`.
proc `document=`*(tg: TermGenerator; data: Document) {.
  importcpp: "#.set_document(@)".}

# Bound to the C++ member function `index_text`; `wdf_inc` defaults to 1 and
# `prefix` to the empty string.
proc index_text*(tg: TermGenerator;
    text: cstring; wdf_inc = TermCount 1; prefix = cstring "") {.
  importcpp.}

# Bound to the C++ member function `set_stemmer`.
proc set_stemmer*(tg: TermGenerator; stemmer: Stem) {.importcpp.}

# Every imported class that offers a `get_description` member function.
# `WritableDatabase` and `TermGenerator` are included so that `$` works for
# all API classes declared in this module.
type Describeable =
  Enquire | Database | WritableDatabase | Document | MSet | MSetIterator |
  Query | QueryParser | Stem | TermGenerator

# Private: bound to the C++ member function `get_description`.
proc get_description(x: Describeable): CppString {.importcpp.}

proc `$`*(x: Describeable): string =
  ## The description reported by `get_description`, copied into a Nim string.
  $get_description(x)

A  => xapian.nimble +12 -0
@@ 1,12 @@
# Package

# NOTE(review): the version string looks like a date stamp (YYYYMMDD) rather
# than a dotted release number — confirm that this is intended.
version       = "20220421"
author        = "Emery Hemingway"
description   = "Xapian library wrapper"
license       = "GPL-2.0-or-later"
# The module sources live under `src/`.
srcDir        = "src"


# Dependencies

requires "nim >= 1.6.4"