mirror of
https://github.com/opnsense/src.git
synced 2026-05-28 04:12:45 -04:00
sqlite3: Vendor import of sqlite3 3.37.1
Changes at https://www.sqlite.org/releaselog/3_37_1.html.
This commit is contained in:
parent
0511e356f5
commit
9b0b0740be
1940 changed files with 974318 additions and 277136 deletions
1
.fossil-settings/empty-dirs
Normal file
1
.fossil-settings/empty-dirs
Normal file
|
|
@ -0,0 +1 @@
|
|||
compat
|
||||
1
.fossil-settings/ignore-glob
Normal file
1
.fossil-settings/ignore-glob
Normal file
|
|
@ -0,0 +1 @@
|
|||
compat/*
|
||||
6
LICENSE.md
Normal file
6
LICENSE.md
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
The author disclaims copyright to this source code. In place of
|
||||
a legal notice, here is a blessing:
|
||||
|
||||
* May you do good and not evil.
|
||||
* May you find forgiveness for yourself and forgive others.
|
||||
* May you share freely, never taking more than you give.
|
||||
2485
Makefile.in
2485
Makefile.in
File diff suppressed because it is too large
Load diff
115
Makefile.linux-gcc
Normal file
115
Makefile.linux-gcc
Normal file
|
|
@ -0,0 +1,115 @@
|
|||
#!/usr/make
|
||||
#
|
||||
# Makefile for SQLITE
|
||||
#
|
||||
# This is a template makefile for SQLite. Most people prefer to
|
||||
# use the autoconf generated "configure" script to generate the
|
||||
# makefile automatically. But that does not work for everybody
|
||||
# and in every situation. If you are having problems with the
|
||||
# "configure" script, you might want to try this makefile as an
|
||||
# alternative. Create a copy of this file, edit the parameters
|
||||
# below and type "make".
|
||||
#
|
||||
|
||||
#### The toplevel directory of the source tree. This is the directory
|
||||
# that contains this "Makefile.in" and the "configure.in" script.
|
||||
#
|
||||
TOP = ../sqlite
|
||||
|
||||
#### C Compiler and options for use in building executables that
|
||||
# will run on the platform that is doing the build.
|
||||
#
|
||||
BCC = gcc -g -O0
|
||||
#BCC = /opt/ancic/bin/c89 -0
|
||||
|
||||
#### If the target operating system supports the "usleep()" system
|
||||
# call, then define the HAVE_USLEEP macro for all C modules.
|
||||
#
|
||||
#USLEEP =
|
||||
USLEEP = -DHAVE_USLEEP=1
|
||||
|
||||
#### If you want the SQLite library to be safe for use within a
|
||||
# multi-threaded program, then define the following macro
|
||||
# appropriately:
|
||||
#
|
||||
#THREADSAFE = -DTHREADSAFE=1
|
||||
THREADSAFE = -DTHREADSAFE=0
|
||||
|
||||
#### Specify any extra linker options needed to make the library
|
||||
# thread safe
|
||||
#
|
||||
THREADLIB = -lpthread -lm -ldl
|
||||
#THREADLIB =
|
||||
|
||||
#### Specify any extra libraries needed to access required functions.
|
||||
#
|
||||
#TLIBS = -lrt # fdatasync on Solaris 8
|
||||
TLIBS =
|
||||
|
||||
#### Leave SQLITE_DEBUG undefined for maximum speed. Use SQLITE_DEBUG=1
|
||||
# to check for memory leaks. Use SQLITE_DEBUG=2 to print a log of all
|
||||
# malloc()s and free()s in order to track down memory leaks.
|
||||
#
|
||||
# SQLite uses some expensive assert() statements in the inner loop.
|
||||
# You can make the library go almost twice as fast if you compile
|
||||
# with -DNDEBUG=1
|
||||
#
|
||||
OPTS += -DSQLITE_DEBUG=1
|
||||
OPTS += -DSQLITE_ENABLE_WHERETRACE
|
||||
OPTS += -DSQLITE_ENABLE_SELECTTRACE
|
||||
|
||||
#### The suffix to add to executable files. ".exe" for windows.
|
||||
# Nothing for unix.
|
||||
#
|
||||
#EXE = .exe
|
||||
EXE =
|
||||
|
||||
#### C Compile and options for use in building executables that
|
||||
# will run on the target platform. This is usually the same
|
||||
# as BCC, unless you are cross-compiling.
|
||||
#
|
||||
TCC = gcc -O0
|
||||
#TCC = gcc -g -O0 -Wall
|
||||
#TCC = gcc -g -O0 -Wall -fprofile-arcs -ftest-coverage
|
||||
#TCC = /opt/mingw/bin/i386-mingw32-gcc -O6
|
||||
#TCC = /opt/ansic/bin/c89 -O +z -Wl,-a,archive
|
||||
|
||||
#### Tools used to build a static library.
|
||||
#
|
||||
AR = ar cr
|
||||
#AR = /opt/mingw/bin/i386-mingw32-ar cr
|
||||
RANLIB = ranlib
|
||||
#RANLIB = /opt/mingw/bin/i386-mingw32-ranlib
|
||||
|
||||
MKSHLIB = gcc -shared
|
||||
SO = so
|
||||
SHPREFIX = lib
|
||||
# SO = dll
|
||||
# SHPREFIX =
|
||||
|
||||
#### Extra compiler options needed for programs that use the TCL library.
|
||||
#
|
||||
TCL_FLAGS = -I/home/drh/tcl/include/tcl8.6
|
||||
|
||||
#### Linker options needed to link against the TCL library.
|
||||
#
|
||||
#LIBTCL = -ltcl -lm -ldl
|
||||
LIBTCL = /home/drh/tcl/lib/libtcl8.6.a -lm -lpthread -ldl -lz
|
||||
|
||||
#### Additional objects for SQLite library when TCL support is enabled.
|
||||
#TCLOBJ =
|
||||
TCLOBJ = tclsqlite.o
|
||||
|
||||
#### Compiler options needed for programs that use the readline() library.
|
||||
#
|
||||
READLINE_FLAGS =
|
||||
#READLINE_FLAGS = -DHAVE_READLINE=1 -I/usr/include/readline
|
||||
|
||||
#### Linker options needed by programs using readline() must link against.
|
||||
#
|
||||
LIBREADLINE =
|
||||
#LIBREADLINE = -static -lreadline -ltermcap
|
||||
|
||||
# You should not have to change anything below this line
|
||||
###############################################################################
|
||||
include $(TOP)/main.mk
|
||||
1679
Makefile.msc
1679
Makefile.msc
File diff suppressed because it is too large
Load diff
327
README.md
Normal file
327
README.md
Normal file
|
|
@ -0,0 +1,327 @@
|
|||
<h1 align="center">SQLite Source Repository</h1>
|
||||
|
||||
This repository contains the complete source code for the
|
||||
[SQLite database engine](https://sqlite.org/). Some test scripts
|
||||
are also included. However, many other test scripts
|
||||
and most of the documentation are managed separately.
|
||||
|
||||
## Version Control
|
||||
|
||||
SQLite sources are managed using the
|
||||
[Fossil](https://www.fossil-scm.org/), a distributed version control system
|
||||
that was specifically designed and written to support SQLite development.
|
||||
The [Fossil repository](https://sqlite.org/src/timeline) contains the urtext.
|
||||
|
||||
If you are reading this on GitHub or some other Git repository or service,
|
||||
then you are looking at a mirror. The names of check-ins and
|
||||
other artifacts in a Git mirror are different from the official
|
||||
names for those objects. The offical names for check-ins are
|
||||
found in a footer on the check-in comment for authorized mirrors.
|
||||
The official check-in name can also be seen in the `manifest.uuid` file
|
||||
in the root of the tree. Always use the official name, not the
|
||||
Git-name, when communicating about an SQLite check-in.
|
||||
|
||||
If you pulled your SQLite source code from a secondary source and want to
|
||||
verify its integrity, there are hints on how to do that in the
|
||||
[Verifying Code Authenticity](#vauth) section below.
|
||||
|
||||
## Obtaining The Code
|
||||
|
||||
If you do not want to use Fossil, you can download tarballs or ZIP
|
||||
archives or [SQLite archives](https://sqlite.org/cli.html#sqlar) as follows:
|
||||
|
||||
* Lastest trunk check-in as
|
||||
[Tarball](https://www.sqlite.org/src/tarball/sqlite.tar.gz),
|
||||
[ZIP-archive](https://www.sqlite.org/src/zip/sqlite.zip), or
|
||||
[SQLite-archive](https://www.sqlite.org/src/sqlar/sqlite.sqlar).
|
||||
|
||||
* Latest release as
|
||||
[Tarball](https://www.sqlite.org/src/tarball/sqlite.tar.gz?r=release),
|
||||
[ZIP-archive](https://www.sqlite.org/src/zip/sqlite.zip?r=release), or
|
||||
[SQLite-archive](https://www.sqlite.org/src/sqlar/sqlite.sqlar?r=release).
|
||||
|
||||
* For other check-ins, substitute an appropriate branch name or
|
||||
tag or hash prefix in place of "release" in the URLs of the previous
|
||||
bullet. Or browse the [timeline](https://www.sqlite.org/src/timeline)
|
||||
to locate the check-in desired, click on its information page link,
|
||||
then click on the "Tarball" or "ZIP Archive" links on the information
|
||||
page.
|
||||
|
||||
If you do want to use Fossil to check out the source tree,
|
||||
first install Fossil version 2.0 or later.
|
||||
(Source tarballs and precompiled binaries available
|
||||
[here](https://www.fossil-scm.org/fossil/uv/download.html). Fossil is
|
||||
a stand-alone program. To install, simply download or build the single
|
||||
executable file and put that file someplace on your $PATH.)
|
||||
Then run commands like this:
|
||||
|
||||
mkdir -p ~/sqlite ~/Fossils
|
||||
cd ~/sqlite
|
||||
fossil clone https://www.sqlite.org/src ~/Fossils/sqlite.fossil
|
||||
fossil open ~/Fossils/sqlite.fossil
|
||||
|
||||
After setting up a repository using the steps above, you can always
|
||||
update to the lastest version using:
|
||||
|
||||
fossil update trunk ;# latest trunk check-in
|
||||
fossil update release ;# latest official release
|
||||
|
||||
Or type "fossil ui" to get a web-based user interface.
|
||||
|
||||
## Compiling for Unix-like systems
|
||||
|
||||
First create a directory in which to place
|
||||
the build products. It is recommended, but not required, that the
|
||||
build directory be separate from the source directory. Cd into the
|
||||
build directory and then from the build directory run the configure
|
||||
script found at the root of the source tree. Then run "make".
|
||||
|
||||
For example:
|
||||
|
||||
tar xzf sqlite.tar.gz ;# Unpack the source tree into "sqlite"
|
||||
mkdir bld ;# Build will occur in a sibling directory
|
||||
cd bld ;# Change to the build directory
|
||||
../sqlite/configure ;# Run the configure script
|
||||
make ;# Run the makefile.
|
||||
make sqlite3.c ;# Build the "amalgamation" source file
|
||||
make test ;# Run some tests (requires Tcl)
|
||||
|
||||
See the makefile for additional targets.
|
||||
|
||||
The configure script uses autoconf 2.61 and libtool. If the configure
|
||||
script does not work out for you, there is a generic makefile named
|
||||
"Makefile.linux-gcc" in the top directory of the source tree that you
|
||||
can copy and edit to suit your needs. Comments on the generic makefile
|
||||
show what changes are needed.
|
||||
|
||||
## Using MSVC for Windows systems
|
||||
|
||||
On Windows, all applicable build products can be compiled with MSVC.
|
||||
First open the command prompt window associated with the desired compiler
|
||||
version (e.g. "Developer Command Prompt for VS2013"). Next, use NMAKE
|
||||
with the provided "Makefile.msc" to build one of the supported targets.
|
||||
|
||||
For example, from the parent directory of the source subtree named "sqlite":
|
||||
|
||||
mkdir bld
|
||||
cd bld
|
||||
nmake /f ..\sqlite\Makefile.msc TOP=..\sqlite
|
||||
nmake /f ..\sqlite\Makefile.msc sqlite3.c TOP=..\sqlite
|
||||
nmake /f ..\sqlite\Makefile.msc sqlite3.dll TOP=..\sqlite
|
||||
nmake /f ..\sqlite\Makefile.msc sqlite3.exe TOP=..\sqlite
|
||||
nmake /f ..\sqlite\Makefile.msc test TOP=..\sqlite
|
||||
|
||||
There are several build options that can be set via the NMAKE command
|
||||
line. For example, to build for WinRT, simply add "FOR_WINRT=1" argument
|
||||
to the "sqlite3.dll" command line above. When debugging into the SQLite
|
||||
code, adding the "DEBUG=1" argument to one of the above command lines is
|
||||
recommended.
|
||||
|
||||
SQLite does not require [Tcl](http://www.tcl.tk/) to run, but a Tcl installation
|
||||
is required by the makefiles (including those for MSVC). SQLite contains
|
||||
a lot of generated code and Tcl is used to do much of that code generation.
|
||||
|
||||
## Source Code Tour
|
||||
|
||||
Most of the core source files are in the **src/** subdirectory. The
|
||||
**src/** folder also contains files used to build the "testfixture" test
|
||||
harness. The names of the source files used by "testfixture" all begin
|
||||
with "test".
|
||||
The **src/** also contains the "shell.c" file
|
||||
which is the main program for the "sqlite3.exe"
|
||||
[command-line shell](https://sqlite.org/cli.html) and
|
||||
the "tclsqlite.c" file which implements the
|
||||
[Tcl bindings](https://sqlite.org/tclsqlite.html) for SQLite.
|
||||
(Historical note: SQLite began as a Tcl
|
||||
extension and only later escaped to the wild as an independent library.)
|
||||
|
||||
Test scripts and programs are found in the **test/** subdirectory.
|
||||
Addtional test code is found in other source repositories.
|
||||
See [How SQLite Is Tested](http://www.sqlite.org/testing.html) for
|
||||
additional information.
|
||||
|
||||
The **ext/** subdirectory contains code for extensions. The
|
||||
Full-text search engine is in **ext/fts3**. The R-Tree engine is in
|
||||
**ext/rtree**. The **ext/misc** subdirectory contains a number of
|
||||
smaller, single-file extensions, such as a REGEXP operator.
|
||||
|
||||
The **tool/** subdirectory contains various scripts and programs used
|
||||
for building generated source code files or for testing or for generating
|
||||
accessory programs such as "sqlite3_analyzer(.exe)".
|
||||
|
||||
### Generated Source Code Files
|
||||
|
||||
Several of the C-language source files used by SQLite are generated from
|
||||
other sources rather than being typed in manually by a programmer. This
|
||||
section will summarize those automatically-generated files. To create all
|
||||
of the automatically-generated files, simply run "make target_source".
|
||||
The "target_source" make target will create a subdirectory "tsrc/" and
|
||||
fill it with all the source files needed to build SQLite, both
|
||||
manually-edited files and automatically-generated files.
|
||||
|
||||
The SQLite interface is defined by the **sqlite3.h** header file, which is
|
||||
generated from src/sqlite.h.in, ./manifest.uuid, and ./VERSION. The
|
||||
[Tcl script](http://www.tcl.tk) at tool/mksqlite3h.tcl does the conversion.
|
||||
The manifest.uuid file contains the SHA3 hash of the particular check-in
|
||||
and is used to generate the SQLITE\_SOURCE\_ID macro. The VERSION file
|
||||
contains the current SQLite version number. The sqlite3.h header is really
|
||||
just a copy of src/sqlite.h.in with the source-id and version number inserted
|
||||
at just the right spots. Note that comment text in the sqlite3.h file is
|
||||
used to generate much of the SQLite API documentation. The Tcl scripts
|
||||
used to generate that documentation are in a separate source repository.
|
||||
|
||||
The SQL language parser is **parse.c** which is generate from a grammar in
|
||||
the src/parse.y file. The conversion of "parse.y" into "parse.c" is done
|
||||
by the [lemon](./doc/lemon.html) LALR(1) parser generator. The source code
|
||||
for lemon is at tool/lemon.c. Lemon uses the tool/lempar.c file as a
|
||||
template for generating its parser.
|
||||
Lemon also generates the **parse.h** header file, at the same time it
|
||||
generates parse.c.
|
||||
|
||||
The **opcodes.h** header file contains macros that define the numbers
|
||||
corresponding to opcodes in the "VDBE" virtual machine. The opcodes.h
|
||||
file is generated by the scanning the src/vdbe.c source file. The
|
||||
Tcl script at ./mkopcodeh.tcl does this scan and generates opcodes.h.
|
||||
A second Tcl script, ./mkopcodec.tcl, then scans opcodes.h to generate
|
||||
the **opcodes.c** source file, which contains a reverse mapping from
|
||||
opcode-number to opcode-name that is used for EXPLAIN output.
|
||||
|
||||
The **keywordhash.h** header file contains the definition of a hash table
|
||||
that maps SQL language keywords (ex: "CREATE", "SELECT", "INDEX", etc.) into
|
||||
the numeric codes used by the parse.c parser. The keywordhash.h file is
|
||||
generated by a C-language program at tool mkkeywordhash.c.
|
||||
|
||||
The **pragma.h** header file contains various definitions used to parse
|
||||
and implement the PRAGMA statements. The header is generated by a
|
||||
script **tool/mkpragmatab.tcl**. If you want to add a new PRAGMA, edit
|
||||
the **tool/mkpragmatab.tcl** file to insert the information needed by the
|
||||
parser for your new PRAGMA, then run the script to regenerate the
|
||||
**pragma.h** header file.
|
||||
|
||||
### The Amalgamation
|
||||
|
||||
All of the individual C source code and header files (both manually-edited
|
||||
and automatically-generated) can be combined into a single big source file
|
||||
**sqlite3.c** called "the amalgamation". The amalgamation is the recommended
|
||||
way of using SQLite in a larger application. Combining all individual
|
||||
source code files into a single big source code file allows the C compiler
|
||||
to perform more cross-procedure analysis and generate better code. SQLite
|
||||
runs about 5% faster when compiled from the amalgamation versus when compiled
|
||||
from individual source files.
|
||||
|
||||
The amalgamation is generated from the tool/mksqlite3c.tcl Tcl script.
|
||||
First, all of the individual source files must be gathered into the tsrc/
|
||||
subdirectory (using the equivalent of "make target_source") then the
|
||||
tool/mksqlite3c.tcl script is run to copy them all together in just the
|
||||
right order while resolving internal "#include" references.
|
||||
|
||||
The amalgamation source file is more than 200K lines long. Some symbolic
|
||||
debuggers (most notably MSVC) are unable to deal with files longer than 64K
|
||||
lines. To work around this, a separate Tcl script, tool/split-sqlite3c.tcl,
|
||||
can be run on the amalgamation to break it up into a single small C file
|
||||
called **sqlite3-all.c** that does #include on about seven other files
|
||||
named **sqlite3-1.c**, **sqlite3-2.c**, ..., **sqlite3-7.c**. In this way,
|
||||
all of the source code is contained within a single translation unit so
|
||||
that the compiler can do extra cross-procedure optimization, but no
|
||||
individual source file exceeds 32K lines in length.
|
||||
|
||||
## How It All Fits Together
|
||||
|
||||
SQLite is modular in design.
|
||||
See the [architectural description](http://www.sqlite.org/arch.html)
|
||||
for details. Other documents that are useful in
|
||||
(helping to understand how SQLite works include the
|
||||
[file format](http://www.sqlite.org/fileformat2.html) description,
|
||||
the [virtual machine](http://www.sqlite.org/opcode.html) that runs
|
||||
prepared statements, the description of
|
||||
[how transactions work](http://www.sqlite.org/atomiccommit.html), and
|
||||
the [overview of the query planner](http://www.sqlite.org/optoverview.html).
|
||||
|
||||
Years of effort have gone into optimizating SQLite, both
|
||||
for small size and high performance. And optimizations tend to result in
|
||||
complex code. So there is a lot of complexity in the current SQLite
|
||||
implementation. It will not be the easiest library in the world to hack.
|
||||
|
||||
Key files:
|
||||
|
||||
* **sqlite.h.in** - This file defines the public interface to the SQLite
|
||||
library. Readers will need to be familiar with this interface before
|
||||
trying to understand how the library works internally.
|
||||
|
||||
* **sqliteInt.h** - this header file defines many of the data objects
|
||||
used internally by SQLite. In addition to "sqliteInt.h", some
|
||||
subsystems have their own header files.
|
||||
|
||||
* **parse.y** - This file describes the LALR(1) grammar that SQLite uses
|
||||
to parse SQL statements, and the actions that are taken at each step
|
||||
in the parsing process.
|
||||
|
||||
* **vdbe.c** - This file implements the virtual machine that runs
|
||||
prepared statements. There are various helper files whose names
|
||||
begin with "vdbe". The VDBE has access to the vdbeInt.h header file
|
||||
which defines internal data objects. The rest of SQLite interacts
|
||||
with the VDBE through an interface defined by vdbe.h.
|
||||
|
||||
* **where.c** - This file (together with its helper files named
|
||||
by "where*.c") analyzes the WHERE clause and generates
|
||||
virtual machine code to run queries efficiently. This file is
|
||||
sometimes called the "query optimizer". It has its own private
|
||||
header file, whereInt.h, that defines data objects used internally.
|
||||
|
||||
* **btree.c** - This file contains the implementation of the B-Tree
|
||||
storage engine used by SQLite. The interface to the rest of the system
|
||||
is defined by "btree.h". The "btreeInt.h" header defines objects
|
||||
used internally by btree.c and not published to the rest of the system.
|
||||
|
||||
* **pager.c** - This file contains the "pager" implementation, the
|
||||
module that implements transactions. The "pager.h" header file
|
||||
defines the interface between pager.c and the rest of the system.
|
||||
|
||||
* **os_unix.c** and **os_win.c** - These two files implement the interface
|
||||
between SQLite and the underlying operating system using the run-time
|
||||
pluggable VFS interface.
|
||||
|
||||
* **shell.c.in** - This file is not part of the core SQLite library. This
|
||||
is the file that, when linked against sqlite3.a, generates the
|
||||
"sqlite3.exe" command-line shell. The "shell.c.in" file is transformed
|
||||
into "shell.c" as part of the build process.
|
||||
|
||||
* **tclsqlite.c** - This file implements the Tcl bindings for SQLite. It
|
||||
is not part of the core SQLite library. But as most of the tests in this
|
||||
repository are written in Tcl, the Tcl language bindings are important.
|
||||
|
||||
* **test*.c** - Files in the src/ folder that begin with "test" go into
|
||||
building the "testfixture.exe" program. The testfixture.exe program is
|
||||
an enhanced Tcl shell. The testfixture.exe program runs scripts in the
|
||||
test/ folder to validate the core SQLite code. The testfixture program
|
||||
(and some other test programs too) is build and run when you type
|
||||
"make test".
|
||||
|
||||
* **ext/misc/json1.c** - This file implements the various JSON functions
|
||||
that are build into SQLite.
|
||||
|
||||
There are many other source files. Each has a succinct header comment that
|
||||
describes its purpose and role within the larger system.
|
||||
|
||||
<a name="vauth"></a>
|
||||
## Verifying Code Authenticity
|
||||
|
||||
The `manifest` file at the root directory of the source tree
|
||||
contains either a SHA3-256 hash (for newer files) or a SHA1 hash (for
|
||||
older files) for every source file in the repository.
|
||||
The SHA3-256 hash of the `manifest`
|
||||
file itself is the official name of the version of the source tree that you
|
||||
have. The `manifest.uuid` file should contain the SHA3-256 hash of the
|
||||
`manifest` file. If all of the above hash comparisons are correct, then
|
||||
you can be confident that your source tree is authentic and unadulterated.
|
||||
|
||||
The format of the `manifest` file should be mostly self-explanatory, but
|
||||
if you want details, they are available
|
||||
[here](https://fossil-scm.org/fossil/doc/trunk/www/fileformat.wiki#manifest).
|
||||
|
||||
## Contacts
|
||||
|
||||
The main SQLite website is [http://www.sqlite.org/](http://www.sqlite.org/)
|
||||
with geographically distributed backups at
|
||||
[http://www2.sqlite.org/](http://www2.sqlite.org) and
|
||||
[http://www3.sqlite.org/](http://www3.sqlite.org).
|
||||
1
VERSION
Normal file
1
VERSION
Normal file
|
|
@ -0,0 +1 @@
|
|||
3.37.1
|
||||
5537
aclocal.m4
vendored
5537
aclocal.m4
vendored
File diff suppressed because it is too large
Load diff
5333
art/sqlite370.eps
Normal file
5333
art/sqlite370.eps
Normal file
File diff suppressed because one or more lines are too long
BIN
art/sqlite370.ico
Normal file
BIN
art/sqlite370.ico
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 2.2 KiB |
BIN
art/sqlite370.jpg
Normal file
BIN
art/sqlite370.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 79 KiB |
1037
autoconf/Makefile.msc
Normal file
1037
autoconf/Makefile.msc
Normal file
File diff suppressed because it is too large
Load diff
11
autoconf/README.first
Normal file
11
autoconf/README.first
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
This directory contains components use to build an autoconf-ready package
|
||||
of the SQLite amalgamation: sqlite-autoconf-30XXXXXX.tar.gz
|
||||
|
||||
To build the autoconf amalgamation, run from the top-level:
|
||||
|
||||
./configure
|
||||
make amalgamation-tarball
|
||||
|
||||
The amalgamation-tarball target (also available in "main.mk") runs the
|
||||
script tool/mkautoconfamal.sh which does the work. Refer to that script
|
||||
for details.
|
||||
285
autoconf/configure.ac
Normal file
285
autoconf/configure.ac
Normal file
|
|
@ -0,0 +1,285 @@
|
|||
|
||||
#-----------------------------------------------------------------------
|
||||
# Supports the following non-standard switches.
|
||||
#
|
||||
# --enable-threadsafe
|
||||
# --enable-readline
|
||||
# --enable-editline
|
||||
# --enable-static-shell
|
||||
# --enable-dynamic-extensions
|
||||
#
|
||||
|
||||
AC_PREREQ(2.61)
|
||||
AC_INIT(sqlite, --SQLITE-VERSION--, http://www.sqlite.org)
|
||||
AC_CONFIG_SRCDIR([sqlite3.c])
|
||||
AC_CONFIG_AUX_DIR([.])
|
||||
|
||||
# Use automake.
|
||||
AM_INIT_AUTOMAKE([foreign])
|
||||
|
||||
AC_SYS_LARGEFILE
|
||||
|
||||
# Check for required programs.
|
||||
AC_PROG_CC
|
||||
AC_PROG_LIBTOOL
|
||||
AC_PROG_MKDIR_P
|
||||
|
||||
# Check for library functions that SQLite can optionally use.
|
||||
AC_CHECK_FUNCS([fdatasync usleep fullfsync localtime_r gmtime_r])
|
||||
AC_FUNC_STRERROR_R
|
||||
|
||||
AC_CONFIG_FILES([Makefile sqlite3.pc])
|
||||
BUILD_CFLAGS=
|
||||
AC_SUBST(BUILD_CFLAGS)
|
||||
|
||||
#-------------------------------------------------------------------------
|
||||
# Two options to enable readline compatible libraries:
|
||||
#
|
||||
# --enable-editline
|
||||
# --enable-readline
|
||||
#
|
||||
# Both are enabled by default. If, after command line processing both are
|
||||
# still enabled, the script searches for editline first and automatically
|
||||
# disables readline if it is found. So, to use readline explicitly, the
|
||||
# user must pass "--disable-editline". To disable command line editing
|
||||
# support altogether, "--disable-editline --disable-readline".
|
||||
#
|
||||
# When searching for either library, check for headers before libraries
|
||||
# as some distros supply packages that contain libraries but not header
|
||||
# files, which come as a separate development package.
|
||||
#
|
||||
AC_ARG_ENABLE(editline, [AS_HELP_STRING([--enable-editline],[use BSD libedit])])
|
||||
AC_ARG_ENABLE(readline, [AS_HELP_STRING([--enable-readline],[use readline])])
|
||||
|
||||
AS_IF([ test x"$enable_editline" != xno ],[
|
||||
AC_CHECK_HEADERS([editline/readline.h],[
|
||||
sLIBS=$LIBS
|
||||
LIBS=""
|
||||
AC_SEARCH_LIBS([readline],[edit],[
|
||||
AC_DEFINE([HAVE_EDITLINE],1,Define to use BSD editline)
|
||||
READLINE_LIBS="$LIBS -ltinfo"
|
||||
enable_readline=no
|
||||
],[],[-ltinfo])
|
||||
AS_UNSET(ac_cv_search_readline)
|
||||
LIBS=$sLIBS
|
||||
])
|
||||
])
|
||||
|
||||
AS_IF([ test x"$enable_readline" != xno ],[
|
||||
AC_CHECK_HEADERS([readline/readline.h],[
|
||||
sLIBS=$LIBS
|
||||
LIBS=""
|
||||
AC_SEARCH_LIBS(tgetent, termcap curses ncurses ncursesw, [], [])
|
||||
AC_SEARCH_LIBS(readline,[readline edit], [
|
||||
AC_DEFINE([HAVE_READLINE],1,Define to use readline or wrapper)
|
||||
READLINE_LIBS=$LIBS
|
||||
])
|
||||
LIBS=$sLIBS
|
||||
])
|
||||
])
|
||||
|
||||
AC_SUBST(READLINE_LIBS)
|
||||
#-----------------------------------------------------------------------
|
||||
|
||||
#-----------------------------------------------------------------------
|
||||
# --enable-threadsafe
|
||||
#
|
||||
AC_ARG_ENABLE(threadsafe, [AS_HELP_STRING(
|
||||
[--enable-threadsafe], [build a thread-safe library [default=yes]])],
|
||||
[], [enable_threadsafe=yes])
|
||||
if test x"$enable_threadsafe" == "xno"; then
|
||||
BUILD_CFLAGS="$BUILD_CFLAGS -DSQLITE_THREADSAFE=0"
|
||||
else
|
||||
BUILD_CFLAGS="$BUILD_CFLAGS -D_REENTRANT=1 -DSQLITE_THREADSAFE=1"
|
||||
AC_SEARCH_LIBS(pthread_create, pthread)
|
||||
AC_SEARCH_LIBS(pthread_mutexattr_init, pthread)
|
||||
fi
|
||||
#-----------------------------------------------------------------------
|
||||
|
||||
#-----------------------------------------------------------------------
|
||||
# --enable-dynamic-extensions
|
||||
#
|
||||
AC_ARG_ENABLE(dynamic-extensions, [AS_HELP_STRING(
|
||||
[--enable-dynamic-extensions], [support loadable extensions [default=yes]])],
|
||||
[], [enable_dynamic_extensions=yes])
|
||||
if test x"$enable_dynamic_extensions" != "xno"; then
|
||||
AC_SEARCH_LIBS(dlopen, dl)
|
||||
else
|
||||
BUILD_CFLAGS="$BUILD_CFLAGS -DSQLITE_OMIT_LOAD_EXTENSION=1"
|
||||
fi
|
||||
AC_MSG_CHECKING([for whether to support dynamic extensions])
|
||||
AC_MSG_RESULT($enable_dynamic_extensions)
|
||||
#-----------------------------------------------------------------------
|
||||
|
||||
#-----------------------------------------------------------------------
|
||||
# --enable-math
|
||||
#
|
||||
AC_ARG_ENABLE(math, [AS_HELP_STRING(
|
||||
[--enable-math], [SQL math functions [default=yes]])],
|
||||
[], [enable_math=yes])
|
||||
AC_MSG_CHECKING([SQL math functions])
|
||||
if test x"$enable_math" = "xyes"; then
|
||||
BUILD_CFLAGS="$BUILD_CFLAGS -DSQLITE_ENABLE_MATH_FUNCTIONS"
|
||||
AC_MSG_RESULT([enabled])
|
||||
AC_SEARCH_LIBS(ceil, m)
|
||||
else
|
||||
AC_MSG_RESULT([disabled])
|
||||
fi
|
||||
#-----------------------------------------------------------------------
|
||||
|
||||
#-----------------------------------------------------------------------
|
||||
# --enable-fts4
|
||||
#
|
||||
AC_ARG_ENABLE(fts4, [AS_HELP_STRING(
|
||||
[--enable-fts4], [include fts4 support [default=yes]])],
|
||||
[], [enable_fts4=yes])
|
||||
AC_MSG_CHECKING([FTS4 extension])
|
||||
if test x"$enable_fts4" = "xyes"; then
|
||||
BUILD_CFLAGS="$BUILD_CFLAGS -DSQLITE_ENABLE_FTS4"
|
||||
AC_MSG_RESULT([enabled])
|
||||
else
|
||||
AC_MSG_RESULT([disabled])
|
||||
fi
|
||||
#-----------------------------------------------------------------------
|
||||
|
||||
#-----------------------------------------------------------------------
|
||||
# --enable-fts3
|
||||
#
|
||||
AC_ARG_ENABLE(fts3, [AS_HELP_STRING(
|
||||
[--enable-fts3], [include fts3 support [default=no]])],
|
||||
[], [])
|
||||
AC_MSG_CHECKING([FTS3 extension])
|
||||
if test x"$enable_fts3" = "xyes" -a x"$enable_fts4" = "xno"; then
|
||||
BUILD_CFLAGS="$BUILD_CFLAGS -DSQLITE_ENABLE_FTS3"
|
||||
AC_MSG_RESULT([enabled])
|
||||
else
|
||||
AC_MSG_RESULT([disabled])
|
||||
fi
|
||||
#-----------------------------------------------------------------------
|
||||
|
||||
#-----------------------------------------------------------------------
|
||||
# --enable-fts5
|
||||
#
|
||||
AC_ARG_ENABLE(fts5, [AS_HELP_STRING(
|
||||
[--enable-fts5], [include fts5 support [default=yes]])],
|
||||
[], [enable_fts5=yes])
|
||||
AC_MSG_CHECKING([FTS5 extension])
|
||||
if test x"$enable_fts5" = "xyes"; then
|
||||
AC_MSG_RESULT([enabled])
|
||||
AC_SEARCH_LIBS(log, m)
|
||||
BUILD_CFLAGS="$BUILD_CFLAGS -DSQLITE_ENABLE_FTS5"
|
||||
else
|
||||
AC_MSG_RESULT([disabled])
|
||||
fi
|
||||
#-----------------------------------------------------------------------
|
||||
|
||||
#-----------------------------------------------------------------------
|
||||
# --enable-json1
|
||||
#
|
||||
AC_ARG_ENABLE(json1, [AS_HELP_STRING(
|
||||
[--enable-json1], [include json1 support [default=yes]])],
|
||||
[],[enable_json1=yes])
|
||||
AC_MSG_CHECKING([JSON functions])
|
||||
if test x"$enable_json1" = "xyes"; then
|
||||
BUILD_CFLAGS="$BUILD_CFLAGS -DSQLITE_ENABLE_JSON1"
|
||||
AC_MSG_RESULT([enabled])
|
||||
else
|
||||
AC_MSG_RESULT([disabled])
|
||||
fi
|
||||
#-----------------------------------------------------------------------
|
||||
|
||||
#-----------------------------------------------------------------------
|
||||
# --enable-rtree
|
||||
#
|
||||
AC_ARG_ENABLE(rtree, [AS_HELP_STRING(
|
||||
[--enable-rtree], [include rtree support [default=yes]])],
|
||||
[], [enable_rtree=yes])
|
||||
AC_MSG_CHECKING([RTREE extension])
|
||||
if test x"$enable_rtree" = "xyes"; then
|
||||
BUILD_CFLAGS="$BUILD_CFLAGS -DSQLITE_ENABLE_RTREE -DSQLITE_ENABLE_GEOPOLY"
|
||||
AC_MSG_RESULT([enabled])
|
||||
else
|
||||
AC_MSG_RESULT([disabled])
|
||||
fi
|
||||
#-----------------------------------------------------------------------
|
||||
|
||||
#-----------------------------------------------------------------------
|
||||
# --enable-session
|
||||
#
|
||||
AC_ARG_ENABLE(session, [AS_HELP_STRING(
|
||||
[--enable-session], [enable the session extension [default=no]])],
|
||||
[], [])
|
||||
AC_MSG_CHECKING([Session extension])
|
||||
if test x"$enable_session" = "xyes"; then
|
||||
BUILD_CFLAGS="$BUILD_CFLAGS -DSQLITE_ENABLE_SESSION -DSQLITE_ENABLE_PREUPDATE_HOOK"
|
||||
AC_MSG_RESULT([enabled])
|
||||
else
|
||||
AC_MSG_RESULT([disabled])
|
||||
fi
|
||||
#-----------------------------------------------------------------------
|
||||
|
||||
#-----------------------------------------------------------------------
|
||||
# --enable-debug
|
||||
#
|
||||
AC_ARG_ENABLE(debug, [AS_HELP_STRING(
|
||||
[--enable-debug], [build with debugging features enabled [default=no]])],
|
||||
[], [])
|
||||
AC_MSG_CHECKING([Build type])
|
||||
if test x"$enable_debug" = "xyes"; then
|
||||
BUILD_CFLAGS="$BUILD_CFLAGS -DSQLITE_DEBUG -DSQLITE_ENABLE_SELECTTRACE -DSQLITE_ENABLE_WHERETRACE"
|
||||
CFLAGS="-g -O0"
|
||||
AC_MSG_RESULT([debug])
|
||||
else
|
||||
AC_MSG_RESULT([release])
|
||||
fi
|
||||
#-----------------------------------------------------------------------
|
||||
|
||||
#-----------------------------------------------------------------------
|
||||
# --enable-static-shell
|
||||
#
|
||||
AC_ARG_ENABLE(static-shell, [AS_HELP_STRING(
|
||||
[--enable-static-shell],
|
||||
[statically link libsqlite3 into shell tool [default=yes]])],
|
||||
[], [enable_static_shell=yes])
|
||||
if test x"$enable_static_shell" = "xyes"; then
|
||||
EXTRA_SHELL_OBJ=sqlite3-sqlite3.$OBJEXT
|
||||
else
|
||||
EXTRA_SHELL_OBJ=libsqlite3.la
|
||||
fi
|
||||
AC_SUBST(EXTRA_SHELL_OBJ)
|
||||
#-----------------------------------------------------------------------
|
||||
|
||||
AC_CHECK_FUNCS(posix_fallocate)
|
||||
AC_CHECK_HEADERS(zlib.h,[
|
||||
AC_SEARCH_LIBS(deflate,z,[BUILD_CFLAGS="$BUILD_CFLAGS -DSQLITE_HAVE_ZLIB"])
|
||||
])
|
||||
|
||||
AC_SEARCH_LIBS(system,,,[SHELL_CFLAGS="-DSQLITE_NOHAVE_SYSTEM"])
|
||||
AC_SUBST(SHELL_CFLAGS)
|
||||
|
||||
#-----------------------------------------------------------------------
|
||||
# UPDATE: Maybe it's better if users just set CFLAGS before invoking
|
||||
# configure. This option doesn't really add much...
|
||||
#
|
||||
# --enable-tempstore
|
||||
#
|
||||
# AC_ARG_ENABLE(tempstore, [AS_HELP_STRING(
|
||||
# [--enable-tempstore],
|
||||
# [in-memory temporary tables (never, no, yes, always) [default=no]])],
|
||||
# [], [enable_tempstore=no])
|
||||
# AC_MSG_CHECKING([for whether or not to store temp tables in-memory])
|
||||
# case "$enable_tempstore" in
|
||||
# never ) TEMP_STORE=0 ;;
|
||||
# no ) TEMP_STORE=1 ;;
|
||||
# always ) TEMP_STORE=3 ;;
|
||||
# yes ) TEMP_STORE=3 ;;
|
||||
# * )
|
||||
# TEMP_STORE=1
|
||||
# enable_tempstore=yes
|
||||
# ;;
|
||||
# esac
|
||||
# AC_MSG_RESULT($enable_tempstore)
|
||||
# AC_SUBST(TEMP_STORE)
|
||||
#-----------------------------------------------------------------------
|
||||
|
||||
AC_OUTPUT
|
||||
0
tea/aclocal.m4 → autoconf/tea/aclocal.m4
vendored
0
tea/aclocal.m4 → autoconf/tea/aclocal.m4
vendored
|
|
@ -19,7 +19,7 @@ dnl to configure the system for the local environment.
|
|||
# so you can encode the package version directly into the source files.
|
||||
#-----------------------------------------------------------------------
|
||||
|
||||
AC_INIT([sqlite], [3.35.5])
|
||||
AC_INIT([sqlite], [3.32.0])
|
||||
|
||||
#--------------------------------------------------------------------
|
||||
# Call TEA_INIT as the first TEA_ macro to set up initial vars.
|
||||
|
|
@ -14,13 +14,10 @@
|
|||
|
||||
#define _CRT_SECURE_NO_DEPRECATE
|
||||
#include <windows.h>
|
||||
#define NO_SHLWAPI_GDI
|
||||
#define NO_SHLWAPI_STREAM
|
||||
#define NO_SHLWAPI_REG
|
||||
#include <shlwapi.h>
|
||||
#ifdef _MSC_VER
|
||||
#pragma comment (lib, "user32.lib")
|
||||
#pragma comment (lib, "kernel32.lib")
|
||||
#pragma comment (lib, "shlwapi.lib")
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
|
||||
|
|
@ -39,15 +36,15 @@
|
|||
#endif
|
||||
|
||||
|
||||
|
||||
/* protos */
|
||||
|
||||
static int CheckForCompilerFeature(const char *option);
|
||||
static int CheckForLinkerFeature(const char *option);
|
||||
static int CheckForLinkerFeature(char **options, int count);
|
||||
static int IsIn(const char *string, const char *substring);
|
||||
static int SubstituteFile(const char *substs, const char *filename);
|
||||
static int QualifyPath(const char *path);
|
||||
static const char *GetVersionFromFile(const char *filename, const char *match);
|
||||
static int LocateDependency(const char *keyfile);
|
||||
static const char *GetVersionFromFile(const char *filename, const char *match, int numdots);
|
||||
static DWORD WINAPI ReadFromPipe(LPVOID args);
|
||||
|
||||
/* globals */
|
||||
|
|
@ -59,8 +56,8 @@ typedef struct {
|
|||
char buffer[STATICBUFFERSIZE];
|
||||
} pipeinfo;
|
||||
|
||||
pipeinfo Out = {INVALID_HANDLE_VALUE, '\0'};
|
||||
pipeinfo Err = {INVALID_HANDLE_VALUE, '\0'};
|
||||
pipeinfo Out = {INVALID_HANDLE_VALUE, ""};
|
||||
pipeinfo Err = {INVALID_HANDLE_VALUE, ""};
|
||||
|
||||
/*
|
||||
* exitcodes: 0 == no, 1 == yes, 2 == error
|
||||
|
|
@ -74,6 +71,7 @@ main(
|
|||
char msg[300];
|
||||
DWORD dwWritten;
|
||||
int chars;
|
||||
const char *s;
|
||||
|
||||
/*
|
||||
* Make sure children (cl.exe and link.exe) are kept quiet.
|
||||
|
|
@ -102,16 +100,16 @@ main(
|
|||
}
|
||||
return CheckForCompilerFeature(argv[2]);
|
||||
case 'l':
|
||||
if (argc != 3) {
|
||||
if (argc < 3) {
|
||||
chars = snprintf(msg, sizeof(msg) - 1,
|
||||
"usage: %s -l <linker option>\n"
|
||||
"usage: %s -l <linker option> ?<mandatory option> ...?\n"
|
||||
"Tests for whether link.exe supports an option\n"
|
||||
"exitcodes: 0 == no, 1 == yes, 2 == error\n", argv[0]);
|
||||
WriteFile(GetStdHandle(STD_ERROR_HANDLE), msg, chars,
|
||||
&dwWritten, NULL);
|
||||
return 2;
|
||||
}
|
||||
return CheckForLinkerFeature(argv[2]);
|
||||
return CheckForLinkerFeature(&argv[2], argc-2);
|
||||
case 'f':
|
||||
if (argc == 2) {
|
||||
chars = snprintf(msg, sizeof(msg) - 1,
|
||||
|
|
@ -153,8 +151,13 @@ main(
|
|||
&dwWritten, NULL);
|
||||
return 0;
|
||||
}
|
||||
printf("%s\n", GetVersionFromFile(argv[2], argv[3]));
|
||||
return 0;
|
||||
s = GetVersionFromFile(argv[2], argv[3], *(argv[1]+2) - '0');
|
||||
if (s && *s) {
|
||||
printf("%s\n", s);
|
||||
return 0;
|
||||
} else
|
||||
return 1; /* Version not found. Return non-0 exit code */
|
||||
|
||||
case 'Q':
|
||||
if (argc != 3) {
|
||||
chars = snprintf(msg, sizeof(msg) - 1,
|
||||
|
|
@ -166,6 +169,18 @@ main(
|
|||
return 2;
|
||||
}
|
||||
return QualifyPath(argv[2]);
|
||||
|
||||
case 'L':
|
||||
if (argc != 3) {
|
||||
chars = snprintf(msg, sizeof(msg) - 1,
|
||||
"usage: %s -L keypath\n"
|
||||
"Emit the fully qualified path of directory containing keypath\n"
|
||||
"exitcodes: 0 == success, 1 == not found, 2 == error\n", argv[0]);
|
||||
WriteFile(GetStdHandle(STD_ERROR_HANDLE), msg, chars,
|
||||
&dwWritten, NULL);
|
||||
return 2;
|
||||
}
|
||||
return LocateDependency(argv[2]);
|
||||
}
|
||||
}
|
||||
chars = snprintf(msg, sizeof(msg) - 1,
|
||||
|
|
@ -260,7 +275,7 @@ CheckForCompilerFeature(
|
|||
"Tried to launch: \"%s\", but got error [%u]: ", cmdline, err);
|
||||
|
||||
FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM|FORMAT_MESSAGE_IGNORE_INSERTS|
|
||||
FORMAT_MESSAGE_MAX_WIDTH_MASK, 0L, err, 0, (LPVOID)&msg[chars],
|
||||
FORMAT_MESSAGE_MAX_WIDTH_MASK, 0L, err, 0, (LPSTR)&msg[chars],
|
||||
(300-chars), 0);
|
||||
WriteFile(GetStdHandle(STD_ERROR_HANDLE), msg, lstrlen(msg), &err,NULL);
|
||||
return 2;
|
||||
|
|
@ -313,7 +328,8 @@ CheckForCompilerFeature(
|
|||
|
||||
static int
|
||||
CheckForLinkerFeature(
|
||||
const char *option)
|
||||
char **options,
|
||||
int count)
|
||||
{
|
||||
STARTUPINFO si;
|
||||
PROCESS_INFORMATION pi;
|
||||
|
|
@ -322,7 +338,8 @@ CheckForLinkerFeature(
|
|||
char msg[300];
|
||||
BOOL ok;
|
||||
HANDLE hProcess, h, pipeThreads[2];
|
||||
char cmdline[100];
|
||||
int i;
|
||||
char cmdline[255];
|
||||
|
||||
hProcess = GetCurrentProcess();
|
||||
|
||||
|
|
@ -368,7 +385,11 @@ CheckForLinkerFeature(
|
|||
* Append our option for testing.
|
||||
*/
|
||||
|
||||
lstrcat(cmdline, option);
|
||||
for (i = 0; i < count; i++) {
|
||||
lstrcat(cmdline, " \"");
|
||||
lstrcat(cmdline, options[i]);
|
||||
lstrcat(cmdline, "\"");
|
||||
}
|
||||
|
||||
ok = CreateProcess(
|
||||
NULL, /* Module name. */
|
||||
|
|
@ -388,7 +409,7 @@ CheckForLinkerFeature(
|
|||
"Tried to launch: \"%s\", but got error [%u]: ", cmdline, err);
|
||||
|
||||
FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM|FORMAT_MESSAGE_IGNORE_INSERTS|
|
||||
FORMAT_MESSAGE_MAX_WIDTH_MASK, 0L, err, 0, (LPVOID)&msg[chars],
|
||||
FORMAT_MESSAGE_MAX_WIDTH_MASK, 0L, err, 0, (LPSTR)&msg[chars],
|
||||
(300-chars), 0);
|
||||
WriteFile(GetStdHandle(STD_ERROR_HANDLE), msg, lstrlen(msg), &err,NULL);
|
||||
return 2;
|
||||
|
|
@ -433,7 +454,9 @@ CheckForLinkerFeature(
|
|||
return !(strstr(Out.buffer, "LNK1117") != NULL ||
|
||||
strstr(Err.buffer, "LNK1117") != NULL ||
|
||||
strstr(Out.buffer, "LNK4044") != NULL ||
|
||||
strstr(Err.buffer, "LNK4044") != NULL);
|
||||
strstr(Err.buffer, "LNK4044") != NULL ||
|
||||
strstr(Out.buffer, "LNK4224") != NULL ||
|
||||
strstr(Err.buffer, "LNK4224") != NULL);
|
||||
}
|
||||
|
||||
static DWORD WINAPI
|
||||
|
|
@ -479,9 +502,9 @@ IsIn(
|
|||
static const char *
|
||||
GetVersionFromFile(
|
||||
const char *filename,
|
||||
const char *match)
|
||||
const char *match,
|
||||
int numdots)
|
||||
{
|
||||
size_t cbBuffer = 100;
|
||||
static char szBuffer[100];
|
||||
char *szResult = NULL;
|
||||
FILE *fp = fopen(filename, "rt");
|
||||
|
|
@ -491,16 +514,17 @@ GetVersionFromFile(
|
|||
* Read data until we see our match string.
|
||||
*/
|
||||
|
||||
while (fgets(szBuffer, cbBuffer, fp) != NULL) {
|
||||
while (fgets(szBuffer, sizeof(szBuffer), fp) != NULL) {
|
||||
LPSTR p, q;
|
||||
|
||||
p = strstr(szBuffer, match);
|
||||
if (p != NULL) {
|
||||
/*
|
||||
* Skip to first digit.
|
||||
* Skip to first digit after the match.
|
||||
*/
|
||||
|
||||
while (*p && !isdigit(*p)) {
|
||||
p += strlen(match);
|
||||
while (*p && !isdigit((unsigned char)*p)) {
|
||||
++p;
|
||||
}
|
||||
|
||||
|
|
@ -509,13 +533,13 @@ GetVersionFromFile(
|
|||
*/
|
||||
|
||||
q = p;
|
||||
while (*q && (isalnum(*q) || *q == '.')) {
|
||||
while (*q && (strchr("0123456789.ab", *q)) && (((!strchr(".ab", *q)
|
||||
&& !strchr("ab", q[-1])) || --numdots))) {
|
||||
++q;
|
||||
}
|
||||
|
||||
memcpy(szBuffer, p, q - p);
|
||||
szBuffer[q-p] = 0;
|
||||
szResult = szBuffer;
|
||||
*q = 0;
|
||||
szResult = p;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
@ -538,7 +562,7 @@ typedef struct list_item_t {
|
|||
static list_item_t *
|
||||
list_insert(list_item_t **listPtrPtr, const char *key, const char *value)
|
||||
{
|
||||
list_item_t *itemPtr = malloc(sizeof(list_item_t));
|
||||
list_item_t *itemPtr = (list_item_t *)malloc(sizeof(list_item_t));
|
||||
if (itemPtr) {
|
||||
itemPtr->key = strdup(key);
|
||||
itemPtr->value = strdup(value);
|
||||
|
|
@ -587,9 +611,7 @@ SubstituteFile(
|
|||
const char *substitutions,
|
||||
const char *filename)
|
||||
{
|
||||
size_t cbBuffer = 1024;
|
||||
static char szBuffer[1024], szCopy[1024];
|
||||
char *szResult = NULL;
|
||||
list_item_t *substPtr = NULL;
|
||||
FILE *fp, *sp;
|
||||
|
||||
|
|
@ -602,7 +624,7 @@ SubstituteFile(
|
|||
|
||||
sp = fopen(substitutions, "rt");
|
||||
if (sp != NULL) {
|
||||
while (fgets(szBuffer, cbBuffer, sp) != NULL) {
|
||||
while (fgets(szBuffer, sizeof(szBuffer), sp) != NULL) {
|
||||
unsigned char *ks, *ke, *vs, *ve;
|
||||
ks = (unsigned char*)szBuffer;
|
||||
while (ks && *ks && isspace(*ks)) ++ks;
|
||||
|
|
@ -619,7 +641,7 @@ SubstituteFile(
|
|||
}
|
||||
|
||||
/* debug: dump the list */
|
||||
#ifdef _DEBUG
|
||||
#ifndef NDEBUG
|
||||
{
|
||||
int n = 0;
|
||||
list_item_t *p = NULL;
|
||||
|
|
@ -628,12 +650,12 @@ SubstituteFile(
|
|||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
* Run the substitutions over each line of the input
|
||||
*/
|
||||
|
||||
while (fgets(szBuffer, cbBuffer, fp) != NULL) {
|
||||
|
||||
while (fgets(szBuffer, sizeof(szBuffer), fp) != NULL) {
|
||||
list_item_t *p = NULL;
|
||||
for (p = substPtr; p != NULL; p = p->nextPtr) {
|
||||
char *m = strstr(szBuffer, p->key);
|
||||
|
|
@ -650,15 +672,26 @@ SubstituteFile(
|
|||
memcpy(szBuffer, szCopy, sizeof(szCopy));
|
||||
}
|
||||
}
|
||||
printf(szBuffer);
|
||||
printf("%s", szBuffer);
|
||||
}
|
||||
|
||||
|
||||
list_free(&substPtr);
|
||||
}
|
||||
fclose(fp);
|
||||
return 0;
|
||||
}
|
||||
|
||||
BOOL FileExists(LPCTSTR szPath)
|
||||
{
|
||||
#ifndef INVALID_FILE_ATTRIBUTES
|
||||
#define INVALID_FILE_ATTRIBUTES ((DWORD)-1)
|
||||
#endif
|
||||
DWORD pathAttr = GetFileAttributes(szPath);
|
||||
return (pathAttr != INVALID_FILE_ATTRIBUTES &&
|
||||
!(pathAttr & FILE_ATTRIBUTE_DIRECTORY));
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* QualifyPath --
|
||||
*
|
||||
|
|
@ -672,17 +705,105 @@ QualifyPath(
|
|||
const char *szPath)
|
||||
{
|
||||
char szCwd[MAX_PATH + 1];
|
||||
char szTmp[MAX_PATH + 1];
|
||||
char *p;
|
||||
GetCurrentDirectory(MAX_PATH, szCwd);
|
||||
while ((p = strchr(szPath, '/')) && *p)
|
||||
*p = '\\';
|
||||
PathCombine(szTmp, szCwd, szPath);
|
||||
PathCanonicalize(szCwd, szTmp);
|
||||
|
||||
GetFullPathName(szPath, sizeof(szCwd)-1, szCwd, NULL);
|
||||
printf("%s\n", szCwd);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Implements LocateDependency for a single directory. See that command
|
||||
* for an explanation.
|
||||
* Returns 0 if found after printing the directory.
|
||||
* Returns 1 if not found but no errors.
|
||||
* Returns 2 on any kind of error
|
||||
* Basically, these are used as exit codes for the process.
|
||||
*/
|
||||
static int LocateDependencyHelper(const char *dir, const char *keypath)
|
||||
{
|
||||
HANDLE hSearch;
|
||||
char path[MAX_PATH+1];
|
||||
size_t dirlen;
|
||||
int keylen, ret;
|
||||
WIN32_FIND_DATA finfo;
|
||||
|
||||
if (dir == NULL || keypath == NULL)
|
||||
return 2; /* Have no real error reporting mechanism into nmake */
|
||||
dirlen = strlen(dir);
|
||||
if ((dirlen + 3) > sizeof(path))
|
||||
return 2;
|
||||
strncpy(path, dir, dirlen);
|
||||
strncpy(path+dirlen, "\\*", 3); /* Including terminating \0 */
|
||||
keylen = strlen(keypath);
|
||||
|
||||
#if 0 /* This function is not available in Visual C++ 6 */
|
||||
/*
|
||||
* Use numerics 0 -> FindExInfoStandard,
|
||||
* 1 -> FindExSearchLimitToDirectories,
|
||||
* as these are not defined in Visual C++ 6
|
||||
*/
|
||||
hSearch = FindFirstFileEx(path, 0, &finfo, 1, NULL, 0);
|
||||
#else
|
||||
hSearch = FindFirstFile(path, &finfo);
|
||||
#endif
|
||||
if (hSearch == INVALID_HANDLE_VALUE)
|
||||
return 1; /* Not found */
|
||||
|
||||
/* Loop through all subdirs checking if the keypath is under there */
|
||||
ret = 1; /* Assume not found */
|
||||
do {
|
||||
int sublen;
|
||||
/*
|
||||
* We need to check it is a directory despite the
|
||||
* FindExSearchLimitToDirectories in the above call. See SDK docs
|
||||
*/
|
||||
if ((finfo.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) == 0)
|
||||
continue;
|
||||
sublen = strlen(finfo.cFileName);
|
||||
if ((dirlen+1+sublen+1+keylen+1) > sizeof(path))
|
||||
continue; /* Path does not fit, assume not matched */
|
||||
strncpy(path+dirlen+1, finfo.cFileName, sublen);
|
||||
path[dirlen+1+sublen] = '\\';
|
||||
strncpy(path+dirlen+1+sublen+1, keypath, keylen+1);
|
||||
if (FileExists(path)) {
|
||||
/* Found a match, print to stdout */
|
||||
path[dirlen+1+sublen] = '\0';
|
||||
QualifyPath(path);
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
} while (FindNextFile(hSearch, &finfo));
|
||||
FindClose(hSearch);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* LocateDependency --
|
||||
*
|
||||
* Locates a dependency for a package.
|
||||
* keypath - a relative path within the package directory
|
||||
* that is used to confirm it is the correct directory.
|
||||
* The search path for the package directory is currently only
|
||||
* the parent and grandparent of the current working directory.
|
||||
* If found, the command prints
|
||||
* name_DIRPATH=<full path of located directory>
|
||||
* and returns 0. If not found, does not print anything and returns 1.
|
||||
*/
|
||||
static int LocateDependency(const char *keypath)
|
||||
{
|
||||
size_t i;
|
||||
int ret;
|
||||
static const char *paths[] = {"..", "..\\..", "..\\..\\.."};
|
||||
|
||||
for (i = 0; i < (sizeof(paths)/sizeof(paths[0])); ++i) {
|
||||
ret = LocateDependencyHelper(paths[i], keypath);
|
||||
if (ret == 0)
|
||||
return ret;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Local variables:
|
||||
* mode: c
|
||||
347
compile
347
compile
|
|
@ -1,347 +0,0 @@
|
|||
#! /bin/sh
|
||||
# Wrapper for compilers which do not understand '-c -o'.
|
||||
|
||||
scriptversion=2012-10-14.11; # UTC
|
||||
|
||||
# Copyright (C) 1999-2014 Free Software Foundation, Inc.
|
||||
# Written by Tom Tromey <tromey@cygnus.com>.
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2, or (at your option)
|
||||
# any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
# As a special exception to the GNU General Public License, if you
|
||||
# distribute this file as part of a program that contains a
|
||||
# configuration script generated by Autoconf, you may include it under
|
||||
# the same distribution terms that you use for the rest of that program.
|
||||
|
||||
# This file is maintained in Automake, please report
|
||||
# bugs to <bug-automake@gnu.org> or send patches to
|
||||
# <automake-patches@gnu.org>.
|
||||
|
||||
nl='
|
||||
'
|
||||
|
||||
# We need space, tab and new line, in precisely that order. Quoting is
|
||||
# there to prevent tools from complaining about whitespace usage.
|
||||
IFS=" "" $nl"
|
||||
|
||||
file_conv=
|
||||
|
||||
# func_file_conv build_file lazy
|
||||
# Convert a $build file to $host form and store it in $file
|
||||
# Currently only supports Windows hosts. If the determined conversion
|
||||
# type is listed in (the comma separated) LAZY, no conversion will
|
||||
# take place.
|
||||
func_file_conv ()
|
||||
{
|
||||
file=$1
|
||||
case $file in
|
||||
/ | /[!/]*) # absolute file, and not a UNC file
|
||||
if test -z "$file_conv"; then
|
||||
# lazily determine how to convert abs files
|
||||
case `uname -s` in
|
||||
MINGW*)
|
||||
file_conv=mingw
|
||||
;;
|
||||
CYGWIN*)
|
||||
file_conv=cygwin
|
||||
;;
|
||||
*)
|
||||
file_conv=wine
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
case $file_conv/,$2, in
|
||||
*,$file_conv,*)
|
||||
;;
|
||||
mingw/*)
|
||||
file=`cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/'`
|
||||
;;
|
||||
cygwin/*)
|
||||
file=`cygpath -m "$file" || echo "$file"`
|
||||
;;
|
||||
wine/*)
|
||||
file=`winepath -w "$file" || echo "$file"`
|
||||
;;
|
||||
esac
|
||||
;;
|
||||
esac
|
||||
}
|
||||
|
||||
# func_cl_dashL linkdir
|
||||
# Make cl look for libraries in LINKDIR
|
||||
func_cl_dashL ()
|
||||
{
|
||||
func_file_conv "$1"
|
||||
if test -z "$lib_path"; then
|
||||
lib_path=$file
|
||||
else
|
||||
lib_path="$lib_path;$file"
|
||||
fi
|
||||
linker_opts="$linker_opts -LIBPATH:$file"
|
||||
}
|
||||
|
||||
# func_cl_dashl library
|
||||
# Do a library search-path lookup for cl
|
||||
func_cl_dashl ()
|
||||
{
|
||||
lib=$1
|
||||
found=no
|
||||
save_IFS=$IFS
|
||||
IFS=';'
|
||||
for dir in $lib_path $LIB
|
||||
do
|
||||
IFS=$save_IFS
|
||||
if $shared && test -f "$dir/$lib.dll.lib"; then
|
||||
found=yes
|
||||
lib=$dir/$lib.dll.lib
|
||||
break
|
||||
fi
|
||||
if test -f "$dir/$lib.lib"; then
|
||||
found=yes
|
||||
lib=$dir/$lib.lib
|
||||
break
|
||||
fi
|
||||
if test -f "$dir/lib$lib.a"; then
|
||||
found=yes
|
||||
lib=$dir/lib$lib.a
|
||||
break
|
||||
fi
|
||||
done
|
||||
IFS=$save_IFS
|
||||
|
||||
if test "$found" != yes; then
|
||||
lib=$lib.lib
|
||||
fi
|
||||
}
|
||||
|
||||
# func_cl_wrapper cl arg...
|
||||
# Adjust compile command to suit cl
|
||||
func_cl_wrapper ()
|
||||
{
|
||||
# Assume a capable shell
|
||||
lib_path=
|
||||
shared=:
|
||||
linker_opts=
|
||||
for arg
|
||||
do
|
||||
if test -n "$eat"; then
|
||||
eat=
|
||||
else
|
||||
case $1 in
|
||||
-o)
|
||||
# configure might choose to run compile as 'compile cc -o foo foo.c'.
|
||||
eat=1
|
||||
case $2 in
|
||||
*.o | *.[oO][bB][jJ])
|
||||
func_file_conv "$2"
|
||||
set x "$@" -Fo"$file"
|
||||
shift
|
||||
;;
|
||||
*)
|
||||
func_file_conv "$2"
|
||||
set x "$@" -Fe"$file"
|
||||
shift
|
||||
;;
|
||||
esac
|
||||
;;
|
||||
-I)
|
||||
eat=1
|
||||
func_file_conv "$2" mingw
|
||||
set x "$@" -I"$file"
|
||||
shift
|
||||
;;
|
||||
-I*)
|
||||
func_file_conv "${1#-I}" mingw
|
||||
set x "$@" -I"$file"
|
||||
shift
|
||||
;;
|
||||
-l)
|
||||
eat=1
|
||||
func_cl_dashl "$2"
|
||||
set x "$@" "$lib"
|
||||
shift
|
||||
;;
|
||||
-l*)
|
||||
func_cl_dashl "${1#-l}"
|
||||
set x "$@" "$lib"
|
||||
shift
|
||||
;;
|
||||
-L)
|
||||
eat=1
|
||||
func_cl_dashL "$2"
|
||||
;;
|
||||
-L*)
|
||||
func_cl_dashL "${1#-L}"
|
||||
;;
|
||||
-static)
|
||||
shared=false
|
||||
;;
|
||||
-Wl,*)
|
||||
arg=${1#-Wl,}
|
||||
save_ifs="$IFS"; IFS=','
|
||||
for flag in $arg; do
|
||||
IFS="$save_ifs"
|
||||
linker_opts="$linker_opts $flag"
|
||||
done
|
||||
IFS="$save_ifs"
|
||||
;;
|
||||
-Xlinker)
|
||||
eat=1
|
||||
linker_opts="$linker_opts $2"
|
||||
;;
|
||||
-*)
|
||||
set x "$@" "$1"
|
||||
shift
|
||||
;;
|
||||
*.cc | *.CC | *.cxx | *.CXX | *.[cC]++)
|
||||
func_file_conv "$1"
|
||||
set x "$@" -Tp"$file"
|
||||
shift
|
||||
;;
|
||||
*.c | *.cpp | *.CPP | *.lib | *.LIB | *.Lib | *.OBJ | *.obj | *.[oO])
|
||||
func_file_conv "$1" mingw
|
||||
set x "$@" "$file"
|
||||
shift
|
||||
;;
|
||||
*)
|
||||
set x "$@" "$1"
|
||||
shift
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
shift
|
||||
done
|
||||
if test -n "$linker_opts"; then
|
||||
linker_opts="-link$linker_opts"
|
||||
fi
|
||||
exec "$@" $linker_opts
|
||||
exit 1
|
||||
}
|
||||
|
||||
eat=
|
||||
|
||||
case $1 in
|
||||
'')
|
||||
echo "$0: No command. Try '$0 --help' for more information." 1>&2
|
||||
exit 1;
|
||||
;;
|
||||
-h | --h*)
|
||||
cat <<\EOF
|
||||
Usage: compile [--help] [--version] PROGRAM [ARGS]
|
||||
|
||||
Wrapper for compilers which do not understand '-c -o'.
|
||||
Remove '-o dest.o' from ARGS, run PROGRAM with the remaining
|
||||
arguments, and rename the output as expected.
|
||||
|
||||
If you are trying to build a whole package this is not the
|
||||
right script to run: please start by reading the file 'INSTALL'.
|
||||
|
||||
Report bugs to <bug-automake@gnu.org>.
|
||||
EOF
|
||||
exit $?
|
||||
;;
|
||||
-v | --v*)
|
||||
echo "compile $scriptversion"
|
||||
exit $?
|
||||
;;
|
||||
cl | *[/\\]cl | cl.exe | *[/\\]cl.exe )
|
||||
func_cl_wrapper "$@" # Doesn't return...
|
||||
;;
|
||||
esac
|
||||
|
||||
ofile=
|
||||
cfile=
|
||||
|
||||
for arg
|
||||
do
|
||||
if test -n "$eat"; then
|
||||
eat=
|
||||
else
|
||||
case $1 in
|
||||
-o)
|
||||
# configure might choose to run compile as 'compile cc -o foo foo.c'.
|
||||
# So we strip '-o arg' only if arg is an object.
|
||||
eat=1
|
||||
case $2 in
|
||||
*.o | *.obj)
|
||||
ofile=$2
|
||||
;;
|
||||
*)
|
||||
set x "$@" -o "$2"
|
||||
shift
|
||||
;;
|
||||
esac
|
||||
;;
|
||||
*.c)
|
||||
cfile=$1
|
||||
set x "$@" "$1"
|
||||
shift
|
||||
;;
|
||||
*)
|
||||
set x "$@" "$1"
|
||||
shift
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
shift
|
||||
done
|
||||
|
||||
if test -z "$ofile" || test -z "$cfile"; then
|
||||
# If no '-o' option was seen then we might have been invoked from a
|
||||
# pattern rule where we don't need one. That is ok -- this is a
|
||||
# normal compilation that the losing compiler can handle. If no
|
||||
# '.c' file was seen then we are probably linking. That is also
|
||||
# ok.
|
||||
exec "$@"
|
||||
fi
|
||||
|
||||
# Name of file we expect compiler to create.
|
||||
cofile=`echo "$cfile" | sed 's|^.*[\\/]||; s|^[a-zA-Z]:||; s/\.c$/.o/'`
|
||||
|
||||
# Create the lock directory.
|
||||
# Note: use '[/\\:.-]' here to ensure that we don't use the same name
|
||||
# that we are using for the .o file. Also, base the name on the expected
|
||||
# object file name, since that is what matters with a parallel build.
|
||||
lockdir=`echo "$cofile" | sed -e 's|[/\\:.-]|_|g'`.d
|
||||
while true; do
|
||||
if mkdir "$lockdir" >/dev/null 2>&1; then
|
||||
break
|
||||
fi
|
||||
sleep 1
|
||||
done
|
||||
# FIXME: race condition here if user kills between mkdir and trap.
|
||||
trap "rmdir '$lockdir'; exit 1" 1 2 15
|
||||
|
||||
# Run the compile.
|
||||
"$@"
|
||||
ret=$?
|
||||
|
||||
if test -f "$cofile"; then
|
||||
test "$cofile" = "$ofile" || mv "$cofile" "$ofile"
|
||||
elif test -f "${cofile}bj"; then
|
||||
test "${cofile}bj" = "$ofile" || mv "${cofile}bj" "$ofile"
|
||||
fi
|
||||
|
||||
rmdir "$lockdir"
|
||||
exit $ret
|
||||
|
||||
# Local Variables:
|
||||
# mode: shell-script
|
||||
# sh-indentation: 2
|
||||
# eval: (add-hook 'write-file-hooks 'time-stamp)
|
||||
# time-stamp-start: "scriptversion="
|
||||
# time-stamp-format: "%:y-%02m-%02d.%02H"
|
||||
# time-stamp-time-zone: "UTC"
|
||||
# time-stamp-end: "; # UTC"
|
||||
# End:
|
||||
353
config.guess
vendored
Executable file → Normal file
353
config.guess
vendored
Executable file → Normal file
|
|
@ -1,8 +1,8 @@
|
|||
#! /bin/sh
|
||||
# Attempt to guess a canonical system name.
|
||||
# Copyright 1992-2018 Free Software Foundation, Inc.
|
||||
# Copyright 1992-2019 Free Software Foundation, Inc.
|
||||
|
||||
timestamp='2018-02-24'
|
||||
timestamp='2019-05-28'
|
||||
|
||||
# This file is free software; you can redistribute it and/or modify it
|
||||
# under the terms of the GNU General Public License as published by
|
||||
|
|
@ -50,7 +50,7 @@ version="\
|
|||
GNU config.guess ($timestamp)
|
||||
|
||||
Originally written by Per Bothner.
|
||||
Copyright 1992-2018 Free Software Foundation, Inc.
|
||||
Copyright 1992-2019 Free Software Foundation, Inc.
|
||||
|
||||
This is free software; see the source for copying conditions. There is NO
|
||||
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
|
||||
|
|
@ -84,8 +84,6 @@ if test $# != 0; then
|
|||
exit 1
|
||||
fi
|
||||
|
||||
trap 'exit 1' 1 2 15
|
||||
|
||||
# CC_FOR_BUILD -- compiler used by this script. Note that the use of a
|
||||
# compiler to aid in system detection is discouraged as it requires
|
||||
# temporary files to be created and, as you can see below, it is a
|
||||
|
|
@ -96,34 +94,38 @@ trap 'exit 1' 1 2 15
|
|||
|
||||
# Portable tmp directory creation inspired by the Autoconf team.
|
||||
|
||||
set_cc_for_build='
|
||||
trap "exitcode=\$?; (rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null) && exit \$exitcode" 0 ;
|
||||
trap "rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null; exit 1" 1 2 13 15 ;
|
||||
: ${TMPDIR=/tmp} ;
|
||||
{ tmp=`(umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } ||
|
||||
{ test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir $tmp) ; } ||
|
||||
{ tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir $tmp) && echo "Warning: creating insecure temp directory" >&2 ; } ||
|
||||
{ echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } ;
|
||||
dummy=$tmp/dummy ;
|
||||
tmpfiles="$dummy.c $dummy.o $dummy.rel $dummy" ;
|
||||
case $CC_FOR_BUILD,$HOST_CC,$CC in
|
||||
,,) echo "int x;" > "$dummy.c" ;
|
||||
for c in cc gcc c89 c99 ; do
|
||||
if ($c -c -o "$dummy.o" "$dummy.c") >/dev/null 2>&1 ; then
|
||||
CC_FOR_BUILD="$c"; break ;
|
||||
fi ;
|
||||
done ;
|
||||
if test x"$CC_FOR_BUILD" = x ; then
|
||||
CC_FOR_BUILD=no_compiler_found ;
|
||||
fi
|
||||
;;
|
||||
,,*) CC_FOR_BUILD=$CC ;;
|
||||
,*,*) CC_FOR_BUILD=$HOST_CC ;;
|
||||
esac ; set_cc_for_build= ;'
|
||||
tmp=
|
||||
# shellcheck disable=SC2172
|
||||
trap 'test -z "$tmp" || rm -fr "$tmp"' 0 1 2 13 15
|
||||
|
||||
set_cc_for_build() {
|
||||
: "${TMPDIR=/tmp}"
|
||||
# shellcheck disable=SC2039
|
||||
{ tmp=`(umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } ||
|
||||
{ test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir "$tmp" 2>/dev/null) ; } ||
|
||||
{ tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir "$tmp" 2>/dev/null) && echo "Warning: creating insecure temp directory" >&2 ; } ||
|
||||
{ echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; }
|
||||
dummy=$tmp/dummy
|
||||
case ${CC_FOR_BUILD-},${HOST_CC-},${CC-} in
|
||||
,,) echo "int x;" > "$dummy.c"
|
||||
for driver in cc gcc c89 c99 ; do
|
||||
if ($driver -c -o "$dummy.o" "$dummy.c") >/dev/null 2>&1 ; then
|
||||
CC_FOR_BUILD="$driver"
|
||||
break
|
||||
fi
|
||||
done
|
||||
if test x"$CC_FOR_BUILD" = x ; then
|
||||
CC_FOR_BUILD=no_compiler_found
|
||||
fi
|
||||
;;
|
||||
,,*) CC_FOR_BUILD=$CC ;;
|
||||
,*,*) CC_FOR_BUILD=$HOST_CC ;;
|
||||
esac
|
||||
}
|
||||
|
||||
# This is needed to find uname on a Pyramid OSx when run in the BSD universe.
|
||||
# (ghazi@noc.rutgers.edu 1994-08-24)
|
||||
if (test -f /.attbin/uname) >/dev/null 2>&1 ; then
|
||||
if test -f /.attbin/uname ; then
|
||||
PATH=$PATH:/.attbin ; export PATH
|
||||
fi
|
||||
|
||||
|
|
@ -138,7 +140,7 @@ Linux|GNU|GNU/*)
|
|||
# We could probably try harder.
|
||||
LIBC=gnu
|
||||
|
||||
eval "$set_cc_for_build"
|
||||
set_cc_for_build
|
||||
cat <<-EOF > "$dummy.c"
|
||||
#include <features.h>
|
||||
#if defined(__UCLIBC__)
|
||||
|
|
@ -199,7 +201,7 @@ case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in
|
|||
os=netbsdelf
|
||||
;;
|
||||
arm*|i386|m68k|ns32k|sh3*|sparc|vax)
|
||||
eval "$set_cc_for_build"
|
||||
set_cc_for_build
|
||||
if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \
|
||||
| grep -q __ELF__
|
||||
then
|
||||
|
|
@ -237,7 +239,7 @@ case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in
|
|||
# Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM:
|
||||
# contains redundant information, the shorter form:
|
||||
# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used.
|
||||
echo "$machine-${os}${release}${abi}"
|
||||
echo "$machine-${os}${release}${abi-}"
|
||||
exit ;;
|
||||
*:Bitrig:*:*)
|
||||
UNAME_MACHINE_ARCH=`arch | sed 's/Bitrig.//'`
|
||||
|
|
@ -389,7 +391,7 @@ case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in
|
|||
echo i386-pc-auroraux"$UNAME_RELEASE"
|
||||
exit ;;
|
||||
i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*)
|
||||
eval "$set_cc_for_build"
|
||||
set_cc_for_build
|
||||
SUN_ARCH=i386
|
||||
# If there is a compiler, see if it is configured for 64-bit objects.
|
||||
# Note that the Sun cc does not turn __LP64__ into 1 like gcc does.
|
||||
|
|
@ -482,7 +484,7 @@ case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in
|
|||
echo clipper-intergraph-clix"$UNAME_RELEASE"
|
||||
exit ;;
|
||||
mips:*:*:UMIPS | mips:*:*:RISCos)
|
||||
eval "$set_cc_for_build"
|
||||
set_cc_for_build
|
||||
sed 's/^ //' << EOF > "$dummy.c"
|
||||
#ifdef __cplusplus
|
||||
#include <stdio.h> /* for printf() prototype */
|
||||
|
|
@ -579,7 +581,7 @@ EOF
|
|||
exit ;;
|
||||
*:AIX:2:3)
|
||||
if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then
|
||||
eval "$set_cc_for_build"
|
||||
set_cc_for_build
|
||||
sed 's/^ //' << EOF > "$dummy.c"
|
||||
#include <sys/systemcfg.h>
|
||||
|
||||
|
|
@ -660,7 +662,7 @@ EOF
|
|||
esac
|
||||
fi
|
||||
if [ "$HP_ARCH" = "" ]; then
|
||||
eval "$set_cc_for_build"
|
||||
set_cc_for_build
|
||||
sed 's/^ //' << EOF > "$dummy.c"
|
||||
|
||||
#define _HPUX_SOURCE
|
||||
|
|
@ -700,7 +702,7 @@ EOF
|
|||
esac
|
||||
if [ "$HP_ARCH" = hppa2.0w ]
|
||||
then
|
||||
eval "$set_cc_for_build"
|
||||
set_cc_for_build
|
||||
|
||||
# hppa2.0w-hp-hpux* has a 64-bit kernel and a compiler generating
|
||||
# 32-bit code. hppa64-hp-hpux* has the same kernel and a compiler
|
||||
|
|
@ -726,7 +728,7 @@ EOF
|
|||
echo ia64-hp-hpux"$HPUX_REV"
|
||||
exit ;;
|
||||
3050*:HI-UX:*:*)
|
||||
eval "$set_cc_for_build"
|
||||
set_cc_for_build
|
||||
sed 's/^ //' << EOF > "$dummy.c"
|
||||
#include <unistd.h>
|
||||
int
|
||||
|
|
@ -840,6 +842,17 @@ EOF
|
|||
*:BSD/OS:*:*)
|
||||
echo "$UNAME_MACHINE"-unknown-bsdi"$UNAME_RELEASE"
|
||||
exit ;;
|
||||
arm:FreeBSD:*:*)
|
||||
UNAME_PROCESSOR=`uname -p`
|
||||
set_cc_for_build
|
||||
if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \
|
||||
| grep -q __ARM_PCS_VFP
|
||||
then
|
||||
echo "${UNAME_PROCESSOR}"-unknown-freebsd"`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`"-gnueabi
|
||||
else
|
||||
echo "${UNAME_PROCESSOR}"-unknown-freebsd"`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`"-gnueabihf
|
||||
fi
|
||||
exit ;;
|
||||
*:FreeBSD:*:*)
|
||||
UNAME_PROCESSOR=`/usr/bin/uname -p`
|
||||
case "$UNAME_PROCESSOR" in
|
||||
|
|
@ -881,7 +894,7 @@ EOF
|
|||
echo "$UNAME_MACHINE"-pc-uwin
|
||||
exit ;;
|
||||
amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*)
|
||||
echo x86_64-unknown-cygwin
|
||||
echo x86_64-pc-cygwin
|
||||
exit ;;
|
||||
prep*:SunOS:5.*:*)
|
||||
echo powerpcle-unknown-solaris2"`echo "$UNAME_RELEASE"|sed -e 's/[^.]*//'`"
|
||||
|
|
@ -894,8 +907,8 @@ EOF
|
|||
# other systems with GNU libc and userland
|
||||
echo "$UNAME_MACHINE-unknown-`echo "$UNAME_SYSTEM" | sed 's,^[^/]*/,,' | tr "[:upper:]" "[:lower:]"``echo "$UNAME_RELEASE"|sed -e 's/[-(].*//'`-$LIBC"
|
||||
exit ;;
|
||||
i*86:Minix:*:*)
|
||||
echo "$UNAME_MACHINE"-pc-minix
|
||||
*:Minix:*:*)
|
||||
echo "$UNAME_MACHINE"-unknown-minix
|
||||
exit ;;
|
||||
aarch64:Linux:*:*)
|
||||
echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
|
||||
|
|
@ -922,7 +935,7 @@ EOF
|
|||
echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
|
||||
exit ;;
|
||||
arm*:Linux:*:*)
|
||||
eval "$set_cc_for_build"
|
||||
set_cc_for_build
|
||||
if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \
|
||||
| grep -q __ARM_EABI__
|
||||
then
|
||||
|
|
@ -971,23 +984,51 @@ EOF
|
|||
echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
|
||||
exit ;;
|
||||
mips:Linux:*:* | mips64:Linux:*:*)
|
||||
eval "$set_cc_for_build"
|
||||
set_cc_for_build
|
||||
IS_GLIBC=0
|
||||
test x"${LIBC}" = xgnu && IS_GLIBC=1
|
||||
sed 's/^ //' << EOF > "$dummy.c"
|
||||
#undef CPU
|
||||
#undef ${UNAME_MACHINE}
|
||||
#undef ${UNAME_MACHINE}el
|
||||
#undef mips
|
||||
#undef mipsel
|
||||
#undef mips64
|
||||
#undef mips64el
|
||||
#if ${IS_GLIBC} && defined(_ABI64)
|
||||
LIBCABI=gnuabi64
|
||||
#else
|
||||
#if ${IS_GLIBC} && defined(_ABIN32)
|
||||
LIBCABI=gnuabin32
|
||||
#else
|
||||
LIBCABI=${LIBC}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if ${IS_GLIBC} && defined(__mips64) && defined(__mips_isa_rev) && __mips_isa_rev>=6
|
||||
CPU=mipsisa64r6
|
||||
#else
|
||||
#if ${IS_GLIBC} && !defined(__mips64) && defined(__mips_isa_rev) && __mips_isa_rev>=6
|
||||
CPU=mipsisa32r6
|
||||
#else
|
||||
#if defined(__mips64)
|
||||
CPU=mips64
|
||||
#else
|
||||
CPU=mips
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
|
||||
CPU=${UNAME_MACHINE}el
|
||||
MIPS_ENDIAN=el
|
||||
#else
|
||||
#if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB)
|
||||
CPU=${UNAME_MACHINE}
|
||||
MIPS_ENDIAN=
|
||||
#else
|
||||
CPU=
|
||||
MIPS_ENDIAN=
|
||||
#endif
|
||||
#endif
|
||||
EOF
|
||||
eval "`$CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^CPU'`"
|
||||
test "x$CPU" != x && { echo "$CPU-unknown-linux-$LIBC"; exit; }
|
||||
eval "`$CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^CPU\|^MIPS_ENDIAN\|^LIBCABI'`"
|
||||
test "x$CPU" != x && { echo "$CPU${MIPS_ENDIAN}-unknown-linux-$LIBCABI"; exit; }
|
||||
;;
|
||||
mips64el:Linux:*:*)
|
||||
echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
|
||||
|
|
@ -1046,11 +1087,7 @@ EOF
|
|||
echo "$UNAME_MACHINE"-dec-linux-"$LIBC"
|
||||
exit ;;
|
||||
x86_64:Linux:*:*)
|
||||
if objdump -f /bin/sh | grep -q elf32-x86-64; then
|
||||
echo "$UNAME_MACHINE"-pc-linux-"$LIBC"x32
|
||||
else
|
||||
echo "$UNAME_MACHINE"-pc-linux-"$LIBC"
|
||||
fi
|
||||
echo "$UNAME_MACHINE"-pc-linux-"$LIBC"
|
||||
exit ;;
|
||||
xtensa*:Linux:*:*)
|
||||
echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
|
||||
|
|
@ -1104,7 +1141,7 @@ EOF
|
|||
*Pentium) UNAME_MACHINE=i586 ;;
|
||||
*Pent*|*Celeron) UNAME_MACHINE=i686 ;;
|
||||
esac
|
||||
echo "$UNAME_MACHINE-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}{$UNAME_VERSION}"
|
||||
echo "$UNAME_MACHINE-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION}"
|
||||
exit ;;
|
||||
i*86:*:3.2:*)
|
||||
if test -f /usr/options/cb.name; then
|
||||
|
|
@ -1288,38 +1325,39 @@ EOF
|
|||
echo "$UNAME_MACHINE"-apple-rhapsody"$UNAME_RELEASE"
|
||||
exit ;;
|
||||
*:Darwin:*:*)
|
||||
UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown
|
||||
eval "$set_cc_for_build"
|
||||
if test "$UNAME_PROCESSOR" = unknown ; then
|
||||
UNAME_PROCESSOR=powerpc
|
||||
UNAME_PROCESSOR=`uname -p`
|
||||
case $UNAME_PROCESSOR in
|
||||
unknown) UNAME_PROCESSOR=powerpc ;;
|
||||
esac
|
||||
if command -v xcode-select > /dev/null 2> /dev/null && \
|
||||
! xcode-select --print-path > /dev/null 2> /dev/null ; then
|
||||
# Avoid executing cc if there is no toolchain installed as
|
||||
# cc will be a stub that puts up a graphical alert
|
||||
# prompting the user to install developer tools.
|
||||
CC_FOR_BUILD=no_compiler_found
|
||||
else
|
||||
set_cc_for_build
|
||||
fi
|
||||
if test "`echo "$UNAME_RELEASE" | sed -e 's/\..*//'`" -le 10 ; then
|
||||
if [ "$CC_FOR_BUILD" != no_compiler_found ]; then
|
||||
if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \
|
||||
(CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \
|
||||
grep IS_64BIT_ARCH >/dev/null
|
||||
then
|
||||
case $UNAME_PROCESSOR in
|
||||
i386) UNAME_PROCESSOR=x86_64 ;;
|
||||
powerpc) UNAME_PROCESSOR=powerpc64 ;;
|
||||
esac
|
||||
fi
|
||||
# On 10.4-10.6 one might compile for PowerPC via gcc -arch ppc
|
||||
if (echo '#ifdef __POWERPC__'; echo IS_PPC; echo '#endif') | \
|
||||
(CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \
|
||||
grep IS_PPC >/dev/null
|
||||
then
|
||||
UNAME_PROCESSOR=powerpc
|
||||
fi
|
||||
if [ "$CC_FOR_BUILD" != no_compiler_found ]; then
|
||||
if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \
|
||||
(CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \
|
||||
grep IS_64BIT_ARCH >/dev/null
|
||||
then
|
||||
case $UNAME_PROCESSOR in
|
||||
i386) UNAME_PROCESSOR=x86_64 ;;
|
||||
powerpc) UNAME_PROCESSOR=powerpc64 ;;
|
||||
esac
|
||||
fi
|
||||
# On 10.4-10.6 one might compile for PowerPC via gcc -arch ppc
|
||||
if (echo '#ifdef __POWERPC__'; echo IS_PPC; echo '#endif') | \
|
||||
(CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \
|
||||
grep IS_PPC >/dev/null
|
||||
then
|
||||
UNAME_PROCESSOR=powerpc
|
||||
fi
|
||||
elif test "$UNAME_PROCESSOR" = i386 ; then
|
||||
# Avoid executing cc on OS X 10.9, as it ships with a stub
|
||||
# that puts up a graphical alert prompting to install
|
||||
# developer tools. Any system running Mac OS X 10.7 or
|
||||
# later (Darwin 11 and later) is required to have a 64-bit
|
||||
# processor. This is not true of the ARM version of Darwin
|
||||
# that Apple uses in portable devices.
|
||||
UNAME_PROCESSOR=x86_64
|
||||
# uname -m returns i386 or x86_64
|
||||
UNAME_PROCESSOR=$UNAME_MACHINE
|
||||
fi
|
||||
echo "$UNAME_PROCESSOR"-apple-darwin"$UNAME_RELEASE"
|
||||
exit ;;
|
||||
|
|
@ -1362,6 +1400,7 @@ EOF
|
|||
# "uname -m" is not consistent, so use $cputype instead. 386
|
||||
# is converted to i386 for consistency with other x86
|
||||
# operating systems.
|
||||
# shellcheck disable=SC2154
|
||||
if test "$cputype" = 386; then
|
||||
UNAME_MACHINE=i386
|
||||
else
|
||||
|
|
@ -1418,8 +1457,148 @@ EOF
|
|||
amd64:Isilon\ OneFS:*:*)
|
||||
echo x86_64-unknown-onefs
|
||||
exit ;;
|
||||
*:Unleashed:*:*)
|
||||
echo "$UNAME_MACHINE"-unknown-unleashed"$UNAME_RELEASE"
|
||||
exit ;;
|
||||
esac
|
||||
|
||||
# No uname command or uname output not recognized.
|
||||
set_cc_for_build
|
||||
cat > "$dummy.c" <<EOF
|
||||
#ifdef _SEQUENT_
|
||||
#include <sys/types.h>
|
||||
#include <sys/utsname.h>
|
||||
#endif
|
||||
#if defined(ultrix) || defined(_ultrix) || defined(__ultrix) || defined(__ultrix__)
|
||||
#if defined (vax) || defined (__vax) || defined (__vax__) || defined(mips) || defined(__mips) || defined(__mips__) || defined(MIPS) || defined(__MIPS__)
|
||||
#include <signal.h>
|
||||
#if defined(_SIZE_T_) || defined(SIGLOST)
|
||||
#include <sys/utsname.h>
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
main ()
|
||||
{
|
||||
#if defined (sony)
|
||||
#if defined (MIPSEB)
|
||||
/* BFD wants "bsd" instead of "newsos". Perhaps BFD should be changed,
|
||||
I don't know.... */
|
||||
printf ("mips-sony-bsd\n"); exit (0);
|
||||
#else
|
||||
#include <sys/param.h>
|
||||
printf ("m68k-sony-newsos%s\n",
|
||||
#ifdef NEWSOS4
|
||||
"4"
|
||||
#else
|
||||
""
|
||||
#endif
|
||||
); exit (0);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined (NeXT)
|
||||
#if !defined (__ARCHITECTURE__)
|
||||
#define __ARCHITECTURE__ "m68k"
|
||||
#endif
|
||||
int version;
|
||||
version=`(hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null`;
|
||||
if (version < 4)
|
||||
printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version);
|
||||
else
|
||||
printf ("%s-next-openstep%d\n", __ARCHITECTURE__, version);
|
||||
exit (0);
|
||||
#endif
|
||||
|
||||
#if defined (MULTIMAX) || defined (n16)
|
||||
#if defined (UMAXV)
|
||||
printf ("ns32k-encore-sysv\n"); exit (0);
|
||||
#else
|
||||
#if defined (CMU)
|
||||
printf ("ns32k-encore-mach\n"); exit (0);
|
||||
#else
|
||||
printf ("ns32k-encore-bsd\n"); exit (0);
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined (__386BSD__)
|
||||
printf ("i386-pc-bsd\n"); exit (0);
|
||||
#endif
|
||||
|
||||
#if defined (sequent)
|
||||
#if defined (i386)
|
||||
printf ("i386-sequent-dynix\n"); exit (0);
|
||||
#endif
|
||||
#if defined (ns32000)
|
||||
printf ("ns32k-sequent-dynix\n"); exit (0);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined (_SEQUENT_)
|
||||
struct utsname un;
|
||||
|
||||
uname(&un);
|
||||
if (strncmp(un.version, "V2", 2) == 0) {
|
||||
printf ("i386-sequent-ptx2\n"); exit (0);
|
||||
}
|
||||
if (strncmp(un.version, "V1", 2) == 0) { /* XXX is V1 correct? */
|
||||
printf ("i386-sequent-ptx1\n"); exit (0);
|
||||
}
|
||||
printf ("i386-sequent-ptx\n"); exit (0);
|
||||
#endif
|
||||
|
||||
#if defined (vax)
|
||||
#if !defined (ultrix)
|
||||
#include <sys/param.h>
|
||||
#if defined (BSD)
|
||||
#if BSD == 43
|
||||
printf ("vax-dec-bsd4.3\n"); exit (0);
|
||||
#else
|
||||
#if BSD == 199006
|
||||
printf ("vax-dec-bsd4.3reno\n"); exit (0);
|
||||
#else
|
||||
printf ("vax-dec-bsd\n"); exit (0);
|
||||
#endif
|
||||
#endif
|
||||
#else
|
||||
printf ("vax-dec-bsd\n"); exit (0);
|
||||
#endif
|
||||
#else
|
||||
#if defined(_SIZE_T_) || defined(SIGLOST)
|
||||
struct utsname un;
|
||||
uname (&un);
|
||||
printf ("vax-dec-ultrix%s\n", un.release); exit (0);
|
||||
#else
|
||||
printf ("vax-dec-ultrix\n"); exit (0);
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#if defined(ultrix) || defined(_ultrix) || defined(__ultrix) || defined(__ultrix__)
|
||||
#if defined(mips) || defined(__mips) || defined(__mips__) || defined(MIPS) || defined(__MIPS__)
|
||||
#if defined(_SIZE_T_) || defined(SIGLOST)
|
||||
struct utsname *un;
|
||||
uname (&un);
|
||||
printf ("mips-dec-ultrix%s\n", un.release); exit (0);
|
||||
#else
|
||||
printf ("mips-dec-ultrix\n"); exit (0);
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined (alliant) && defined (i860)
|
||||
printf ("i860-alliant-bsd\n"); exit (0);
|
||||
#endif
|
||||
|
||||
exit (1);
|
||||
}
|
||||
EOF
|
||||
|
||||
$CC_FOR_BUILD -o "$dummy" "$dummy.c" 2>/dev/null && SYSTEM_NAME=`$dummy` &&
|
||||
{ echo "$SYSTEM_NAME"; exit; }
|
||||
|
||||
# Apollos put the system type in the environment.
|
||||
test -d /usr/apollo && { echo "$ISP-apollo-$SYSTYPE"; exit; }
|
||||
|
||||
echo "$0: unable to guess system type" >&2
|
||||
|
||||
case "$UNAME_MACHINE:$UNAME_SYSTEM" in
|
||||
|
|
@ -1473,7 +1652,7 @@ EOF
|
|||
exit 1
|
||||
|
||||
# Local variables:
|
||||
# eval: (add-hook 'write-file-functions 'time-stamp)
|
||||
# eval: (add-hook 'before-save-hook 'time-stamp)
|
||||
# time-stamp-start: "timestamp='"
|
||||
# time-stamp-format: "%:y-%02m-%02d"
|
||||
# time-stamp-end: "'"
|
||||
|
|
|
|||
131
config.h.in
Normal file
131
config.h.in
Normal file
|
|
@ -0,0 +1,131 @@
|
|||
/* config.h.in. Generated from configure.ac by autoheader. */
|
||||
|
||||
/* Define to 1 if you have the <dlfcn.h> header file. */
|
||||
#undef HAVE_DLFCN_H
|
||||
|
||||
/* Define to 1 if you have the `fdatasync' function. */
|
||||
#undef HAVE_FDATASYNC
|
||||
|
||||
/* Define to 1 if you have the `gmtime_r' function. */
|
||||
#undef HAVE_GMTIME_R
|
||||
|
||||
/* Define to 1 if the system has the type `int16_t'. */
|
||||
#undef HAVE_INT16_T
|
||||
|
||||
/* Define to 1 if the system has the type `int32_t'. */
|
||||
#undef HAVE_INT32_T
|
||||
|
||||
/* Define to 1 if the system has the type `int64_t'. */
|
||||
#undef HAVE_INT64_T
|
||||
|
||||
/* Define to 1 if the system has the type `int8_t'. */
|
||||
#undef HAVE_INT8_T
|
||||
|
||||
/* Define to 1 if the system has the type `intptr_t'. */
|
||||
#undef HAVE_INTPTR_T
|
||||
|
||||
/* Define to 1 if you have the <inttypes.h> header file. */
|
||||
#undef HAVE_INTTYPES_H
|
||||
|
||||
/* Define to 1 if you have the `isnan' function. */
|
||||
#undef HAVE_ISNAN
|
||||
|
||||
/* Define to 1 if you have the `localtime_r' function. */
|
||||
#undef HAVE_LOCALTIME_R
|
||||
|
||||
/* Define to 1 if you have the `localtime_s' function. */
|
||||
#undef HAVE_LOCALTIME_S
|
||||
|
||||
/* Define to 1 if you have the <malloc.h> header file. */
|
||||
#undef HAVE_MALLOC_H
|
||||
|
||||
/* Define to 1 if you have the `malloc_usable_size' function. */
|
||||
#undef HAVE_MALLOC_USABLE_SIZE
|
||||
|
||||
/* Define to 1 if you have the <memory.h> header file. */
|
||||
#undef HAVE_MEMORY_H
|
||||
|
||||
/* Define to 1 if you have the pread() function. */
|
||||
#undef HAVE_PREAD
|
||||
|
||||
/* Define to 1 if you have the pread64() function. */
|
||||
#undef HAVE_PREAD64
|
||||
|
||||
/* Define to 1 if you have the pwrite() function. */
|
||||
#undef HAVE_PWRITE
|
||||
|
||||
/* Define to 1 if you have the pwrite64() function. */
|
||||
#undef HAVE_PWRITE64
|
||||
|
||||
/* Define to 1 if you have the <stdint.h> header file. */
|
||||
#undef HAVE_STDINT_H
|
||||
|
||||
/* Define to 1 if you have the <stdlib.h> header file. */
|
||||
#undef HAVE_STDLIB_H
|
||||
|
||||
/* Define to 1 if you have the strchrnul() function */
|
||||
#undef HAVE_STRCHRNUL
|
||||
|
||||
/* Define to 1 if you have the <strings.h> header file. */
|
||||
#undef HAVE_STRINGS_H
|
||||
|
||||
/* Define to 1 if you have the <string.h> header file. */
|
||||
#undef HAVE_STRING_H
|
||||
|
||||
/* Define to 1 if you have the <sys/stat.h> header file. */
|
||||
#undef HAVE_SYS_STAT_H
|
||||
|
||||
/* Define to 1 if you have the <sys/types.h> header file. */
|
||||
#undef HAVE_SYS_TYPES_H
|
||||
|
||||
/* Define to 1 if the system has the type `uint16_t'. */
|
||||
#undef HAVE_UINT16_T
|
||||
|
||||
/* Define to 1 if the system has the type `uint32_t'. */
|
||||
#undef HAVE_UINT32_T
|
||||
|
||||
/* Define to 1 if the system has the type `uint64_t'. */
|
||||
#undef HAVE_UINT64_T
|
||||
|
||||
/* Define to 1 if the system has the type `uint8_t'. */
|
||||
#undef HAVE_UINT8_T
|
||||
|
||||
/* Define to 1 if the system has the type `uintptr_t'. */
|
||||
#undef HAVE_UINTPTR_T
|
||||
|
||||
/* Define to 1 if you have the <unistd.h> header file. */
|
||||
#undef HAVE_UNISTD_H
|
||||
|
||||
/* Define to 1 if you have the `usleep' function. */
|
||||
#undef HAVE_USLEEP
|
||||
|
||||
/* Define to 1 if you have the utime() library function. */
|
||||
#undef HAVE_UTIME
|
||||
|
||||
/* Define to the sub-directory in which libtool stores uninstalled libraries.
|
||||
*/
|
||||
#undef LT_OBJDIR
|
||||
|
||||
/* Define to the address where bug reports for this package should be sent. */
|
||||
#undef PACKAGE_BUGREPORT
|
||||
|
||||
/* Define to the full name of this package. */
|
||||
#undef PACKAGE_NAME
|
||||
|
||||
/* Define to the full name and version of this package. */
|
||||
#undef PACKAGE_STRING
|
||||
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#undef PACKAGE_TARNAME
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#undef PACKAGE_VERSION
|
||||
|
||||
/* Define to 1 if you have the ANSI C header files. */
|
||||
#undef STDC_HEADERS
|
||||
|
||||
/* Number of bits in a file offset, on hosts where this is settable. */
|
||||
#undef _FILE_OFFSET_BITS
|
||||
|
||||
/* Define for large files, on AIX-style hosts. */
|
||||
#undef _LARGE_FILES
|
||||
2719
config.sub
vendored
Executable file → Normal file
2719
config.sub
vendored
Executable file → Normal file
File diff suppressed because it is too large
Load diff
1004
configure.ac
1004
configure.ac
File diff suppressed because it is too large
Load diff
679
contrib/sqlitecon.tcl
Normal file
679
contrib/sqlitecon.tcl
Normal file
|
|
@ -0,0 +1,679 @@
|
|||
# A Tk console widget for SQLite. Invoke sqlitecon::create with a window name,
|
||||
# a prompt string, a title to set a new top-level window, and the SQLite
|
||||
# database handle. For example:
|
||||
#
|
||||
# sqlitecon::create .sqlcon {sql:- } {SQL Console} db
|
||||
#
|
||||
# A toplevel window is created that allows you to type in SQL commands to
|
||||
# be processed on the spot.
|
||||
#
|
||||
# A limited set of dot-commands are supported:
|
||||
#
|
||||
# .table
|
||||
# .schema ?TABLE?
|
||||
# .mode list|column|multicolumn|line
|
||||
# .exit
|
||||
#
|
||||
# In addition, a new SQL function named "edit()" is created. This function
|
||||
# takes a single text argument and returns a text result. Whenever the
|
||||
# the function is called, it pops up a new toplevel window containing a
|
||||
# text editor screen initialized to the argument. When the "OK" button
|
||||
# is pressed, whatever revised text is in the text editor is returned as
|
||||
# the result of the edit() function. This allows text fields of SQL tables
|
||||
# to be edited quickly and easily as follows:
|
||||
#
|
||||
# UPDATE table1 SET dscr = edit(dscr) WHERE rowid=15;
|
||||
#
|
||||
|
||||
|
||||
# Create a namespace to work in
|
||||
#
|
||||
namespace eval ::sqlitecon {
|
||||
# do nothing
|
||||
}
|
||||
|
||||
# Create a console widget named $w. The prompt string is $prompt.
|
||||
# The title at the top of the window is $title. The database connection
|
||||
# object is $db
|
||||
#
|
||||
proc sqlitecon::create {w prompt title db} {
|
||||
upvar #0 $w.t v
|
||||
if {[winfo exists $w]} {destroy $w}
|
||||
if {[info exists v]} {unset v}
|
||||
toplevel $w
|
||||
wm title $w $title
|
||||
wm iconname $w $title
|
||||
frame $w.mb -bd 2 -relief raised
|
||||
pack $w.mb -side top -fill x
|
||||
menubutton $w.mb.file -text File -menu $w.mb.file.m
|
||||
menubutton $w.mb.edit -text Edit -menu $w.mb.edit.m
|
||||
pack $w.mb.file $w.mb.edit -side left -padx 8 -pady 1
|
||||
set m [menu $w.mb.file.m -tearoff 0]
|
||||
$m add command -label {Close} -command "destroy $w"
|
||||
sqlitecon::create_child $w $prompt $w.mb.edit.m
|
||||
set v(db) $db
|
||||
$db function edit ::sqlitecon::_edit
|
||||
}
|
||||
|
||||
# This routine creates a console as a child window within a larger
|
||||
# window. It also creates an edit menu named "$editmenu" if $editmenu!="".
|
||||
# The calling function is responsible for posting the edit menu.
|
||||
#
|
||||
proc sqlitecon::create_child {w prompt editmenu} {
|
||||
upvar #0 $w.t v
|
||||
if {$editmenu!=""} {
|
||||
set m [menu $editmenu -tearoff 0]
|
||||
$m add command -label Cut -command "sqlitecon::Cut $w.t"
|
||||
$m add command -label Copy -command "sqlitecon::Copy $w.t"
|
||||
$m add command -label Paste -command "sqlitecon::Paste $w.t"
|
||||
$m add command -label {Clear Screen} -command "sqlitecon::Clear $w.t"
|
||||
$m add separator
|
||||
$m add command -label {Save As...} -command "sqlitecon::SaveFile $w.t"
|
||||
catch {$editmenu config -postcommand "sqlitecon::EnableEditMenu $w"}
|
||||
}
|
||||
scrollbar $w.sb -orient vertical -command "$w.t yview"
|
||||
pack $w.sb -side right -fill y
|
||||
text $w.t -font fixed -yscrollcommand "$w.sb set"
|
||||
pack $w.t -side right -fill both -expand 1
|
||||
bindtags $w.t Sqlitecon
|
||||
set v(editmenu) $editmenu
|
||||
set v(history) 0
|
||||
set v(historycnt) 0
|
||||
set v(current) -1
|
||||
set v(prompt) $prompt
|
||||
set v(prior) {}
|
||||
set v(plength) [string length $v(prompt)]
|
||||
set v(x) 0
|
||||
set v(y) 0
|
||||
set v(mode) column
|
||||
set v(header) on
|
||||
$w.t mark set insert end
|
||||
$w.t tag config ok -foreground blue
|
||||
$w.t tag config err -foreground red
|
||||
$w.t insert end $v(prompt)
|
||||
$w.t mark set out 1.0
|
||||
after idle "focus $w.t"
|
||||
}
|
||||
|
||||
bind Sqlitecon <1> {sqlitecon::Button1 %W %x %y}
|
||||
bind Sqlitecon <B1-Motion> {sqlitecon::B1Motion %W %x %y}
|
||||
bind Sqlitecon <B1-Leave> {sqlitecon::B1Leave %W %x %y}
|
||||
bind Sqlitecon <B1-Enter> {sqlitecon::cancelMotor %W}
|
||||
bind Sqlitecon <ButtonRelease-1> {sqlitecon::cancelMotor %W}
|
||||
bind Sqlitecon <KeyPress> {sqlitecon::Insert %W %A}
|
||||
bind Sqlitecon <Left> {sqlitecon::Left %W}
|
||||
bind Sqlitecon <Control-b> {sqlitecon::Left %W}
|
||||
bind Sqlitecon <Right> {sqlitecon::Right %W}
|
||||
bind Sqlitecon <Control-f> {sqlitecon::Right %W}
|
||||
bind Sqlitecon <BackSpace> {sqlitecon::Backspace %W}
|
||||
bind Sqlitecon <Control-h> {sqlitecon::Backspace %W}
|
||||
bind Sqlitecon <Delete> {sqlitecon::Delete %W}
|
||||
bind Sqlitecon <Control-d> {sqlitecon::Delete %W}
|
||||
bind Sqlitecon <Home> {sqlitecon::Home %W}
|
||||
bind Sqlitecon <Control-a> {sqlitecon::Home %W}
|
||||
bind Sqlitecon <End> {sqlitecon::End %W}
|
||||
bind Sqlitecon <Control-e> {sqlitecon::End %W}
|
||||
bind Sqlitecon <Return> {sqlitecon::Enter %W}
|
||||
bind Sqlitecon <KP_Enter> {sqlitecon::Enter %W}
|
||||
bind Sqlitecon <Up> {sqlitecon::Prior %W}
|
||||
bind Sqlitecon <Control-p> {sqlitecon::Prior %W}
|
||||
bind Sqlitecon <Down> {sqlitecon::Next %W}
|
||||
bind Sqlitecon <Control-n> {sqlitecon::Next %W}
|
||||
bind Sqlitecon <Control-k> {sqlitecon::EraseEOL %W}
|
||||
bind Sqlitecon <<Cut>> {sqlitecon::Cut %W}
|
||||
bind Sqlitecon <<Copy>> {sqlitecon::Copy %W}
|
||||
bind Sqlitecon <<Paste>> {sqlitecon::Paste %W}
|
||||
bind Sqlitecon <<Clear>> {sqlitecon::Clear %W}
|
||||
|
||||
# Insert a single character at the insertion cursor
|
||||
#
|
||||
proc sqlitecon::Insert {w a} {
|
||||
$w insert insert $a
|
||||
$w yview insert
|
||||
}
|
||||
|
||||
# Move the cursor one character to the left
|
||||
#
|
||||
proc sqlitecon::Left {w} {
|
||||
upvar #0 $w v
|
||||
scan [$w index insert] %d.%d row col
|
||||
if {$col>$v(plength)} {
|
||||
$w mark set insert "insert -1c"
|
||||
}
|
||||
}
|
||||
|
||||
# Erase the character to the left of the cursor
|
||||
#
|
||||
proc sqlitecon::Backspace {w} {
|
||||
upvar #0 $w v
|
||||
scan [$w index insert] %d.%d row col
|
||||
if {$col>$v(plength)} {
|
||||
$w delete {insert -1c}
|
||||
}
|
||||
}
|
||||
|
||||
# Erase to the end of the line
|
||||
#
|
||||
proc sqlitecon::EraseEOL {w} {
|
||||
upvar #0 $w v
|
||||
scan [$w index insert] %d.%d row col
|
||||
if {$col>=$v(plength)} {
|
||||
$w delete insert {insert lineend}
|
||||
}
|
||||
}
|
||||
|
||||
# Move the cursor one character to the right
|
||||
#
|
||||
proc sqlitecon::Right {w} {
|
||||
$w mark set insert "insert +1c"
|
||||
}
|
||||
|
||||
# Erase the character to the right of the cursor
|
||||
#
|
||||
proc sqlitecon::Delete w {
|
||||
$w delete insert
|
||||
}
|
||||
|
||||
# Move the cursor to the beginning of the current line
|
||||
#
|
||||
proc sqlitecon::Home w {
|
||||
upvar #0 $w v
|
||||
scan [$w index insert] %d.%d row col
|
||||
$w mark set insert $row.$v(plength)
|
||||
}
|
||||
|
||||
# Move the cursor to the end of the current line
|
||||
#
|
||||
proc sqlitecon::End w {
|
||||
$w mark set insert {insert lineend}
|
||||
}
|
||||
|
||||
# Add a line to the history
|
||||
#
|
||||
proc sqlitecon::addHistory {w line} {
|
||||
upvar #0 $w v
|
||||
if {$v(historycnt)>0} {
|
||||
set last [lindex $v(history) [expr $v(historycnt)-1]]
|
||||
if {[string compare $last $line]} {
|
||||
lappend v(history) $line
|
||||
incr v(historycnt)
|
||||
}
|
||||
} else {
|
||||
set v(history) [list $line]
|
||||
set v(historycnt) 1
|
||||
}
|
||||
set v(current) $v(historycnt)
|
||||
}
|
||||
|
||||
# Called when "Enter" is pressed. Do something with the line
|
||||
# of text that was entered.
|
||||
#
|
||||
proc sqlitecon::Enter w {
|
||||
upvar #0 $w v
|
||||
scan [$w index insert] %d.%d row col
|
||||
set start $row.$v(plength)
|
||||
set line [$w get $start "$start lineend"]
|
||||
$w insert end \n
|
||||
$w mark set out end
|
||||
if {$v(prior)==""} {
|
||||
set cmd $line
|
||||
} else {
|
||||
set cmd $v(prior)\n$line
|
||||
}
|
||||
if {[string index $cmd 0]=="." || [$v(db) complete $cmd]} {
|
||||
regsub -all {\n} [string trim $cmd] { } cmd2
|
||||
addHistory $w $cmd2
|
||||
set rc [catch {DoCommand $w $cmd} res]
|
||||
if {![winfo exists $w]} return
|
||||
if {$rc} {
|
||||
$w insert end $res\n err
|
||||
} elseif {[string length $res]>0} {
|
||||
$w insert end $res\n ok
|
||||
}
|
||||
set v(prior) {}
|
||||
$w insert end $v(prompt)
|
||||
} else {
|
||||
set v(prior) $cmd
|
||||
regsub -all {[^ ]} $v(prompt) . x
|
||||
$w insert end $x
|
||||
}
|
||||
$w mark set insert end
|
||||
$w mark set out {insert linestart}
|
||||
$w yview insert
|
||||
}
|
||||
|
||||
# Execute a single SQL command. Pay special attention to control
|
||||
# directives that begin with "."
|
||||
#
|
||||
# The return value is the text output from the command, properly
|
||||
# formatted.
|
||||
#
|
||||
proc sqlitecon::DoCommand {w cmd} {
|
||||
upvar #0 $w v
|
||||
set mode $v(mode)
|
||||
set header $v(header)
|
||||
if {[regexp {^(\.[a-z]+)} $cmd all word]} {
|
||||
if {$word==".mode"} {
|
||||
regexp {^.[a-z]+ +([a-z]+)} $cmd all v(mode)
|
||||
return {}
|
||||
} elseif {$word==".exit"} {
|
||||
destroy [winfo toplevel $w]
|
||||
return {}
|
||||
} elseif {$word==".header"} {
|
||||
regexp {^.[a-z]+ +([a-z]+)} $cmd all v(header)
|
||||
return {}
|
||||
} elseif {$word==".tables"} {
|
||||
set mode multicolumn
|
||||
set cmd {SELECT name FROM sqlite_master WHERE type='table'
|
||||
UNION ALL
|
||||
SELECT name FROM sqlite_temp_master WHERE type='table'}
|
||||
$v(db) eval {PRAGMA database_list} {
|
||||
if {$name!="temp" && $name!="main"} {
|
||||
append cmd "UNION ALL SELECT name FROM $name.sqlite_master\
|
||||
WHERE type='table'"
|
||||
}
|
||||
}
|
||||
append cmd { ORDER BY 1}
|
||||
} elseif {$word==".fullschema"} {
|
||||
set pattern %
|
||||
regexp {^.[a-z]+ +([^ ]+)} $cmd all pattern
|
||||
set mode list
|
||||
set header 0
|
||||
set cmd "SELECT sql FROM sqlite_master WHERE tbl_name LIKE '$pattern'
|
||||
AND sql NOT NULL UNION ALL SELECT sql FROM sqlite_temp_master
|
||||
WHERE tbl_name LIKE '$pattern' AND sql NOT NULL"
|
||||
$v(db) eval {PRAGMA database_list} {
|
||||
if {$name!="temp" && $name!="main"} {
|
||||
append cmd " UNION ALL SELECT sql FROM $name.sqlite_master\
|
||||
WHERE tbl_name LIKE '$pattern' AND sql NOT NULL"
|
||||
}
|
||||
}
|
||||
} elseif {$word==".schema"} {
|
||||
set pattern %
|
||||
regexp {^.[a-z]+ +([^ ]+)} $cmd all pattern
|
||||
set mode list
|
||||
set header 0
|
||||
set cmd "SELECT sql FROM sqlite_master WHERE name LIKE '$pattern'
|
||||
AND sql NOT NULL UNION ALL SELECT sql FROM sqlite_temp_master
|
||||
WHERE name LIKE '$pattern' AND sql NOT NULL"
|
||||
$v(db) eval {PRAGMA database_list} {
|
||||
if {$name!="temp" && $name!="main"} {
|
||||
append cmd " UNION ALL SELECT sql FROM $name.sqlite_master\
|
||||
WHERE name LIKE '$pattern' AND sql NOT NULL"
|
||||
}
|
||||
}
|
||||
} else {
|
||||
return \
|
||||
".exit\n.mode line|list|column\n.schema ?TABLENAME?\n.tables"
|
||||
}
|
||||
}
|
||||
set res {}
|
||||
if {$mode=="list"} {
|
||||
$v(db) eval $cmd x {
|
||||
set sep {}
|
||||
foreach col $x(*) {
|
||||
append res $sep$x($col)
|
||||
set sep |
|
||||
}
|
||||
append res \n
|
||||
}
|
||||
if {[info exists x(*)] && $header} {
|
||||
set sep {}
|
||||
set hdr {}
|
||||
foreach col $x(*) {
|
||||
append hdr $sep$col
|
||||
set sep |
|
||||
}
|
||||
set res $hdr\n$res
|
||||
}
|
||||
} elseif {[string range $mode 0 2]=="col"} {
|
||||
set y {}
|
||||
$v(db) eval $cmd x {
|
||||
foreach col $x(*) {
|
||||
if {![info exists cw($col)] || $cw($col)<[string length $x($col)]} {
|
||||
set cw($col) [string length $x($col)]
|
||||
}
|
||||
lappend y $x($col)
|
||||
}
|
||||
}
|
||||
if {[info exists x(*)] && $header} {
|
||||
set hdr {}
|
||||
set ln {}
|
||||
set dash ---------------------------------------------------------------
|
||||
append dash ------------------------------------------------------------
|
||||
foreach col $x(*) {
|
||||
if {![info exists cw($col)] || $cw($col)<[string length $col]} {
|
||||
set cw($col) [string length $col]
|
||||
}
|
||||
lappend hdr $col
|
||||
lappend ln [string range $dash 1 $cw($col)]
|
||||
}
|
||||
set y [concat $hdr $ln $y]
|
||||
}
|
||||
if {[info exists x(*)]} {
|
||||
set format {}
|
||||
set arglist {}
|
||||
set arglist2 {}
|
||||
set i 0
|
||||
foreach col $x(*) {
|
||||
lappend arglist x$i
|
||||
append arglist2 " \$x$i"
|
||||
incr i
|
||||
append format " %-$cw($col)s"
|
||||
}
|
||||
set format [string trimleft $format]\n
|
||||
if {[llength $arglist]>0} {
|
||||
foreach $arglist $y "append res \[format [list $format] $arglist2\]"
|
||||
}
|
||||
}
|
||||
} elseif {$mode=="multicolumn"} {
|
||||
set y [$v(db) eval $cmd]
|
||||
set max 0
|
||||
foreach e $y {
|
||||
if {$max<[string length $e]} {set max [string length $e]}
|
||||
}
|
||||
set ncol [expr {int(80/($max+2))}]
|
||||
if {$ncol<1} {set ncol 1}
|
||||
set nelem [llength $y]
|
||||
set nrow [expr {($nelem+$ncol-1)/$ncol}]
|
||||
set format "%-${max}s"
|
||||
for {set i 0} {$i<$nrow} {incr i} {
|
||||
set j $i
|
||||
while 1 {
|
||||
append res [format $format [lindex $y $j]]
|
||||
incr j $nrow
|
||||
if {$j>=$nelem} break
|
||||
append res { }
|
||||
}
|
||||
append res \n
|
||||
}
|
||||
} else {
|
||||
$v(db) eval $cmd x {
|
||||
foreach col $x(*) {append res "$col = $x($col)\n"}
|
||||
append res \n
|
||||
}
|
||||
}
|
||||
return [string trimright $res]
|
||||
}
|
||||
|
||||
# Change the line to the previous line
|
||||
#
|
||||
proc sqlitecon::Prior w {
|
||||
upvar #0 $w v
|
||||
if {$v(current)<=0} return
|
||||
incr v(current) -1
|
||||
set line [lindex $v(history) $v(current)]
|
||||
sqlitecon::SetLine $w $line
|
||||
}
|
||||
|
||||
# Change the line to the next line
|
||||
#
|
||||
proc sqlitecon::Next w {
|
||||
upvar #0 $w v
|
||||
if {$v(current)>=$v(historycnt)} return
|
||||
incr v(current) 1
|
||||
set line [lindex $v(history) $v(current)]
|
||||
sqlitecon::SetLine $w $line
|
||||
}
|
||||
|
||||
# Change the contents of the entry line
|
||||
#
|
||||
proc sqlitecon::SetLine {w line} {
|
||||
upvar #0 $w v
|
||||
scan [$w index insert] %d.%d row col
|
||||
set start $row.$v(plength)
|
||||
$w delete $start end
|
||||
$w insert end $line
|
||||
$w mark set insert end
|
||||
$w yview insert
|
||||
}
|
||||
|
||||
# Called when the mouse button is pressed at position $x,$y on
|
||||
# the console widget.
|
||||
#
|
||||
proc sqlitecon::Button1 {w x y} {
|
||||
global tkPriv
|
||||
upvar #0 $w v
|
||||
set v(mouseMoved) 0
|
||||
set v(pressX) $x
|
||||
set p [sqlitecon::nearestBoundry $w $x $y]
|
||||
scan [$w index insert] %d.%d ix iy
|
||||
scan $p %d.%d px py
|
||||
if {$px==$ix} {
|
||||
$w mark set insert $p
|
||||
}
|
||||
$w mark set anchor $p
|
||||
focus $w
|
||||
}
|
||||
|
||||
# Find the boundry between characters that is nearest
|
||||
# to $x,$y
|
||||
#
|
||||
proc sqlitecon::nearestBoundry {w x y} {
|
||||
set p [$w index @$x,$y]
|
||||
set bb [$w bbox $p]
|
||||
if {![string compare $bb ""]} {return $p}
|
||||
if {($x-[lindex $bb 0])<([lindex $bb 2]/2)} {return $p}
|
||||
$w index "$p + 1 char"
|
||||
}
|
||||
|
||||
# This routine extends the selection to the point specified by $x,$y
|
||||
#
|
||||
proc sqlitecon::SelectTo {w x y} {
|
||||
upvar #0 $w v
|
||||
set cur [sqlitecon::nearestBoundry $w $x $y]
|
||||
if {[catch {$w index anchor}]} {
|
||||
$w mark set anchor $cur
|
||||
}
|
||||
set anchor [$w index anchor]
|
||||
if {[$w compare $cur != $anchor] || (abs($v(pressX) - $x) >= 3)} {
|
||||
if {$v(mouseMoved)==0} {
|
||||
$w tag remove sel 0.0 end
|
||||
}
|
||||
set v(mouseMoved) 1
|
||||
}
|
||||
if {[$w compare $cur < anchor]} {
|
||||
set first $cur
|
||||
set last anchor
|
||||
} else {
|
||||
set first anchor
|
||||
set last $cur
|
||||
}
|
||||
if {$v(mouseMoved)} {
|
||||
$w tag remove sel 0.0 $first
|
||||
$w tag add sel $first $last
|
||||
$w tag remove sel $last end
|
||||
update idletasks
|
||||
}
|
||||
}
|
||||
|
||||
# Called whenever the mouse moves while button-1 is held down.
|
||||
#
|
||||
proc sqlitecon::B1Motion {w x y} {
|
||||
upvar #0 $w v
|
||||
set v(y) $y
|
||||
set v(x) $x
|
||||
sqlitecon::SelectTo $w $x $y
|
||||
}
|
||||
|
||||
# Called whenever the mouse leaves the boundries of the widget
|
||||
# while button 1 is held down.
|
||||
#
|
||||
proc sqlitecon::B1Leave {w x y} {
|
||||
upvar #0 $w v
|
||||
set v(y) $y
|
||||
set v(x) $x
|
||||
sqlitecon::motor $w
|
||||
}
|
||||
|
||||
# This routine is called to automatically scroll the window when
|
||||
# the mouse drags offscreen.
|
||||
#
|
||||
proc sqlitecon::motor w {
|
||||
upvar #0 $w v
|
||||
if {![winfo exists $w]} return
|
||||
if {$v(y)>=[winfo height $w]} {
|
||||
$w yview scroll 1 units
|
||||
} elseif {$v(y)<0} {
|
||||
$w yview scroll -1 units
|
||||
} else {
|
||||
return
|
||||
}
|
||||
sqlitecon::SelectTo $w $v(x) $v(y)
|
||||
set v(timer) [after 50 sqlitecon::motor $w]
|
||||
}
|
||||
|
||||
# This routine cancels the scrolling motor if it is active
|
||||
#
|
||||
proc sqlitecon::cancelMotor w {
|
||||
upvar #0 $w v
|
||||
catch {after cancel $v(timer)}
|
||||
catch {unset v(timer)}
|
||||
}
|
||||
|
||||
# Do a Copy operation on the stuff currently selected.
|
||||
#
|
||||
proc sqlitecon::Copy w {
|
||||
if {![catch {set text [$w get sel.first sel.last]}]} {
|
||||
clipboard clear -displayof $w
|
||||
clipboard append -displayof $w $text
|
||||
}
|
||||
}
|
||||
|
||||
# Return 1 if the selection exists and is contained
|
||||
# entirely on the input line. Return 2 if the selection
|
||||
# exists but is not entirely on the input line. Return 0
|
||||
# if the selection does not exist.
|
||||
#
|
||||
proc sqlitecon::canCut w {
|
||||
set r [catch {
|
||||
scan [$w index sel.first] %d.%d s1x s1y
|
||||
scan [$w index sel.last] %d.%d s2x s2y
|
||||
scan [$w index insert] %d.%d ix iy
|
||||
}]
|
||||
if {$r==1} {return 0}
|
||||
if {$s1x==$ix && $s2x==$ix} {return 1}
|
||||
return 2
|
||||
}
|
||||
|
||||
# Do a Cut operation if possible. Cuts are only allowed
|
||||
# if the current selection is entirely contained on the
|
||||
# current input line.
|
||||
#
|
||||
proc sqlitecon::Cut w {
|
||||
if {[sqlitecon::canCut $w]==1} {
|
||||
sqlitecon::Copy $w
|
||||
$w delete sel.first sel.last
|
||||
}
|
||||
}
|
||||
|
||||
# Do a paste opeation.
|
||||
#
|
||||
proc sqlitecon::Paste w {
|
||||
if {[sqlitecon::canCut $w]==1} {
|
||||
$w delete sel.first sel.last
|
||||
}
|
||||
if {[catch {selection get -displayof $w -selection CLIPBOARD} topaste]
|
||||
&& [catch {selection get -displayof $w -selection PRIMARY} topaste]} {
|
||||
return
|
||||
}
|
||||
if {[info exists ::$w]} {
|
||||
set prior 0
|
||||
foreach line [split $topaste \n] {
|
||||
if {$prior} {
|
||||
sqlitecon::Enter $w
|
||||
update
|
||||
}
|
||||
set prior 1
|
||||
$w insert insert $line
|
||||
}
|
||||
} else {
|
||||
$w insert insert $topaste
|
||||
}
|
||||
}
|
||||
|
||||
# Enable or disable entries in the Edit menu
|
||||
#
|
||||
proc sqlitecon::EnableEditMenu w {
|
||||
upvar #0 $w.t v
|
||||
set m $v(editmenu)
|
||||
if {$m=="" || ![winfo exists $m]} return
|
||||
switch [sqlitecon::canCut $w.t] {
|
||||
0 {
|
||||
$m entryconf Copy -state disabled
|
||||
$m entryconf Cut -state disabled
|
||||
}
|
||||
1 {
|
||||
$m entryconf Copy -state normal
|
||||
$m entryconf Cut -state normal
|
||||
}
|
||||
2 {
|
||||
$m entryconf Copy -state normal
|
||||
$m entryconf Cut -state disabled
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Prompt the user for the name of a writable file. Then write the
|
||||
# entire contents of the console screen to that file.
|
||||
#
|
||||
proc sqlitecon::SaveFile w {
|
||||
set types {
|
||||
{{Text Files} {.txt}}
|
||||
{{All Files} *}
|
||||
}
|
||||
set f [tk_getSaveFile -filetypes $types -title "Write Screen To..."]
|
||||
if {$f!=""} {
|
||||
if {[catch {open $f w} fd]} {
|
||||
tk_messageBox -type ok -icon error -message $fd
|
||||
} else {
|
||||
puts $fd [string trimright [$w get 1.0 end] \n]
|
||||
close $fd
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Erase everything from the console above the insertion line.
|
||||
#
|
||||
proc sqlitecon::Clear w {
|
||||
$w delete 1.0 {insert linestart}
|
||||
}
|
||||
|
||||
# An in-line editor for SQL
|
||||
#
|
||||
proc sqlitecon::_edit {origtxt {title {}}} {
|
||||
for {set i 0} {[winfo exists .ed$i]} {incr i} continue
|
||||
set w .ed$i
|
||||
toplevel $w
|
||||
wm protocol $w WM_DELETE_WINDOW "$w.b.can invoke"
|
||||
wm title $w {Inline SQL Editor}
|
||||
frame $w.b
|
||||
pack $w.b -side bottom -fill x
|
||||
button $w.b.can -text Cancel -width 6 -command [list set ::$w 0]
|
||||
button $w.b.ok -text OK -width 6 -command [list set ::$w 1]
|
||||
button $w.b.cut -text Cut -width 6 -command [list ::sqlitecon::Cut $w.t]
|
||||
button $w.b.copy -text Copy -width 6 -command [list ::sqlitecon::Copy $w.t]
|
||||
button $w.b.paste -text Paste -width 6 -command [list ::sqlitecon::Paste $w.t]
|
||||
set ::$w {}
|
||||
pack $w.b.cut $w.b.copy $w.b.paste $w.b.can $w.b.ok\
|
||||
-side left -padx 5 -pady 5 -expand 1
|
||||
if {$title!=""} {
|
||||
label $w.title -text $title
|
||||
pack $w.title -side top -padx 5 -pady 5
|
||||
}
|
||||
text $w.t -bg white -fg black -yscrollcommand [list $w.sb set]
|
||||
pack $w.t -side left -fill both -expand 1
|
||||
scrollbar $w.sb -orient vertical -command [list $w.t yview]
|
||||
pack $w.sb -side left -fill y
|
||||
$w.t insert end $origtxt
|
||||
|
||||
vwait ::$w
|
||||
|
||||
if {[set ::$w]} {
|
||||
set txt [string trimright [$w.t get 1.0 end]]
|
||||
} else {
|
||||
set txt $origtxt
|
||||
}
|
||||
destroy $w
|
||||
return $txt
|
||||
}
|
||||
791
depcomp
791
depcomp
|
|
@ -1,791 +0,0 @@
|
|||
#! /bin/sh
|
||||
# depcomp - compile a program generating dependencies as side-effects
|
||||
|
||||
scriptversion=2016-01-11.22; # UTC
|
||||
|
||||
# Copyright (C) 1999-2017 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2, or (at your option)
|
||||
# any later version.
|
||||
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
# As a special exception to the GNU General Public License, if you
|
||||
# distribute this file as part of a program that contains a
|
||||
# configuration script generated by Autoconf, you may include it under
|
||||
# the same distribution terms that you use for the rest of that program.
|
||||
|
||||
# Originally written by Alexandre Oliva <oliva@dcc.unicamp.br>.
|
||||
|
||||
case $1 in
|
||||
'')
|
||||
echo "$0: No command. Try '$0 --help' for more information." 1>&2
|
||||
exit 1;
|
||||
;;
|
||||
-h | --h*)
|
||||
cat <<\EOF
|
||||
Usage: depcomp [--help] [--version] PROGRAM [ARGS]
|
||||
|
||||
Run PROGRAMS ARGS to compile a file, generating dependencies
|
||||
as side-effects.
|
||||
|
||||
Environment variables:
|
||||
depmode Dependency tracking mode.
|
||||
source Source file read by 'PROGRAMS ARGS'.
|
||||
object Object file output by 'PROGRAMS ARGS'.
|
||||
DEPDIR directory where to store dependencies.
|
||||
depfile Dependency file to output.
|
||||
tmpdepfile Temporary file to use when outputting dependencies.
|
||||
libtool Whether libtool is used (yes/no).
|
||||
|
||||
Report bugs to <bug-automake@gnu.org>.
|
||||
EOF
|
||||
exit $?
|
||||
;;
|
||||
-v | --v*)
|
||||
echo "depcomp $scriptversion"
|
||||
exit $?
|
||||
;;
|
||||
esac
|
||||
|
||||
# Get the directory component of the given path, and save it in the
|
||||
# global variables '$dir'. Note that this directory component will
|
||||
# be either empty or ending with a '/' character. This is deliberate.
|
||||
set_dir_from ()
|
||||
{
|
||||
case $1 in
|
||||
*/*) dir=`echo "$1" | sed -e 's|/[^/]*$|/|'`;;
|
||||
*) dir=;;
|
||||
esac
|
||||
}
|
||||
|
||||
# Get the suffix-stripped basename of the given path, and save it the
|
||||
# global variable '$base'.
|
||||
set_base_from ()
|
||||
{
|
||||
base=`echo "$1" | sed -e 's|^.*/||' -e 's/\.[^.]*$//'`
|
||||
}
|
||||
|
||||
# If no dependency file was actually created by the compiler invocation,
|
||||
# we still have to create a dummy depfile, to avoid errors with the
|
||||
# Makefile "include basename.Plo" scheme.
|
||||
make_dummy_depfile ()
|
||||
{
|
||||
echo "#dummy" > "$depfile"
|
||||
}
|
||||
|
||||
# Factor out some common post-processing of the generated depfile.
|
||||
# Requires the auxiliary global variable '$tmpdepfile' to be set.
|
||||
aix_post_process_depfile ()
|
||||
{
|
||||
# If the compiler actually managed to produce a dependency file,
|
||||
# post-process it.
|
||||
if test -f "$tmpdepfile"; then
|
||||
# Each line is of the form 'foo.o: dependency.h'.
|
||||
# Do two passes, one to just change these to
|
||||
# $object: dependency.h
|
||||
# and one to simply output
|
||||
# dependency.h:
|
||||
# which is needed to avoid the deleted-header problem.
|
||||
{ sed -e "s,^.*\.[$lower]*:,$object:," < "$tmpdepfile"
|
||||
sed -e "s,^.*\.[$lower]*:[$tab ]*,," -e 's,$,:,' < "$tmpdepfile"
|
||||
} > "$depfile"
|
||||
rm -f "$tmpdepfile"
|
||||
else
|
||||
make_dummy_depfile
|
||||
fi
|
||||
}
|
||||
|
||||
# A tabulation character.
|
||||
tab=' '
|
||||
# A newline character.
|
||||
nl='
|
||||
'
|
||||
# Character ranges might be problematic outside the C locale.
|
||||
# These definitions help.
|
||||
upper=ABCDEFGHIJKLMNOPQRSTUVWXYZ
|
||||
lower=abcdefghijklmnopqrstuvwxyz
|
||||
digits=0123456789
|
||||
alpha=${upper}${lower}
|
||||
|
||||
if test -z "$depmode" || test -z "$source" || test -z "$object"; then
|
||||
echo "depcomp: Variables source, object and depmode must be set" 1>&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Dependencies for sub/bar.o or sub/bar.obj go into sub/.deps/bar.Po.
|
||||
depfile=${depfile-`echo "$object" |
|
||||
sed 's|[^\\/]*$|'${DEPDIR-.deps}'/&|;s|\.\([^.]*\)$|.P\1|;s|Pobj$|Po|'`}
|
||||
tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`}
|
||||
|
||||
rm -f "$tmpdepfile"
|
||||
|
||||
# Avoid interferences from the environment.
|
||||
gccflag= dashmflag=
|
||||
|
||||
# Some modes work just like other modes, but use different flags. We
|
||||
# parameterize here, but still list the modes in the big case below,
|
||||
# to make depend.m4 easier to write. Note that we *cannot* use a case
|
||||
# here, because this file can only contain one case statement.
|
||||
if test "$depmode" = hp; then
|
||||
# HP compiler uses -M and no extra arg.
|
||||
gccflag=-M
|
||||
depmode=gcc
|
||||
fi
|
||||
|
||||
if test "$depmode" = dashXmstdout; then
|
||||
# This is just like dashmstdout with a different argument.
|
||||
dashmflag=-xM
|
||||
depmode=dashmstdout
|
||||
fi
|
||||
|
||||
cygpath_u="cygpath -u -f -"
|
||||
if test "$depmode" = msvcmsys; then
|
||||
# This is just like msvisualcpp but w/o cygpath translation.
|
||||
# Just convert the backslash-escaped backslashes to single forward
|
||||
# slashes to satisfy depend.m4
|
||||
cygpath_u='sed s,\\\\,/,g'
|
||||
depmode=msvisualcpp
|
||||
fi
|
||||
|
||||
if test "$depmode" = msvc7msys; then
|
||||
# This is just like msvc7 but w/o cygpath translation.
|
||||
# Just convert the backslash-escaped backslashes to single forward
|
||||
# slashes to satisfy depend.m4
|
||||
cygpath_u='sed s,\\\\,/,g'
|
||||
depmode=msvc7
|
||||
fi
|
||||
|
||||
if test "$depmode" = xlc; then
|
||||
# IBM C/C++ Compilers xlc/xlC can output gcc-like dependency information.
|
||||
gccflag=-qmakedep=gcc,-MF
|
||||
depmode=gcc
|
||||
fi
|
||||
|
||||
case "$depmode" in
|
||||
gcc3)
|
||||
## gcc 3 implements dependency tracking that does exactly what
|
||||
## we want. Yay! Note: for some reason libtool 1.4 doesn't like
|
||||
## it if -MD -MP comes after the -MF stuff. Hmm.
|
||||
## Unfortunately, FreeBSD c89 acceptance of flags depends upon
|
||||
## the command line argument order; so add the flags where they
|
||||
## appear in depend2.am. Note that the slowdown incurred here
|
||||
## affects only configure: in makefiles, %FASTDEP% shortcuts this.
|
||||
for arg
|
||||
do
|
||||
case $arg in
|
||||
-c) set fnord "$@" -MT "$object" -MD -MP -MF "$tmpdepfile" "$arg" ;;
|
||||
*) set fnord "$@" "$arg" ;;
|
||||
esac
|
||||
shift # fnord
|
||||
shift # $arg
|
||||
done
|
||||
"$@"
|
||||
stat=$?
|
||||
if test $stat -ne 0; then
|
||||
rm -f "$tmpdepfile"
|
||||
exit $stat
|
||||
fi
|
||||
mv "$tmpdepfile" "$depfile"
|
||||
;;
|
||||
|
||||
gcc)
|
||||
## Note that this doesn't just cater to obsosete pre-3.x GCC compilers.
|
||||
## but also to in-use compilers like IMB xlc/xlC and the HP C compiler.
|
||||
## (see the conditional assignment to $gccflag above).
|
||||
## There are various ways to get dependency output from gcc. Here's
|
||||
## why we pick this rather obscure method:
|
||||
## - Don't want to use -MD because we'd like the dependencies to end
|
||||
## up in a subdir. Having to rename by hand is ugly.
|
||||
## (We might end up doing this anyway to support other compilers.)
|
||||
## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like
|
||||
## -MM, not -M (despite what the docs say). Also, it might not be
|
||||
## supported by the other compilers which use the 'gcc' depmode.
|
||||
## - Using -M directly means running the compiler twice (even worse
|
||||
## than renaming).
|
||||
if test -z "$gccflag"; then
|
||||
gccflag=-MD,
|
||||
fi
|
||||
"$@" -Wp,"$gccflag$tmpdepfile"
|
||||
stat=$?
|
||||
if test $stat -ne 0; then
|
||||
rm -f "$tmpdepfile"
|
||||
exit $stat
|
||||
fi
|
||||
rm -f "$depfile"
|
||||
echo "$object : \\" > "$depfile"
|
||||
# The second -e expression handles DOS-style file names with drive
|
||||
# letters.
|
||||
sed -e 's/^[^:]*: / /' \
|
||||
-e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile"
|
||||
## This next piece of magic avoids the "deleted header file" problem.
|
||||
## The problem is that when a header file which appears in a .P file
|
||||
## is deleted, the dependency causes make to die (because there is
|
||||
## typically no way to rebuild the header). We avoid this by adding
|
||||
## dummy dependencies for each header file. Too bad gcc doesn't do
|
||||
## this for us directly.
|
||||
## Some versions of gcc put a space before the ':'. On the theory
|
||||
## that the space means something, we add a space to the output as
|
||||
## well. hp depmode also adds that space, but also prefixes the VPATH
|
||||
## to the object. Take care to not repeat it in the output.
|
||||
## Some versions of the HPUX 10.20 sed can't process this invocation
|
||||
## correctly. Breaking it into two sed invocations is a workaround.
|
||||
tr ' ' "$nl" < "$tmpdepfile" \
|
||||
| sed -e 's/^\\$//' -e '/^$/d' -e "s|.*$object$||" -e '/:$/d' \
|
||||
| sed -e 's/$/ :/' >> "$depfile"
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
hp)
|
||||
# This case exists only to let depend.m4 do its work. It works by
|
||||
# looking at the text of this script. This case will never be run,
|
||||
# since it is checked for above.
|
||||
exit 1
|
||||
;;
|
||||
|
||||
sgi)
|
||||
if test "$libtool" = yes; then
|
||||
"$@" "-Wp,-MDupdate,$tmpdepfile"
|
||||
else
|
||||
"$@" -MDupdate "$tmpdepfile"
|
||||
fi
|
||||
stat=$?
|
||||
if test $stat -ne 0; then
|
||||
rm -f "$tmpdepfile"
|
||||
exit $stat
|
||||
fi
|
||||
rm -f "$depfile"
|
||||
|
||||
if test -f "$tmpdepfile"; then # yes, the sourcefile depend on other files
|
||||
echo "$object : \\" > "$depfile"
|
||||
# Clip off the initial element (the dependent). Don't try to be
|
||||
# clever and replace this with sed code, as IRIX sed won't handle
|
||||
# lines with more than a fixed number of characters (4096 in
|
||||
# IRIX 6.2 sed, 8192 in IRIX 6.5). We also remove comment lines;
|
||||
# the IRIX cc adds comments like '#:fec' to the end of the
|
||||
# dependency line.
|
||||
tr ' ' "$nl" < "$tmpdepfile" \
|
||||
| sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' \
|
||||
| tr "$nl" ' ' >> "$depfile"
|
||||
echo >> "$depfile"
|
||||
# The second pass generates a dummy entry for each header file.
|
||||
tr ' ' "$nl" < "$tmpdepfile" \
|
||||
| sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \
|
||||
>> "$depfile"
|
||||
else
|
||||
make_dummy_depfile
|
||||
fi
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
xlc)
|
||||
# This case exists only to let depend.m4 do its work. It works by
|
||||
# looking at the text of this script. This case will never be run,
|
||||
# since it is checked for above.
|
||||
exit 1
|
||||
;;
|
||||
|
||||
aix)
|
||||
# The C for AIX Compiler uses -M and outputs the dependencies
|
||||
# in a .u file. In older versions, this file always lives in the
|
||||
# current directory. Also, the AIX compiler puts '$object:' at the
|
||||
# start of each line; $object doesn't have directory information.
|
||||
# Version 6 uses the directory in both cases.
|
||||
set_dir_from "$object"
|
||||
set_base_from "$object"
|
||||
if test "$libtool" = yes; then
|
||||
tmpdepfile1=$dir$base.u
|
||||
tmpdepfile2=$base.u
|
||||
tmpdepfile3=$dir.libs/$base.u
|
||||
"$@" -Wc,-M
|
||||
else
|
||||
tmpdepfile1=$dir$base.u
|
||||
tmpdepfile2=$dir$base.u
|
||||
tmpdepfile3=$dir$base.u
|
||||
"$@" -M
|
||||
fi
|
||||
stat=$?
|
||||
if test $stat -ne 0; then
|
||||
rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
|
||||
exit $stat
|
||||
fi
|
||||
|
||||
for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
|
||||
do
|
||||
test -f "$tmpdepfile" && break
|
||||
done
|
||||
aix_post_process_depfile
|
||||
;;
|
||||
|
||||
tcc)
|
||||
# tcc (Tiny C Compiler) understand '-MD -MF file' since version 0.9.26
|
||||
# FIXME: That version still under development at the moment of writing.
|
||||
# Make that this statement remains true also for stable, released
|
||||
# versions.
|
||||
# It will wrap lines (doesn't matter whether long or short) with a
|
||||
# trailing '\', as in:
|
||||
#
|
||||
# foo.o : \
|
||||
# foo.c \
|
||||
# foo.h \
|
||||
#
|
||||
# It will put a trailing '\' even on the last line, and will use leading
|
||||
# spaces rather than leading tabs (at least since its commit 0394caf7
|
||||
# "Emit spaces for -MD").
|
||||
"$@" -MD -MF "$tmpdepfile"
|
||||
stat=$?
|
||||
if test $stat -ne 0; then
|
||||
rm -f "$tmpdepfile"
|
||||
exit $stat
|
||||
fi
|
||||
rm -f "$depfile"
|
||||
# Each non-empty line is of the form 'foo.o : \' or ' dep.h \'.
|
||||
# We have to change lines of the first kind to '$object: \'.
|
||||
sed -e "s|.*:|$object :|" < "$tmpdepfile" > "$depfile"
|
||||
# And for each line of the second kind, we have to emit a 'dep.h:'
|
||||
# dummy dependency, to avoid the deleted-header problem.
|
||||
sed -n -e 's|^ *\(.*\) *\\$|\1:|p' < "$tmpdepfile" >> "$depfile"
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
## The order of this option in the case statement is important, since the
|
||||
## shell code in configure will try each of these formats in the order
|
||||
## listed in this file. A plain '-MD' option would be understood by many
|
||||
## compilers, so we must ensure this comes after the gcc and icc options.
|
||||
pgcc)
|
||||
# Portland's C compiler understands '-MD'.
|
||||
# Will always output deps to 'file.d' where file is the root name of the
|
||||
# source file under compilation, even if file resides in a subdirectory.
|
||||
# The object file name does not affect the name of the '.d' file.
|
||||
# pgcc 10.2 will output
|
||||
# foo.o: sub/foo.c sub/foo.h
|
||||
# and will wrap long lines using '\' :
|
||||
# foo.o: sub/foo.c ... \
|
||||
# sub/foo.h ... \
|
||||
# ...
|
||||
set_dir_from "$object"
|
||||
# Use the source, not the object, to determine the base name, since
|
||||
# that's sadly what pgcc will do too.
|
||||
set_base_from "$source"
|
||||
tmpdepfile=$base.d
|
||||
|
||||
# For projects that build the same source file twice into different object
|
||||
# files, the pgcc approach of using the *source* file root name can cause
|
||||
# problems in parallel builds. Use a locking strategy to avoid stomping on
|
||||
# the same $tmpdepfile.
|
||||
lockdir=$base.d-lock
|
||||
trap "
|
||||
echo '$0: caught signal, cleaning up...' >&2
|
||||
rmdir '$lockdir'
|
||||
exit 1
|
||||
" 1 2 13 15
|
||||
numtries=100
|
||||
i=$numtries
|
||||
while test $i -gt 0; do
|
||||
# mkdir is a portable test-and-set.
|
||||
if mkdir "$lockdir" 2>/dev/null; then
|
||||
# This process acquired the lock.
|
||||
"$@" -MD
|
||||
stat=$?
|
||||
# Release the lock.
|
||||
rmdir "$lockdir"
|
||||
break
|
||||
else
|
||||
# If the lock is being held by a different process, wait
|
||||
# until the winning process is done or we timeout.
|
||||
while test -d "$lockdir" && test $i -gt 0; do
|
||||
sleep 1
|
||||
i=`expr $i - 1`
|
||||
done
|
||||
fi
|
||||
i=`expr $i - 1`
|
||||
done
|
||||
trap - 1 2 13 15
|
||||
if test $i -le 0; then
|
||||
echo "$0: failed to acquire lock after $numtries attempts" >&2
|
||||
echo "$0: check lockdir '$lockdir'" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if test $stat -ne 0; then
|
||||
rm -f "$tmpdepfile"
|
||||
exit $stat
|
||||
fi
|
||||
rm -f "$depfile"
|
||||
# Each line is of the form `foo.o: dependent.h',
|
||||
# or `foo.o: dep1.h dep2.h \', or ` dep3.h dep4.h \'.
|
||||
# Do two passes, one to just change these to
|
||||
# `$object: dependent.h' and one to simply `dependent.h:'.
|
||||
sed "s,^[^:]*:,$object :," < "$tmpdepfile" > "$depfile"
|
||||
# Some versions of the HPUX 10.20 sed can't process this invocation
|
||||
# correctly. Breaking it into two sed invocations is a workaround.
|
||||
sed 's,^[^:]*: \(.*\)$,\1,;s/^\\$//;/^$/d;/:$/d' < "$tmpdepfile" \
|
||||
| sed -e 's/$/ :/' >> "$depfile"
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
hp2)
|
||||
# The "hp" stanza above does not work with aCC (C++) and HP's ia64
|
||||
# compilers, which have integrated preprocessors. The correct option
|
||||
# to use with these is +Maked; it writes dependencies to a file named
|
||||
# 'foo.d', which lands next to the object file, wherever that
|
||||
# happens to be.
|
||||
# Much of this is similar to the tru64 case; see comments there.
|
||||
set_dir_from "$object"
|
||||
set_base_from "$object"
|
||||
if test "$libtool" = yes; then
|
||||
tmpdepfile1=$dir$base.d
|
||||
tmpdepfile2=$dir.libs/$base.d
|
||||
"$@" -Wc,+Maked
|
||||
else
|
||||
tmpdepfile1=$dir$base.d
|
||||
tmpdepfile2=$dir$base.d
|
||||
"$@" +Maked
|
||||
fi
|
||||
stat=$?
|
||||
if test $stat -ne 0; then
|
||||
rm -f "$tmpdepfile1" "$tmpdepfile2"
|
||||
exit $stat
|
||||
fi
|
||||
|
||||
for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2"
|
||||
do
|
||||
test -f "$tmpdepfile" && break
|
||||
done
|
||||
if test -f "$tmpdepfile"; then
|
||||
sed -e "s,^.*\.[$lower]*:,$object:," "$tmpdepfile" > "$depfile"
|
||||
# Add 'dependent.h:' lines.
|
||||
sed -ne '2,${
|
||||
s/^ *//
|
||||
s/ \\*$//
|
||||
s/$/:/
|
||||
p
|
||||
}' "$tmpdepfile" >> "$depfile"
|
||||
else
|
||||
make_dummy_depfile
|
||||
fi
|
||||
rm -f "$tmpdepfile" "$tmpdepfile2"
|
||||
;;
|
||||
|
||||
tru64)
|
||||
# The Tru64 compiler uses -MD to generate dependencies as a side
|
||||
# effect. 'cc -MD -o foo.o ...' puts the dependencies into 'foo.o.d'.
|
||||
# At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put
|
||||
# dependencies in 'foo.d' instead, so we check for that too.
|
||||
# Subdirectories are respected.
|
||||
set_dir_from "$object"
|
||||
set_base_from "$object"
|
||||
|
||||
if test "$libtool" = yes; then
|
||||
# Libtool generates 2 separate objects for the 2 libraries. These
|
||||
# two compilations output dependencies in $dir.libs/$base.o.d and
|
||||
# in $dir$base.o.d. We have to check for both files, because
|
||||
# one of the two compilations can be disabled. We should prefer
|
||||
# $dir$base.o.d over $dir.libs/$base.o.d because the latter is
|
||||
# automatically cleaned when .libs/ is deleted, while ignoring
|
||||
# the former would cause a distcleancheck panic.
|
||||
tmpdepfile1=$dir$base.o.d # libtool 1.5
|
||||
tmpdepfile2=$dir.libs/$base.o.d # Likewise.
|
||||
tmpdepfile3=$dir.libs/$base.d # Compaq CCC V6.2-504
|
||||
"$@" -Wc,-MD
|
||||
else
|
||||
tmpdepfile1=$dir$base.d
|
||||
tmpdepfile2=$dir$base.d
|
||||
tmpdepfile3=$dir$base.d
|
||||
"$@" -MD
|
||||
fi
|
||||
|
||||
stat=$?
|
||||
if test $stat -ne 0; then
|
||||
rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
|
||||
exit $stat
|
||||
fi
|
||||
|
||||
for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
|
||||
do
|
||||
test -f "$tmpdepfile" && break
|
||||
done
|
||||
# Same post-processing that is required for AIX mode.
|
||||
aix_post_process_depfile
|
||||
;;
|
||||
|
||||
msvc7)
|
||||
if test "$libtool" = yes; then
|
||||
showIncludes=-Wc,-showIncludes
|
||||
else
|
||||
showIncludes=-showIncludes
|
||||
fi
|
||||
"$@" $showIncludes > "$tmpdepfile"
|
||||
stat=$?
|
||||
grep -v '^Note: including file: ' "$tmpdepfile"
|
||||
if test $stat -ne 0; then
|
||||
rm -f "$tmpdepfile"
|
||||
exit $stat
|
||||
fi
|
||||
rm -f "$depfile"
|
||||
echo "$object : \\" > "$depfile"
|
||||
# The first sed program below extracts the file names and escapes
|
||||
# backslashes for cygpath. The second sed program outputs the file
|
||||
# name when reading, but also accumulates all include files in the
|
||||
# hold buffer in order to output them again at the end. This only
|
||||
# works with sed implementations that can handle large buffers.
|
||||
sed < "$tmpdepfile" -n '
|
||||
/^Note: including file: *\(.*\)/ {
|
||||
s//\1/
|
||||
s/\\/\\\\/g
|
||||
p
|
||||
}' | $cygpath_u | sort -u | sed -n '
|
||||
s/ /\\ /g
|
||||
s/\(.*\)/'"$tab"'\1 \\/p
|
||||
s/.\(.*\) \\/\1:/
|
||||
H
|
||||
$ {
|
||||
s/.*/'"$tab"'/
|
||||
G
|
||||
p
|
||||
}' >> "$depfile"
|
||||
echo >> "$depfile" # make sure the fragment doesn't end with a backslash
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
msvc7msys)
|
||||
# This case exists only to let depend.m4 do its work. It works by
|
||||
# looking at the text of this script. This case will never be run,
|
||||
# since it is checked for above.
|
||||
exit 1
|
||||
;;
|
||||
|
||||
#nosideeffect)
|
||||
# This comment above is used by automake to tell side-effect
|
||||
# dependency tracking mechanisms from slower ones.
|
||||
|
||||
dashmstdout)
|
||||
# Important note: in order to support this mode, a compiler *must*
|
||||
# always write the preprocessed file to stdout, regardless of -o.
|
||||
"$@" || exit $?
|
||||
|
||||
# Remove the call to Libtool.
|
||||
if test "$libtool" = yes; then
|
||||
while test "X$1" != 'X--mode=compile'; do
|
||||
shift
|
||||
done
|
||||
shift
|
||||
fi
|
||||
|
||||
# Remove '-o $object'.
|
||||
IFS=" "
|
||||
for arg
|
||||
do
|
||||
case $arg in
|
||||
-o)
|
||||
shift
|
||||
;;
|
||||
$object)
|
||||
shift
|
||||
;;
|
||||
*)
|
||||
set fnord "$@" "$arg"
|
||||
shift # fnord
|
||||
shift # $arg
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
test -z "$dashmflag" && dashmflag=-M
|
||||
# Require at least two characters before searching for ':'
|
||||
# in the target name. This is to cope with DOS-style filenames:
|
||||
# a dependency such as 'c:/foo/bar' could be seen as target 'c' otherwise.
|
||||
"$@" $dashmflag |
|
||||
sed "s|^[$tab ]*[^:$tab ][^:][^:]*:[$tab ]*|$object: |" > "$tmpdepfile"
|
||||
rm -f "$depfile"
|
||||
cat < "$tmpdepfile" > "$depfile"
|
||||
# Some versions of the HPUX 10.20 sed can't process this sed invocation
|
||||
# correctly. Breaking it into two sed invocations is a workaround.
|
||||
tr ' ' "$nl" < "$tmpdepfile" \
|
||||
| sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \
|
||||
| sed -e 's/$/ :/' >> "$depfile"
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
dashXmstdout)
|
||||
# This case only exists to satisfy depend.m4. It is never actually
|
||||
# run, as this mode is specially recognized in the preamble.
|
||||
exit 1
|
||||
;;
|
||||
|
||||
makedepend)
|
||||
"$@" || exit $?
|
||||
# Remove any Libtool call
|
||||
if test "$libtool" = yes; then
|
||||
while test "X$1" != 'X--mode=compile'; do
|
||||
shift
|
||||
done
|
||||
shift
|
||||
fi
|
||||
# X makedepend
|
||||
shift
|
||||
cleared=no eat=no
|
||||
for arg
|
||||
do
|
||||
case $cleared in
|
||||
no)
|
||||
set ""; shift
|
||||
cleared=yes ;;
|
||||
esac
|
||||
if test $eat = yes; then
|
||||
eat=no
|
||||
continue
|
||||
fi
|
||||
case "$arg" in
|
||||
-D*|-I*)
|
||||
set fnord "$@" "$arg"; shift ;;
|
||||
# Strip any option that makedepend may not understand. Remove
|
||||
# the object too, otherwise makedepend will parse it as a source file.
|
||||
-arch)
|
||||
eat=yes ;;
|
||||
-*|$object)
|
||||
;;
|
||||
*)
|
||||
set fnord "$@" "$arg"; shift ;;
|
||||
esac
|
||||
done
|
||||
obj_suffix=`echo "$object" | sed 's/^.*\././'`
|
||||
touch "$tmpdepfile"
|
||||
${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@"
|
||||
rm -f "$depfile"
|
||||
# makedepend may prepend the VPATH from the source file name to the object.
|
||||
# No need to regex-escape $object, excess matching of '.' is harmless.
|
||||
sed "s|^.*\($object *:\)|\1|" "$tmpdepfile" > "$depfile"
|
||||
# Some versions of the HPUX 10.20 sed can't process the last invocation
|
||||
# correctly. Breaking it into two sed invocations is a workaround.
|
||||
sed '1,2d' "$tmpdepfile" \
|
||||
| tr ' ' "$nl" \
|
||||
| sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \
|
||||
| sed -e 's/$/ :/' >> "$depfile"
|
||||
rm -f "$tmpdepfile" "$tmpdepfile".bak
|
||||
;;
|
||||
|
||||
cpp)
|
||||
# Important note: in order to support this mode, a compiler *must*
|
||||
# always write the preprocessed file to stdout.
|
||||
"$@" || exit $?
|
||||
|
||||
# Remove the call to Libtool.
|
||||
if test "$libtool" = yes; then
|
||||
while test "X$1" != 'X--mode=compile'; do
|
||||
shift
|
||||
done
|
||||
shift
|
||||
fi
|
||||
|
||||
# Remove '-o $object'.
|
||||
IFS=" "
|
||||
for arg
|
||||
do
|
||||
case $arg in
|
||||
-o)
|
||||
shift
|
||||
;;
|
||||
$object)
|
||||
shift
|
||||
;;
|
||||
*)
|
||||
set fnord "$@" "$arg"
|
||||
shift # fnord
|
||||
shift # $arg
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
"$@" -E \
|
||||
| sed -n -e '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \
|
||||
-e '/^#line [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \
|
||||
| sed '$ s: \\$::' > "$tmpdepfile"
|
||||
rm -f "$depfile"
|
||||
echo "$object : \\" > "$depfile"
|
||||
cat < "$tmpdepfile" >> "$depfile"
|
||||
sed < "$tmpdepfile" '/^$/d;s/^ //;s/ \\$//;s/$/ :/' >> "$depfile"
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
msvisualcpp)
|
||||
# Important note: in order to support this mode, a compiler *must*
|
||||
# always write the preprocessed file to stdout.
|
||||
"$@" || exit $?
|
||||
|
||||
# Remove the call to Libtool.
|
||||
if test "$libtool" = yes; then
|
||||
while test "X$1" != 'X--mode=compile'; do
|
||||
shift
|
||||
done
|
||||
shift
|
||||
fi
|
||||
|
||||
IFS=" "
|
||||
for arg
|
||||
do
|
||||
case "$arg" in
|
||||
-o)
|
||||
shift
|
||||
;;
|
||||
$object)
|
||||
shift
|
||||
;;
|
||||
"-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI")
|
||||
set fnord "$@"
|
||||
shift
|
||||
shift
|
||||
;;
|
||||
*)
|
||||
set fnord "$@" "$arg"
|
||||
shift
|
||||
shift
|
||||
;;
|
||||
esac
|
||||
done
|
||||
"$@" -E 2>/dev/null |
|
||||
sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::\1:p' | $cygpath_u | sort -u > "$tmpdepfile"
|
||||
rm -f "$depfile"
|
||||
echo "$object : \\" > "$depfile"
|
||||
sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::'"$tab"'\1 \\:p' >> "$depfile"
|
||||
echo "$tab" >> "$depfile"
|
||||
sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::\1\::p' >> "$depfile"
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
msvcmsys)
|
||||
# This case exists only to let depend.m4 do its work. It works by
|
||||
# looking at the text of this script. This case will never be run,
|
||||
# since it is checked for above.
|
||||
exit 1
|
||||
;;
|
||||
|
||||
none)
|
||||
exec "$@"
|
||||
;;
|
||||
|
||||
*)
|
||||
echo "Unknown depmode $depmode" 1>&2
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
exit 0
|
||||
|
||||
# Local Variables:
|
||||
# mode: shell-script
|
||||
# sh-indentation: 2
|
||||
# eval: (add-hook 'write-file-hooks 'time-stamp)
|
||||
# time-stamp-start: "scriptversion="
|
||||
# time-stamp-format: "%:y-%02m-%02d.%02H"
|
||||
# time-stamp-time-zone: "UTC0"
|
||||
# time-stamp-end: "; # UTC"
|
||||
# End:
|
||||
87
doc/F2FS.txt
Normal file
87
doc/F2FS.txt
Normal file
|
|
@ -0,0 +1,87 @@
|
|||
|
||||
SQLite's OS layer contains the following definitions used in F2FS related
|
||||
calls:
|
||||
|
||||
#define F2FS_IOCTL_MAGIC 0xf5
|
||||
#define F2FS_IOC_START_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 1)
|
||||
#define F2FS_IOC_COMMIT_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 2)
|
||||
#define F2FS_IOC_START_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 3)
|
||||
#define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5)
|
||||
#define F2FS_IOC_GET_FEATURES _IOR(F2FS_IOCTL_MAGIC, 12, u32)
|
||||
#define F2FS_FEATURE_ATOMIC_WRITE 0x0004
|
||||
|
||||
After opening a database file on Linux (including Android), SQLite determines
|
||||
whether or not a file supports F2FS atomic commits as follows:
|
||||
|
||||
u32 flags = 0;
|
||||
rc = ioctl(fd, F2FS_IOC_GET_FEATURES, &flags);
|
||||
if( rc==0 && (flags & F2FS_FEATURE_ATOMIC_WRITE) ){
|
||||
/* File supports F2FS atomic commits */
|
||||
}else{
|
||||
/* File does NOT support F2FS atomic commits */
|
||||
}
|
||||
|
||||
where "fd" is the file-descriptor open on the database file.
|
||||
|
||||
Usually, when writing to a database file that supports atomic commits, SQLite
|
||||
accumulates the entire transaction in heap memory, deferring all writes to the
|
||||
db file until the transaction is committed.
|
||||
|
||||
When it is time to commit a transaction on a file that supports atomic
|
||||
commits, SQLite does:
|
||||
|
||||
/* Take an F_WRLCK lock on the database file. This prevents any other
|
||||
** SQLite clients from reading or writing the file until the lock
|
||||
** is released. */
|
||||
rc = fcntl(fd, F_SETLK, ...);
|
||||
if( rc!=0 ) goto failed;
|
||||
|
||||
rc = ioctl(fd, F2FS_IOC_START_ATOMIC_WRITE);
|
||||
if( rc!=0 ) goto fallback_to_legacy_journal_commit;
|
||||
|
||||
foreach (dirty page){
|
||||
rc = write(fd, ...dirty page...);
|
||||
if( rc!=0 ){
|
||||
ioctl(fd, F2FS_IOC_ABORT_VOLATILE_WRITE);
|
||||
goto fallback_to_legacy_journal_commit;
|
||||
}
|
||||
}
|
||||
|
||||
rc = ioctl(fd, F2FS_IOC_COMMIT_ATOMIC_WRITE);
|
||||
if( rc!=0 ){
|
||||
ioctl(fd, F2FS_IOC_ABORT_VOLATILE_WRITE);
|
||||
goto fallback_to_legacy_journal_commit;
|
||||
}
|
||||
|
||||
/* If we get there, the transaction has been successfully
|
||||
** committed to persistent storage. The following call
|
||||
** relinquishes the F_WRLCK lock. */
|
||||
fcntl(fd, F_SETLK, ...);
|
||||
|
||||
Assumptions:
|
||||
|
||||
1. After either of the F2FS_IOC_ABORT_VOLATILE_WRITE calls return,
|
||||
the database file is in the state that it was in before
|
||||
F2FS_IOC_START_ATOMIC_WRITE was invoked. Even if the ioctl()
|
||||
fails - we're ignoring the return code.
|
||||
|
||||
This is true regardless of the type of error that occurred in
|
||||
ioctl() or write().
|
||||
|
||||
2. If the system fails before the F2FS_IOC_COMMIT_ATOMIC_WRITE is
|
||||
completed, then following a reboot the database file is in the
|
||||
state that it was in before F2FS_IOC_START_ATOMIC_WRITE was invoked.
|
||||
Or, if the write was commited right before the system failed, in a
|
||||
state indicating that all write() calls were successfully committed
|
||||
to persistent storage before the failure occurred.
|
||||
|
||||
3. If the process crashes before the F2FS_IOC_COMMIT_ATOMIC_WRITE is
|
||||
completed then the file is automatically restored to the state that
|
||||
it was in before F2FS_IOC_START_ATOMIC_WRITE was called. This occurs
|
||||
before the posix advisory lock is automatically dropped - there is
|
||||
no chance that another client will be able to read the file in a
|
||||
half-committed state before the rollback operation occurs.
|
||||
|
||||
|
||||
|
||||
|
||||
1264
doc/lemon.html
Normal file
1264
doc/lemon.html
Normal file
File diff suppressed because it is too large
Load diff
76
doc/pager-invariants.txt
Normal file
76
doc/pager-invariants.txt
Normal file
|
|
@ -0,0 +1,76 @@
|
|||
*** Throughout this document, a page is deemed to have been synced
|
||||
automatically as soon as it is written when PRAGMA synchronous=OFF.
|
||||
Otherwise, the page is not synced until the xSync method of the VFS
|
||||
is called successfully on the file containing the page.
|
||||
|
||||
*** Definition: A page of the database file is said to be "overwriteable" if
|
||||
one or more of the following are true about the page:
|
||||
|
||||
(a) The original content of the page as it was at the beginning of
|
||||
the transaction has been written into the rollback journal and
|
||||
synced.
|
||||
|
||||
(b) The page was a freelist leaf page at the start of the transaction.
|
||||
|
||||
(c) The page number is greater than the largest page that existed in
|
||||
the database file at the start of the transaction.
|
||||
|
||||
(1) A page of the database file is never overwritten unless one of the
|
||||
following are true:
|
||||
|
||||
(a) The page and all other pages on the same sector are overwriteable.
|
||||
|
||||
(b) The atomic page write optimization is enabled, and the entire
|
||||
transaction other than the update of the transaction sequence
|
||||
number consists of a single page change.
|
||||
|
||||
(2) The content of a page written into the rollback journal exactly matches
|
||||
both the content in the database when the rollback journal was written
|
||||
and the content in the database at the beginning of the current
|
||||
transaction.
|
||||
|
||||
(3) Writes to the database file are an integer multiple of the page size
|
||||
in length and are aligned to a page boundary.
|
||||
|
||||
(4) Reads from the database file are either aligned on a page boundary and
|
||||
an integer multiple of the page size in length or are taken from the
|
||||
first 100 bytes of the database file.
|
||||
|
||||
(5) All writes to the database file are synced prior to the rollback journal
|
||||
being deleted, truncated, or zeroed.
|
||||
|
||||
(6) If a master journal file is used, then all writes to the database file
|
||||
are synced prior to the master journal being deleted.
|
||||
|
||||
*** Definition: Two databases (or the same database at two points it time)
|
||||
are said to be "logically equivalent" if they give the same answer to
|
||||
all queries. Note in particular the content of freelist leaf
|
||||
pages can be changed arbitarily without effecting the logical equivalence
|
||||
of the database.
|
||||
|
||||
(7) At any time, if any subset, including the empty set and the total set,
|
||||
of the unsynced changes to a rollback journal are removed and the
|
||||
journal is rolled back, the resulting database file will be logical
|
||||
equivalent to the database file at the beginning of the transaction.
|
||||
|
||||
(8) When a transaction is rolled back, the xTruncate method of the VFS
|
||||
is called to restore the database file to the same size it was at
|
||||
the beginning of the transaction. (In some VFSes, the xTruncate
|
||||
method is a no-op, but that does not change the fact the SQLite will
|
||||
invoke it.)
|
||||
|
||||
(9) Whenever the database file is modified, at least one bit in the range
|
||||
of bytes from 24 through 39 inclusive will be changed prior to releasing
|
||||
the EXCLUSIVE lock.
|
||||
|
||||
(10) The pattern of bits in bytes 24 through 39 shall not repeat in less
|
||||
than one billion transactions.
|
||||
|
||||
(11) A database file is well-formed at the beginning and at the conclusion
|
||||
of every transaction.
|
||||
|
||||
(12) An EXCLUSIVE lock must be held on the database file before making
|
||||
any changes to the database file.
|
||||
|
||||
(13) A SHARED lock must be held on the database file before reading any
|
||||
content out of the database file.
|
||||
142
doc/trusted-schema.md
Normal file
142
doc/trusted-schema.md
Normal file
|
|
@ -0,0 +1,142 @@
|
|||
# The new-security-options branch
|
||||
|
||||
## The problem that the [new-security-options](/timeline?r=new-security-options) branch tries to solve
|
||||
|
||||
An attacker might modify the schema of an SQLite database by adding
|
||||
structures that cause code to run when some other application opens and
|
||||
reads the database. For example, the attacker might replace a table
|
||||
definition with a view. Or the attacker might add triggers to tables
|
||||
or views, or add new CHECK constraints or generated columns or indexes
|
||||
with expressions in the index list or in the WHERE clause. If the
|
||||
added features invoke SQL functions or virtual tables with side effects,
|
||||
that might cause harm to the system if run by a high-privilege victim.
|
||||
Or, the added features might exfiltrate information if the database is
|
||||
read by a high-privilege victim.
|
||||
|
||||
The changes in this branch strive to make it easier for high-privilege
|
||||
applications to safely read SQLite database files that might have been
|
||||
maliciously corrupted by an attacker.
|
||||
|
||||
## Overview of changes in [new-security-options](/timeline?r=new-security-options)
|
||||
|
||||
The basic idea is to tag every SQL function and virtual table with one
|
||||
of three risk levels:
|
||||
|
||||
1. Innocuous
|
||||
2. Normal
|
||||
3. Direct-Only
|
||||
|
||||
Innocuous functions/vtabs are safe and can be used at any time.
|
||||
Direct-only elements, in contrast, might have cause side-effects and
|
||||
should only be used from top-level SQL, not from within triggers or views nor
|
||||
in elements of the schema such as CHECK constraint, DEFAULT values,
|
||||
generated columns, index expressions, or in the WHERE clause of a
|
||||
partial index that are potentially under the control of an attacker.
|
||||
Normal elements behave like Innocuous if TRUSTED\_SCHEMA=on
|
||||
and behave like direct-only if TRUSTED\_SCHEMA=off.
|
||||
|
||||
Application-defined functions and virtual tables go in as Normal unless
|
||||
the application takes deliberate steps to change the risk level.
|
||||
|
||||
For backwards compatibility, the default is TRUSTED\_SCHEMA=on. Documentation
|
||||
will be updated to recommend applications turn TRUSTED\_SCHEMA to off.
|
||||
|
||||
An innocuous function or virtual table is one that can only read content
|
||||
from the database file in which it resides, and can only alter the database
|
||||
in which it resides. Most SQL functions are innocuous. For example, there
|
||||
is no harm in an attacker running the abs() function.
|
||||
|
||||
Direct-only elements that have side-effects that go outside the database file
|
||||
in which it lives, or return information from outside of the database file.
|
||||
Examples of direct-only elements include:
|
||||
|
||||
1. The fts3\_tokenizer() function
|
||||
2. The writefile() function
|
||||
3. The readfile() function
|
||||
4. The zipvfs virtual table
|
||||
5. The csv virtual table
|
||||
|
||||
We do not want an attacker to be able to add these kinds of things to
|
||||
the database schema and possibly trick a high-privilege application
|
||||
from performing any of these actions. Therefore, functions and vtabs
|
||||
with side-effects are marked as Direct-Only.
|
||||
|
||||
Legacy applications might add other risky functions or vtabs. Those will
|
||||
go in as "Normal" by default. For optimal security, we want those risky
|
||||
app-defined functions and vtabs to be direct-only, but making that the
|
||||
default might break some legacy applications. Hence, all app-defined
|
||||
functions and vtabs go in as Normal, but the application can switch them
|
||||
over to "Direct-Only" behavior using a single pragma.
|
||||
|
||||
The restrictions on the use of functions and virtual tables do not apply
|
||||
to TEMP. A TEMP VIEW or a TEMP TRIGGER can use any valid SQL function
|
||||
or virtual table. The idea is that TEMP views and triggers must be
|
||||
directly created by the application and are thus under the control of the
|
||||
application. TEMP views and triggers cannot be created by an attacker who
|
||||
corrupts the schema of a persistent database file. Hence TEMP views and
|
||||
triggers are safe.
|
||||
|
||||
## Specific changes
|
||||
|
||||
1. New sqlite3\_db\_config() option SQLITE\_DBCONFIG\_TRUSTED\_SCHEMA for
|
||||
turning TRUSTED\_SCHEMA on and off. It defaults to ON.
|
||||
|
||||
2. Compile-time option -DSQLITE\_TRUSTED\_SCHEMA=0 causes the default
|
||||
TRUSTED\_SCHEMA setting to be off.
|
||||
|
||||
3. New pragma "PRAGMA trusted\_schema=(ON\|OFF);". This provides access
|
||||
to the TRUSTED_SCHEMA setting for application coded using scripting
|
||||
languages or other secondary languages where they are unable to make
|
||||
calls to sqlite3\_db\_config().
|
||||
|
||||
4. New options for the "enc" parameter to sqlite3\_create\_function() and
|
||||
its kin:
|
||||
<ol type="a">
|
||||
<li> _SQLITE\_INNOCUOUS_ → tags the new functions as Innocuous
|
||||
<li> _SQLITE\_DIRECTONLY_ → tags the new functions as Direct-Only
|
||||
</ol>
|
||||
|
||||
5. New options to sqlite3\_vtab\_config():
|
||||
<ol type="a">
|
||||
<li> _SQLITE\_VTAB\_INNOCUOUS_ → tags the vtab as Innocuous
|
||||
<li> _SQLITE\_VTAB\_DIRECTONLY_ → tags the vtab as Direct-Only
|
||||
</ol>
|
||||
|
||||
6. Change many of the functions and virtual tables in the SQLite source
|
||||
tree to use one of the tags above.
|
||||
|
||||
7. Enhanced PRAGMA function\_list and virtual-table "pragma\_function\_list"
|
||||
with additional columns. The columns now are:
|
||||
<ul>
|
||||
<li> _name_ → Name of the function
|
||||
<li> _builtin_ → 1 for built-in functions. 0 otherwise.
|
||||
<li> _type_ → 's'=Scalar, 'a'=Aggregate, 'w'=Window
|
||||
<li> _enc_ → 'utf8', 'utf16le', or 'utf16be'
|
||||
<li> _narg_ → number of argument
|
||||
<li> _flags_ → Bitmask of SQLITE\_INNOCUOUS, SQLITE\_DIRECTONLY,
|
||||
SQLITE\_DETERMINISTIC, SQLITE\_SUBTYPE, and
|
||||
SQLITE\_FUNC\_INTERNAL flags.
|
||||
</ul>
|
||||
<p>The last four columns are new.
|
||||
|
||||
8. The function\_list PRAGMA now also shows all entries for each function.
|
||||
So, for example, if a function can take either 2 or 3 arguments,
|
||||
there are separate rows for the 2-argument and 3-argument versions of
|
||||
the function.
|
||||
|
||||
## Additional Notes
|
||||
|
||||
The function_list enhancements allow the application to query the set
|
||||
of SQL functions that meet various criteria. For example, to see all
|
||||
SQL functions that are never allowed to be used in the schema or in
|
||||
trigger or views:
|
||||
|
||||
~~~
|
||||
SELECT DISTINCT name FROM pragma_function_list
|
||||
WHERE (flags & 0x80000)!=0
|
||||
ORDER BY name;
|
||||
~~~
|
||||
|
||||
Doing the same is not possible for virtual tables, as a virtual table
|
||||
might be Innocuous, Normal, or Direct-Only depending on the arguments
|
||||
passed into the xConnect method.
|
||||
49
doc/vdbesort-memory.md
Normal file
49
doc/vdbesort-memory.md
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
|
||||
20-11-2020
|
||||
|
||||
# Memory Allocation In vdbesort.c
|
||||
|
||||
Memory allocation is slightly different depending on:
|
||||
|
||||
* whether or not SQLITE_CONFIG_SMALL_MALLOC is set, and
|
||||
* whether or not worker threads are enabled.
|
||||
|
||||
## SQLITE_CONFIG_SMALL_MALLOC=0
|
||||
|
||||
Assuming SQLITE_CONFIG_SMALL_MALLOC is not set, keys passed to the sorter are
|
||||
added to an in-memory buffer. This buffer is grown using sqlite3Realloc() as
|
||||
required it reaches the size configured for the main pager cache using "PRAGMA
|
||||
cache_size". i.e. if the user has executed "PRAGMA main.cache_size = -2048",
|
||||
then this buffer is allowed to grow up to 2MB in size.
|
||||
|
||||
Once the buffer has grown to its threshold, keys are sorted and written to
|
||||
a temp file. If worker threads are not enabled, this is the only significant
|
||||
allocation the sorter module makes. After keys are sorted and flushed out to
|
||||
the temp file, the buffer is reused to accumulate the next batch of keys.
|
||||
|
||||
If worker threads are available, then the buffer is passed to a worker thread
|
||||
to sort and flush once it is full, and a new buffer allocated to allow the
|
||||
main thread to continue to accumulate keys. Buffers are reused once they
|
||||
have been flushed, so in this case at most (nWorker+1) buffers are allocated
|
||||
and used, where nWorker is the number of configured worker threads.
|
||||
|
||||
There are no other significant users of heap memory in the sorter module.
|
||||
Once sorted buffers of keys have been flushed to disk, they are read back
|
||||
either by mapping the file (via sqlite3_file.xFetch()) or else read back
|
||||
in one page at a time.
|
||||
|
||||
All buffers are allocated by the main thread. A sorter object is associated
|
||||
with a single database connection, to which it holds a pointer.
|
||||
|
||||
## SQLITE_CONFIG_SMALL_MALLOC=1
|
||||
|
||||
This case is similar to the above, except that instead of accumulating
|
||||
multiple keys in a single large buffer, sqlite3VdbeSorterWrite() stores
|
||||
keys in a regular heap-memory linked list (one allocation per element).
|
||||
List elements are freed as they are flushed to disk, either by the main
|
||||
thread or by a worker thread.
|
||||
|
||||
Each time a key is added the sorter (and an allocation made),
|
||||
sqlite3HeapNearlyFull() is called. If it returns true, the current
|
||||
list of keys is flushed to a temporary file, even if it has not yet
|
||||
reached the size threshold.
|
||||
130
doc/vfs-shm.txt
Normal file
130
doc/vfs-shm.txt
Normal file
|
|
@ -0,0 +1,130 @@
|
|||
The 5 states of an historical rollback lock as implemented by the
|
||||
xLock, xUnlock, and xCheckReservedLock methods of the sqlite3_io_methods
|
||||
objec are:
|
||||
|
||||
UNLOCKED
|
||||
SHARED
|
||||
RESERVED
|
||||
PENDING
|
||||
EXCLUSIVE
|
||||
|
||||
The wal-index file has a similar locking hierarchy implemented using
|
||||
the xShmLock method of the sqlite3_vfs object, but with 7
|
||||
states. Each connection to a wal-index file must be in one of
|
||||
the following 7 states:
|
||||
|
||||
UNLOCKED
|
||||
READ
|
||||
READ_FULL
|
||||
WRITE
|
||||
PENDING
|
||||
CHECKPOINT
|
||||
RECOVER
|
||||
|
||||
These roughly correspond to the 5 states of a rollback lock except
|
||||
that SHARED is split out into 2 states: READ and READ_FULL and
|
||||
there is an extra RECOVER state used for wal-index reconstruction.
|
||||
|
||||
The meanings of the various wal-index locking states is as follows:
|
||||
|
||||
UNLOCKED - The wal-index is not in use.
|
||||
|
||||
READ - Some prefix of the wal-index is being read. Additional
|
||||
wal-index information can be appended at any time. The
|
||||
newly appended content will be ignored by the holder of
|
||||
the READ lock.
|
||||
|
||||
READ_FULL - The entire wal-index is being read. No new information
|
||||
can be added to the wal-index. The holder of a READ_FULL
|
||||
lock promises never to read pages from the database file
|
||||
that are available anywhere in the wal-index.
|
||||
|
||||
WRITE - It is OK to append to the wal-index file and to adjust
|
||||
the header to indicate the new "last valid frame".
|
||||
|
||||
PENDING - Waiting on all READ locks to clear so that a
|
||||
CHECKPOINT lock can be acquired.
|
||||
|
||||
CHECKPOINT - It is OK to write any WAL data into the database file
|
||||
and zero the last valid frame field of the wal-index
|
||||
header. The wal-index file itself may not be changed
|
||||
other than to zero the last valid frame field in the
|
||||
header.
|
||||
|
||||
RECOVER - Held during wal-index recovery. Used to prevent a
|
||||
race if multiple clients try to recover a wal-index at
|
||||
the same time.
|
||||
|
||||
|
||||
A particular lock manager implementation may coalesce one or more of
|
||||
the wal-index locking states, though with a reduction in concurrency.
|
||||
For example, an implemention might implement only exclusive locking,
|
||||
in which case all states would be equivalent to CHECKPOINT, meaning that
|
||||
only one reader or one writer or one checkpointer could be active at a
|
||||
time. Or, an implementation might combine READ and READ_FULL into
|
||||
a single state equivalent to READ, meaning that a writer could
|
||||
coexist with a reader, but no reader or writers could coexist with a
|
||||
checkpointer.
|
||||
|
||||
The lock manager must obey the following rules:
|
||||
|
||||
(1) A READ cannot coexist with CHECKPOINT.
|
||||
(2) A READ_FULL cannot coexist with WRITE.
|
||||
(3) None of WRITE, PENDING, CHECKPOINT, or RECOVER can coexist.
|
||||
|
||||
The SQLite core will obey the next set of rules. These rules are
|
||||
assertions on the behavior of the SQLite core which might be verified
|
||||
during testing using an instrumented lock manager.
|
||||
|
||||
(5) No part of the wal-index will be read without holding either some
|
||||
kind of SHM lock or an EXCLUSIVE lock on the original database.
|
||||
The original database is the file named in the 2nd parameter to
|
||||
the xShmOpen method.
|
||||
|
||||
(6) A holder of a READ_FULL will never read any page of the database
|
||||
file that is contained anywhere in the wal-index.
|
||||
|
||||
(7) No part of the wal-index other than the header will be written nor
|
||||
will the size of the wal-index grow without holding a WRITE or
|
||||
an EXCLUSIVE on the original database file.
|
||||
|
||||
(8) The wal-index header will not be written without holding one of
|
||||
WRITE, CHECKPOINT, or RECOVER on the wal-index or an EXCLUSIVE on
|
||||
the original database files.
|
||||
|
||||
(9) A CHECKPOINT or RECOVER must be held on the wal-index, or an
|
||||
EXCLUSIVE on the original database file, in order to reset the
|
||||
last valid frame counter in the header of the wal-index back to zero.
|
||||
|
||||
(10) A WRITE can only increase the last valid frame pointer in the header.
|
||||
|
||||
The SQLite core will only ever send requests for UNLOCK, READ, WRITE,
|
||||
CHECKPOINT, or RECOVER to the lock manager. The SQLite core will never
|
||||
request a READ_FULL or PENDING lock though the lock manager may deliver
|
||||
those locking states in response to READ and CHECKPOINT requests,
|
||||
respectively, if and only if the requested READ or CHECKPOINT cannot
|
||||
be delivered.
|
||||
|
||||
The following are the allowed lock transitions:
|
||||
|
||||
Original-State Request New-State
|
||||
-------------- ---------- ----------
|
||||
(11a) UNLOCK READ READ
|
||||
(11b) UNLOCK READ READ_FULL
|
||||
(11c) UNLOCK CHECKPOINT PENDING
|
||||
(11d) UNLOCK CHECKPOINT CHECKPOINT
|
||||
(11e) READ UNLOCK UNLOCK
|
||||
(11f) READ WRITE WRITE
|
||||
(11g) READ RECOVER RECOVER
|
||||
(11h) READ_FULL UNLOCK UNLOCK
|
||||
(11i) READ_FULL WRITE WRITE
|
||||
(11j) READ_FULL RECOVER RECOVER
|
||||
(11k) WRITE READ READ
|
||||
(11l) PENDING UNLOCK UNLOCK
|
||||
(11m) PENDING CHECKPOINT CHECKPOINT
|
||||
(11n) CHECKPOINT UNLOCK UNLOCK
|
||||
(11o) RECOVER READ READ
|
||||
|
||||
These 15 transitions are all that needs to be supported. The lock
|
||||
manager implementation can assert that fact. The other 27 possible
|
||||
transitions among the 7 locking states will never occur.
|
||||
88
doc/wal-lock.md
Normal file
88
doc/wal-lock.md
Normal file
|
|
@ -0,0 +1,88 @@
|
|||
# Wal-Mode Blocking Locks
|
||||
|
||||
On some Unix-like systems, SQLite may be configured to use POSIX blocking locks
|
||||
by:
|
||||
|
||||
* building the library with SQLITE\_ENABLE\_SETLK\_TIMEOUT defined, and
|
||||
* configuring a timeout in ms using the sqlite3\_busy\_timeout() API.
|
||||
|
||||
Blocking locks may be advantageous as (a) waiting database clients do not
|
||||
need to continuously poll the database lock, and (b) using blocking locks
|
||||
facilitates transfer of OS priority between processes when a high priority
|
||||
process is blocked by a lower priority one.
|
||||
|
||||
Only read/write clients use blocking locks. Clients that have read-only access
|
||||
to the \*-shm file nevery use blocking locks.
|
||||
|
||||
Threads or processes that access a single database at a time never deadlock as
|
||||
a result of blocking database locks. But it is of course possible for threads
|
||||
that lock multiple databases simultaneously to do so. In most cases the OS will
|
||||
detect the deadlock and return an error.
|
||||
|
||||
## Wal Recovery
|
||||
|
||||
Wal database "recovery" is a process required when the number of connected
|
||||
database clients changes from zero to one. In this case, a client is
|
||||
considered to connect to the database when it first reads data from it.
|
||||
Before recovery commences, an exclusive WRITER lock is taken.
|
||||
|
||||
Without blocking locks, if two clients attempt recovery simultaneously, one
|
||||
fails to obtain the WRITER lock and either invokes the busy-handler callback or
|
||||
returns SQLITE\_BUSY to the user. With blocking locks configured, the second
|
||||
client blocks on the WRITER lock.
|
||||
|
||||
## Database Readers
|
||||
|
||||
Usually, read-only are not blocked by any other database clients, so they
|
||||
have no need of blocking locks.
|
||||
|
||||
If a read-only transaction is being opened on a snapshot, the CHECKPOINTER
|
||||
lock is required briefly as part of opening the transaction (to check that a
|
||||
checkpointer is not currently overwriting the snapshot being opened). A
|
||||
blocking lock is used to obtain the CHECKPOINTER lock in this case. A snapshot
|
||||
opener may therefore block on and transfer priority to a checkpointer in some
|
||||
cases.
|
||||
|
||||
## Database Writers
|
||||
|
||||
A database writer must obtain the exclusive WRITER lock. It uses a blocking
|
||||
lock to do so if any of the following are true:
|
||||
|
||||
* the transaction is an implicit one consisting of a single DML or DDL
|
||||
statement, or
|
||||
* the transaction is opened using BEGIN IMMEDIATE or BEGIN EXCLUSIVE, or
|
||||
* the first SQL statement executed following the BEGIN command is a DML or
|
||||
DDL statement (not a read-only statement like a SELECT).
|
||||
|
||||
In other words, in all cases except when an open read-transaction is upgraded
|
||||
to a write-transaction. In that case a non-blocking lock is used.
|
||||
|
||||
## Database Checkpointers
|
||||
|
||||
Database checkpointers takes the following locks, in order:
|
||||
|
||||
* The exclusive CHECKPOINTER lock.
|
||||
* The exclusive WRITER lock (FULL, RESTART and TRUNCATE only).
|
||||
* Exclusive lock on read-mark slots 1-N. These are immediately released after being taken.
|
||||
* Exclusive lock on read-mark 0.
|
||||
* Exclusive lock on read-mark slots 1-N again. These are immediately released
|
||||
after being taken (RESTART and TRUNCATE only).
|
||||
|
||||
All of the above use blocking locks.
|
||||
|
||||
## Summary
|
||||
|
||||
With blocking locks configured, the only cases in which clients should see an
|
||||
SQLITE\_BUSY error are:
|
||||
|
||||
* if the OS does not grant a blocking lock before the configured timeout
|
||||
expires, and
|
||||
* when an open read-transaction is upgraded to a write-transaction.
|
||||
|
||||
In all other cases the blocking locks implementation should prevent clients
|
||||
from having to handle SQLITE\_BUSY errors and facilitate appropriate transfer
|
||||
of priorities between competing clients.
|
||||
|
||||
Clients that lock multiple databases simultaneously must be wary of deadlock.
|
||||
|
||||
|
||||
8
ext/README.md
Normal file
8
ext/README.md
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
## Loadable Extensions
|
||||
|
||||
Various [loadable extensions](https://www.sqlite.org/loadext.html) for
|
||||
SQLite are found in subfolders.
|
||||
|
||||
Most subfolders are dedicated to a single loadable extension (for
|
||||
example FTS5, or RTREE). But the misc/ subfolder contains a collection
|
||||
of smaller single-file extensions.
|
||||
170
ext/async/README.txt
Normal file
170
ext/async/README.txt
Normal file
|
|
@ -0,0 +1,170 @@
|
|||
NOTE (2012-11-29):
|
||||
|
||||
The functionality implemented by this extension has been superseded
|
||||
by WAL-mode. This module is no longer supported or maintained. The
|
||||
code is retained for historical reference only.
|
||||
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
Normally, when SQLite writes to a database file, it waits until the write
|
||||
operation is finished before returning control to the calling application.
|
||||
Since writing to the file-system is usually very slow compared with CPU
|
||||
bound operations, this can be a performance bottleneck. This directory
|
||||
contains an extension that causes SQLite to perform all write requests
|
||||
using a separate thread running in the background. Although this does not
|
||||
reduce the overall system resources (CPU, disk bandwidth etc.) at all, it
|
||||
allows SQLite to return control to the caller quickly even when writing to
|
||||
the database, eliminating the bottleneck.
|
||||
|
||||
1. Functionality
|
||||
|
||||
1.1 How it Works
|
||||
1.2 Limitations
|
||||
1.3 Locking and Concurrency
|
||||
|
||||
2. Compilation and Usage
|
||||
|
||||
3. Porting
|
||||
|
||||
|
||||
|
||||
1. FUNCTIONALITY
|
||||
|
||||
With asynchronous I/O, write requests are handled by a separate thread
|
||||
running in the background. This means that the thread that initiates
|
||||
a database write does not have to wait for (sometimes slow) disk I/O
|
||||
to occur. The write seems to happen very quickly, though in reality
|
||||
it is happening at its usual slow pace in the background.
|
||||
|
||||
Asynchronous I/O appears to give better responsiveness, but at a price.
|
||||
You lose the Durable property. With the default I/O backend of SQLite,
|
||||
once a write completes, you know that the information you wrote is
|
||||
safely on disk. With the asynchronous I/O, this is not the case. If
|
||||
your program crashes or if a power loss occurs after the database
|
||||
write but before the asynchronous write thread has completed, then the
|
||||
database change might never make it to disk and the next user of the
|
||||
database might not see your change.
|
||||
|
||||
You lose Durability with asynchronous I/O, but you still retain the
|
||||
other parts of ACID: Atomic, Consistent, and Isolated. Many
|
||||
appliations get along fine without the Durablity.
|
||||
|
||||
1.1 How it Works
|
||||
|
||||
Asynchronous I/O works by creating a special SQLite "vfs" structure
|
||||
and registering it with sqlite3_vfs_register(). When files opened via
|
||||
this vfs are written to (using the vfs xWrite() method), the data is not
|
||||
written directly to disk, but is placed in the "write-queue" to be
|
||||
handled by the background thread.
|
||||
|
||||
When files opened with the asynchronous vfs are read from
|
||||
(using the vfs xRead() method), the data is read from the file on
|
||||
disk and the write-queue, so that from the point of view of
|
||||
the vfs reader the xWrite() appears to have already completed.
|
||||
|
||||
The special vfs is registered (and unregistered) by calls to the
|
||||
API functions sqlite3async_initialize() and sqlite3async_shutdown().
|
||||
See section "Compilation and Usage" below for details.
|
||||
|
||||
1.2 Limitations
|
||||
|
||||
In order to gain experience with the main ideas surrounding asynchronous
|
||||
IO, this implementation is deliberately kept simple. Additional
|
||||
capabilities may be added in the future.
|
||||
|
||||
For example, as currently implemented, if writes are happening at a
|
||||
steady stream that exceeds the I/O capability of the background writer
|
||||
thread, the queue of pending write operations will grow without bound.
|
||||
If this goes on for long enough, the host system could run out of memory.
|
||||
A more sophisticated module could to keep track of the quantity of
|
||||
pending writes and stop accepting new write requests when the queue of
|
||||
pending writes grows too large.
|
||||
|
||||
1.3 Locking and Concurrency
|
||||
|
||||
Multiple connections from within a single process that use this
|
||||
implementation of asynchronous IO may access a single database
|
||||
file concurrently. From the point of view of the user, if all
|
||||
connections are from within a single process, there is no difference
|
||||
between the concurrency offered by "normal" SQLite and SQLite
|
||||
using the asynchronous backend.
|
||||
|
||||
If file-locking is enabled (it is enabled by default), then connections
|
||||
from multiple processes may also read and write the database file.
|
||||
However concurrency is reduced as follows:
|
||||
|
||||
* When a connection using asynchronous IO begins a database
|
||||
transaction, the database is locked immediately. However the
|
||||
lock is not released until after all relevant operations
|
||||
in the write-queue have been flushed to disk. This means
|
||||
(for example) that the database may remain locked for some
|
||||
time after a "COMMIT" or "ROLLBACK" is issued.
|
||||
|
||||
* If an application using asynchronous IO executes transactions
|
||||
in quick succession, other database users may be effectively
|
||||
locked out of the database. This is because when a BEGIN
|
||||
is executed, a database lock is established immediately. But
|
||||
when the corresponding COMMIT or ROLLBACK occurs, the lock
|
||||
is not released until the relevant part of the write-queue
|
||||
has been flushed through. As a result, if a COMMIT is followed
|
||||
by a BEGIN before the write-queue is flushed through, the database
|
||||
is never unlocked,preventing other processes from accessing
|
||||
the database.
|
||||
|
||||
File-locking may be disabled at runtime using the sqlite3async_control()
|
||||
API (see below). This may improve performance when an NFS or other
|
||||
network file-system, as the synchronous round-trips to the server be
|
||||
required to establish file locks are avoided. However, if multiple
|
||||
connections attempt to access the same database file when file-locking
|
||||
is disabled, application crashes and database corruption is a likely
|
||||
outcome.
|
||||
|
||||
|
||||
2. COMPILATION AND USAGE
|
||||
|
||||
The asynchronous IO extension consists of a single file of C code
|
||||
(sqlite3async.c), and a header file (sqlite3async.h) that defines the
|
||||
C API used by applications to activate and control the modules
|
||||
functionality.
|
||||
|
||||
To use the asynchronous IO extension, compile sqlite3async.c as
|
||||
part of the application that uses SQLite. Then use the API defined
|
||||
in sqlite3async.h to initialize and configure the module.
|
||||
|
||||
The asynchronous IO VFS API is described in detail in comments in
|
||||
sqlite3async.h. Using the API usually consists of the following steps:
|
||||
|
||||
1. Register the asynchronous IO VFS with SQLite by calling the
|
||||
sqlite3async_initialize() function.
|
||||
|
||||
2. Create a background thread to perform write operations and call
|
||||
sqlite3async_run().
|
||||
|
||||
3. Use the normal SQLite API to read and write to databases via
|
||||
the asynchronous IO VFS.
|
||||
|
||||
Refer to sqlite3async.h for details.
|
||||
|
||||
|
||||
3. PORTING
|
||||
|
||||
Currently the asynchronous IO extension is compatible with win32 systems
|
||||
and systems that support the pthreads interface, including Mac OSX, Linux,
|
||||
and other varieties of Unix.
|
||||
|
||||
To port the asynchronous IO extension to another platform, the user must
|
||||
implement mutex and condition variable primitives for the new platform.
|
||||
Currently there is no externally available interface to allow this, but
|
||||
modifying the code within sqlite3async.c to include the new platforms
|
||||
concurrency primitives is relatively easy. Search within sqlite3async.c
|
||||
for the comment string "PORTING FUNCTIONS" for details. Then implement
|
||||
new versions of each of the following:
|
||||
|
||||
static void async_mutex_enter(int eMutex);
|
||||
static void async_mutex_leave(int eMutex);
|
||||
static void async_cond_wait(int eCond, int eMutex);
|
||||
static void async_cond_signal(int eCond);
|
||||
static void async_sched_yield(void);
|
||||
|
||||
The functionality required of each of the above functions is described
|
||||
in comments in sqlite3async.c.
|
||||
1706
ext/async/sqlite3async.c
Normal file
1706
ext/async/sqlite3async.c
Normal file
File diff suppressed because it is too large
Load diff
222
ext/async/sqlite3async.h
Normal file
222
ext/async/sqlite3async.h
Normal file
|
|
@ -0,0 +1,222 @@
|
|||
|
||||
#ifndef __SQLITEASYNC_H_
|
||||
#define __SQLITEASYNC_H_ 1
|
||||
|
||||
/*
|
||||
** Make sure we can call this stuff from C++.
|
||||
*/
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define SQLITEASYNC_VFSNAME "sqlite3async"
|
||||
|
||||
/*
|
||||
** THREAD SAFETY NOTES:
|
||||
**
|
||||
** Of the four API functions in this file, the following are not threadsafe:
|
||||
**
|
||||
** sqlite3async_initialize()
|
||||
** sqlite3async_shutdown()
|
||||
**
|
||||
** Care must be taken that neither of these functions is called while
|
||||
** another thread may be calling either any sqlite3async_XXX() function
|
||||
** or an sqlite3_XXX() API function related to a database handle that
|
||||
** is using the asynchronous IO VFS.
|
||||
**
|
||||
** These functions:
|
||||
**
|
||||
** sqlite3async_run()
|
||||
** sqlite3async_control()
|
||||
**
|
||||
** are threadsafe. It is quite safe to call either of these functions even
|
||||
** if another thread may also be calling one of them or an sqlite3_XXX()
|
||||
** function related to a database handle that uses the asynchronous IO VFS.
|
||||
*/
|
||||
|
||||
/*
|
||||
** Initialize the asynchronous IO VFS and register it with SQLite using
|
||||
** sqlite3_vfs_register(). If the asynchronous VFS is already initialized
|
||||
** and registered, this function is a no-op. The asynchronous IO VFS
|
||||
** is registered as "sqlite3async".
|
||||
**
|
||||
** The asynchronous IO VFS does not make operating system IO requests
|
||||
** directly. Instead, it uses an existing VFS implementation for all
|
||||
** required file-system operations. If the first parameter to this function
|
||||
** is NULL, then the current default VFS is used for IO. If it is not
|
||||
** NULL, then it must be the name of an existing VFS. In other words, the
|
||||
** first argument to this function is passed to sqlite3_vfs_find() to
|
||||
** locate the VFS to use for all real IO operations. This VFS is known
|
||||
** as the "parent VFS".
|
||||
**
|
||||
** If the second parameter to this function is non-zero, then the
|
||||
** asynchronous IO VFS is registered as the default VFS for all SQLite
|
||||
** database connections within the process. Otherwise, the asynchronous IO
|
||||
** VFS is only used by connections opened using sqlite3_open_v2() that
|
||||
** specifically request VFS "sqlite3async".
|
||||
**
|
||||
** If a parent VFS cannot be located, then SQLITE_ERROR is returned.
|
||||
** In the unlikely event that operating system specific initialization
|
||||
** fails (win32 systems create the required critical section and event
|
||||
** objects within this function), then SQLITE_ERROR is also returned.
|
||||
** Finally, if the call to sqlite3_vfs_register() returns an error, then
|
||||
** the error code is returned to the user by this function. In all three
|
||||
** of these cases, intialization has failed and the asynchronous IO VFS
|
||||
** is not registered with SQLite.
|
||||
**
|
||||
** Otherwise, if no error occurs, SQLITE_OK is returned.
|
||||
*/
|
||||
int sqlite3async_initialize(const char *zParent, int isDefault);
|
||||
|
||||
/*
|
||||
** This function unregisters the asynchronous IO VFS using
|
||||
** sqlite3_vfs_unregister().
|
||||
**
|
||||
** On win32 platforms, this function also releases the small number of
|
||||
** critical section and event objects created by sqlite3async_initialize().
|
||||
*/
|
||||
void sqlite3async_shutdown(void);
|
||||
|
||||
/*
|
||||
** This function may only be called when the asynchronous IO VFS is
|
||||
** installed (after a call to sqlite3async_initialize()). It processes
|
||||
** zero or more queued write operations before returning. It is expected
|
||||
** (but not required) that this function will be called by a different
|
||||
** thread than those threads that use SQLite. The "background thread"
|
||||
** that performs IO.
|
||||
**
|
||||
** How many queued write operations are performed before returning
|
||||
** depends on the global setting configured by passing the SQLITEASYNC_HALT
|
||||
** verb to sqlite3async_control() (see below for details). By default
|
||||
** this function never returns - it processes all pending operations and
|
||||
** then blocks waiting for new ones.
|
||||
**
|
||||
** If multiple simultaneous calls are made to sqlite3async_run() from two
|
||||
** or more threads, then the calls are serialized internally.
|
||||
*/
|
||||
void sqlite3async_run(void);
|
||||
|
||||
/*
|
||||
** This function may only be called when the asynchronous IO VFS is
|
||||
** installed (after a call to sqlite3async_initialize()). It is used
|
||||
** to query or configure various parameters that affect the operation
|
||||
** of the asynchronous IO VFS. At present there are three parameters
|
||||
** supported:
|
||||
**
|
||||
** * The "halt" parameter, which configures the circumstances under
|
||||
** which the sqlite3async_run() parameter is configured.
|
||||
**
|
||||
** * The "delay" parameter. Setting the delay parameter to a non-zero
|
||||
** value causes the sqlite3async_run() function to sleep for the
|
||||
** configured number of milliseconds between each queued write
|
||||
** operation.
|
||||
**
|
||||
** * The "lockfiles" parameter. This parameter determines whether or
|
||||
** not the asynchronous IO VFS locks the database files it operates
|
||||
** on. Disabling file locking can improve throughput.
|
||||
**
|
||||
** This function is always passed two arguments. When setting the value
|
||||
** of a parameter, the first argument must be one of SQLITEASYNC_HALT,
|
||||
** SQLITEASYNC_DELAY or SQLITEASYNC_LOCKFILES. The second argument must
|
||||
** be passed the new value for the parameter as type "int".
|
||||
**
|
||||
** When querying the current value of a paramter, the first argument must
|
||||
** be one of SQLITEASYNC_GET_HALT, GET_DELAY or GET_LOCKFILES. The second
|
||||
** argument to this function must be of type (int *). The current value
|
||||
** of the queried parameter is copied to the memory pointed to by the
|
||||
** second argument. For example:
|
||||
**
|
||||
** int eCurrentHalt;
|
||||
** int eNewHalt = SQLITEASYNC_HALT_IDLE;
|
||||
**
|
||||
** sqlite3async_control(SQLITEASYNC_HALT, eNewHalt);
|
||||
** sqlite3async_control(SQLITEASYNC_GET_HALT, &eCurrentHalt);
|
||||
** assert( eNewHalt==eCurrentHalt );
|
||||
**
|
||||
** See below for more detail on each configuration parameter.
|
||||
**
|
||||
** SQLITEASYNC_HALT:
|
||||
**
|
||||
** This is used to set the value of the "halt" parameter. The second
|
||||
** argument must be one of the SQLITEASYNC_HALT_XXX symbols defined
|
||||
** below (either NEVER, IDLE and NOW).
|
||||
**
|
||||
** If the parameter is set to NEVER, then calls to sqlite3async_run()
|
||||
** never return. This is the default setting. If the parameter is set
|
||||
** to IDLE, then calls to sqlite3async_run() return as soon as the
|
||||
** queue of pending write operations is empty. If the parameter is set
|
||||
** to NOW, then calls to sqlite3async_run() return as quickly as
|
||||
** possible, without processing any pending write requests.
|
||||
**
|
||||
** If an attempt is made to set this parameter to an integer value other
|
||||
** than SQLITEASYNC_HALT_NEVER, IDLE or NOW, then sqlite3async_control()
|
||||
** returns SQLITE_MISUSE and the current value of the parameter is not
|
||||
** modified.
|
||||
**
|
||||
** Modifying the "halt" parameter affects calls to sqlite3async_run()
|
||||
** made by other threads that are currently in progress.
|
||||
**
|
||||
** SQLITEASYNC_DELAY:
|
||||
**
|
||||
** This is used to set the value of the "delay" parameter. If set to
|
||||
** a non-zero value, then after completing a pending write request, the
|
||||
** sqlite3async_run() function sleeps for the configured number of
|
||||
** milliseconds.
|
||||
**
|
||||
** If an attempt is made to set this parameter to a negative value,
|
||||
** sqlite3async_control() returns SQLITE_MISUSE and the current value
|
||||
** of the parameter is not modified.
|
||||
**
|
||||
** Modifying the "delay" parameter affects calls to sqlite3async_run()
|
||||
** made by other threads that are currently in progress.
|
||||
**
|
||||
** SQLITEASYNC_LOCKFILES:
|
||||
**
|
||||
** This is used to set the value of the "lockfiles" parameter. This
|
||||
** parameter must be set to either 0 or 1. If set to 1, then the
|
||||
** asynchronous IO VFS uses the xLock() and xUnlock() methods of the
|
||||
** parent VFS to lock database files being read and/or written. If
|
||||
** the parameter is set to 0, then these locks are omitted.
|
||||
**
|
||||
** This parameter may only be set when there are no open database
|
||||
** connections using the VFS and the queue of pending write requests
|
||||
** is empty. Attempting to set it when this is not true, or to set it
|
||||
** to a value other than 0 or 1 causes sqlite3async_control() to return
|
||||
** SQLITE_MISUSE and the value of the parameter to remain unchanged.
|
||||
**
|
||||
** If this parameter is set to zero, then it is only safe to access the
|
||||
** database via the asynchronous IO VFS from within a single process. If
|
||||
** while writing to the database via the asynchronous IO VFS the database
|
||||
** is also read or written from within another process, or via another
|
||||
** connection that does not use the asynchronous IO VFS within the same
|
||||
** process, the results are undefined (and may include crashes or database
|
||||
** corruption).
|
||||
**
|
||||
** Alternatively, if this parameter is set to 1, then it is safe to access
|
||||
** the database from multiple connections within multiple processes using
|
||||
** either the asynchronous IO VFS or the parent VFS directly.
|
||||
*/
|
||||
int sqlite3async_control(int op, ...);
|
||||
|
||||
/*
|
||||
** Values that can be used as the first argument to sqlite3async_control().
|
||||
*/
|
||||
#define SQLITEASYNC_HALT 1
|
||||
#define SQLITEASYNC_GET_HALT 2
|
||||
#define SQLITEASYNC_DELAY 3
|
||||
#define SQLITEASYNC_GET_DELAY 4
|
||||
#define SQLITEASYNC_LOCKFILES 5
|
||||
#define SQLITEASYNC_GET_LOCKFILES 6
|
||||
|
||||
/*
|
||||
** If the first argument to sqlite3async_control() is SQLITEASYNC_HALT,
|
||||
** the second argument should be one of the following.
|
||||
*/
|
||||
#define SQLITEASYNC_HALT_NEVER 0 /* Never halt (default value) */
|
||||
#define SQLITEASYNC_HALT_NOW 1 /* Halt as soon as possible */
|
||||
#define SQLITEASYNC_HALT_IDLE 2 /* Halt when write-queue is empty */
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* End of the 'extern "C"' block */
|
||||
#endif
|
||||
#endif /* ifndef __SQLITEASYNC_H_ */
|
||||
83
ext/expert/README.md
Normal file
83
ext/expert/README.md
Normal file
|
|
@ -0,0 +1,83 @@
|
|||
## SQLite Expert Extension
|
||||
|
||||
This folder contains code for a simple system to propose useful indexes
|
||||
given a database and a set of SQL queries. It works as follows:
|
||||
|
||||
1. The user database schema is copied to a temporary database.
|
||||
|
||||
1. All SQL queries are prepared against the temporary database.
|
||||
Information regarding the WHERE and ORDER BY clauses, and other query
|
||||
features that affect index selection are recorded.
|
||||
|
||||
1. The information gathered in step 2 is used to create candidate
|
||||
indexes - indexes that the planner might have made use of in the previous
|
||||
step, had they been available.
|
||||
|
||||
1. A subset of the data in the user database is used to generate statistics
|
||||
for all existing indexes and the candidate indexes generated in step 3
|
||||
above.
|
||||
|
||||
1. The SQL queries are prepared a second time. If the planner uses any
|
||||
of the indexes created in step 3, they are recommended to the user.
|
||||
|
||||
# C API
|
||||
|
||||
The SQLite expert C API is defined in sqlite3expert.h. Most uses will proceed
|
||||
as follows:
|
||||
|
||||
1. An sqlite3expert object is created by calling **sqlite3\_expert\_new()**.
|
||||
A database handle opened by the user is passed as an argument.
|
||||
|
||||
1. The sqlite3expert object is configured with one or more SQL statements
|
||||
by making one or more calls to **sqlite3\_expert\_sql()**. Each call may
|
||||
specify a single SQL statement, or multiple statements separated by
|
||||
semi-colons.
|
||||
|
||||
1. Optionally, the **sqlite3\_expert\_config()** API may be used to
|
||||
configure the size of the data subset used to generate index statistics.
|
||||
Using a smaller subset of the data can speed up the analysis.
|
||||
|
||||
1. **sqlite3\_expert\_analyze()** is called to run the analysis.
|
||||
|
||||
1. One or more calls are made to **sqlite3\_expert\_report()** to extract
|
||||
components of the results of the analysis.
|
||||
|
||||
1. **sqlite3\_expert\_destroy()** is called to free all resources.
|
||||
|
||||
Refer to comments in sqlite3expert.h for further details.
|
||||
|
||||
# sqlite3_expert application
|
||||
|
||||
The file "expert.c" contains the code for a command line application that
|
||||
uses the API described above. It can be compiled with (for example):
|
||||
|
||||
<pre>
|
||||
gcc -O2 sqlite3.c expert.c sqlite3expert.c -o sqlite3_expert
|
||||
</pre>
|
||||
|
||||
Assuming the database is named "test.db", it can then be run to analyze a
|
||||
single query:
|
||||
|
||||
<pre>
|
||||
./sqlite3_expert -sql <sql-query> test.db
|
||||
</pre>
|
||||
|
||||
Or an entire text file worth of queries with:
|
||||
|
||||
<pre>
|
||||
./sqlite3_expert -file <text-file> test.db
|
||||
</pre>
|
||||
|
||||
By default, sqlite3\_expert generates index statistics using all the data in
|
||||
the user database. For a large database, this may be prohibitively time
|
||||
consuming. The "-sample" option may be used to configure sqlite3\_expert to
|
||||
generate statistics based on an integer percentage of the user database as
|
||||
follows:
|
||||
|
||||
<pre>
|
||||
# Generate statistics based on 25% of the user database rows:
|
||||
./sqlite3_expert -sample 25 -sql <sql-query> test.db
|
||||
|
||||
# Do not generate any statistics at all:
|
||||
./sqlite3_expert -sample 0 -sql <sql-query> test.db
|
||||
</pre>
|
||||
156
ext/expert/expert.c
Normal file
156
ext/expert/expert.c
Normal file
|
|
@ -0,0 +1,156 @@
|
|||
/*
|
||||
** 2017 April 07
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
*************************************************************************
|
||||
*/
|
||||
|
||||
|
||||
#include <sqlite3.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "sqlite3expert.h"
|
||||
|
||||
|
||||
static void option_requires_argument(const char *zOpt){
|
||||
fprintf(stderr, "Option requires an argument: %s\n", zOpt);
|
||||
exit(-3);
|
||||
}
|
||||
|
||||
static int option_integer_arg(const char *zVal){
|
||||
return atoi(zVal);
|
||||
}
|
||||
|
||||
static void usage(char **argv){
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, "Usage %s ?OPTIONS? DATABASE\n", argv[0]);
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, "Options are:\n");
|
||||
fprintf(stderr, " -sql SQL (analyze SQL statements passed as argument)\n");
|
||||
fprintf(stderr, " -file FILE (read SQL statements from file FILE)\n");
|
||||
fprintf(stderr, " -verbose LEVEL (integer verbosity level. default 1)\n");
|
||||
fprintf(stderr, " -sample PERCENT (percent of db to sample. default 100)\n");
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
static int readSqlFromFile(sqlite3expert *p, const char *zFile, char **pzErr){
|
||||
FILE *in = fopen(zFile, "rb");
|
||||
long nIn;
|
||||
size_t nRead;
|
||||
char *pBuf;
|
||||
int rc;
|
||||
if( in==0 ){
|
||||
*pzErr = sqlite3_mprintf("failed to open file %s\n", zFile);
|
||||
return SQLITE_ERROR;
|
||||
}
|
||||
fseek(in, 0, SEEK_END);
|
||||
nIn = ftell(in);
|
||||
rewind(in);
|
||||
pBuf = sqlite3_malloc64( nIn+1 );
|
||||
nRead = fread(pBuf, nIn, 1, in);
|
||||
fclose(in);
|
||||
if( nRead!=1 ){
|
||||
sqlite3_free(pBuf);
|
||||
*pzErr = sqlite3_mprintf("failed to read file %s\n", zFile);
|
||||
return SQLITE_ERROR;
|
||||
}
|
||||
pBuf[nIn] = 0;
|
||||
rc = sqlite3_expert_sql(p, pBuf, pzErr);
|
||||
sqlite3_free(pBuf);
|
||||
return rc;
|
||||
}
|
||||
|
||||
int main(int argc, char **argv){
|
||||
const char *zDb;
|
||||
int rc = 0;
|
||||
char *zErr = 0;
|
||||
int i;
|
||||
int iVerbose = 1; /* -verbose option */
|
||||
|
||||
sqlite3 *db = 0;
|
||||
sqlite3expert *p = 0;
|
||||
|
||||
if( argc<2 ) usage(argv);
|
||||
zDb = argv[argc-1];
|
||||
if( zDb[0]=='-' ) usage(argv);
|
||||
rc = sqlite3_open(zDb, &db);
|
||||
if( rc!=SQLITE_OK ){
|
||||
fprintf(stderr, "Cannot open db file: %s - %s\n", zDb, sqlite3_errmsg(db));
|
||||
exit(-2);
|
||||
}
|
||||
|
||||
p = sqlite3_expert_new(db, &zErr);
|
||||
if( p==0 ){
|
||||
fprintf(stderr, "Cannot run analysis: %s\n", zErr);
|
||||
rc = 1;
|
||||
}else{
|
||||
for(i=1; i<(argc-1); i++){
|
||||
char *zArg = argv[i];
|
||||
int nArg;
|
||||
if( zArg[0]=='-' && zArg[1]=='-' && zArg[2]!=0 ) zArg++;
|
||||
nArg = (int)strlen(zArg);
|
||||
if( nArg>=2 && 0==sqlite3_strnicmp(zArg, "-file", nArg) ){
|
||||
if( ++i==(argc-1) ) option_requires_argument("-file");
|
||||
rc = readSqlFromFile(p, argv[i], &zErr);
|
||||
}
|
||||
|
||||
else if( nArg>=3 && 0==sqlite3_strnicmp(zArg, "-sql", nArg) ){
|
||||
if( ++i==(argc-1) ) option_requires_argument("-sql");
|
||||
rc = sqlite3_expert_sql(p, argv[i], &zErr);
|
||||
}
|
||||
|
||||
else if( nArg>=3 && 0==sqlite3_strnicmp(zArg, "-sample", nArg) ){
|
||||
int iSample;
|
||||
if( ++i==(argc-1) ) option_requires_argument("-sample");
|
||||
iSample = option_integer_arg(argv[i]);
|
||||
sqlite3_expert_config(p, EXPERT_CONFIG_SAMPLE, iSample);
|
||||
}
|
||||
|
||||
else if( nArg>=2 && 0==sqlite3_strnicmp(zArg, "-verbose", nArg) ){
|
||||
if( ++i==(argc-1) ) option_requires_argument("-verbose");
|
||||
iVerbose = option_integer_arg(argv[i]);
|
||||
}
|
||||
|
||||
else{
|
||||
usage(argv);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if( rc==SQLITE_OK ){
|
||||
rc = sqlite3_expert_analyze(p, &zErr);
|
||||
}
|
||||
|
||||
if( rc==SQLITE_OK ){
|
||||
int nQuery = sqlite3_expert_count(p);
|
||||
if( iVerbose>0 ){
|
||||
const char *zCand = sqlite3_expert_report(p,0,EXPERT_REPORT_CANDIDATES);
|
||||
fprintf(stdout, "-- Candidates -------------------------------\n");
|
||||
fprintf(stdout, "%s\n", zCand);
|
||||
}
|
||||
for(i=0; i<nQuery; i++){
|
||||
const char *zSql = sqlite3_expert_report(p, i, EXPERT_REPORT_SQL);
|
||||
const char *zIdx = sqlite3_expert_report(p, i, EXPERT_REPORT_INDEXES);
|
||||
const char *zEQP = sqlite3_expert_report(p, i, EXPERT_REPORT_PLAN);
|
||||
if( zIdx==0 ) zIdx = "(no new indexes)\n";
|
||||
if( iVerbose>0 ){
|
||||
fprintf(stdout, "-- Query %d ----------------------------------\n",i+1);
|
||||
fprintf(stdout, "%s\n\n", zSql);
|
||||
}
|
||||
fprintf(stdout, "%s\n%s\n", zIdx, zEQP);
|
||||
}
|
||||
}else{
|
||||
fprintf(stderr, "Error: %s\n", zErr ? zErr : "?");
|
||||
}
|
||||
|
||||
sqlite3_expert_destroy(p);
|
||||
sqlite3_free(zErr);
|
||||
return rc;
|
||||
}
|
||||
457
ext/expert/expert1.test
Normal file
457
ext/expert/expert1.test
Normal file
|
|
@ -0,0 +1,457 @@
|
|||
# 2009 Nov 11
|
||||
#
|
||||
# The author disclaims copyright to this source code. In place of
|
||||
# a legal notice, here is a blessing:
|
||||
#
|
||||
# May you do good and not evil.
|
||||
# May you find forgiveness for yourself and forgive others.
|
||||
# May you share freely, never taking more than you give.
|
||||
#
|
||||
#***********************************************************************
|
||||
#
|
||||
# The focus of this file is testing the CLI shell tool. Specifically,
|
||||
# the ".recommend" command.
|
||||
#
|
||||
#
|
||||
|
||||
# Test plan:
|
||||
#
|
||||
#
|
||||
if {![info exists testdir]} {
|
||||
set testdir [file join [file dirname [info script]] .. .. test]
|
||||
}
|
||||
source $testdir/tester.tcl
|
||||
set testprefix expert1
|
||||
|
||||
if {[info commands sqlite3_expert_new]==""} {
|
||||
finish_test
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
set CLI [test_binary_name sqlite3]
|
||||
set CMD [test_binary_name sqlite3_expert]
|
||||
|
||||
proc squish {txt} {
|
||||
regsub -all {[[:space:]]+} $txt { }
|
||||
}
|
||||
|
||||
proc do_setup_rec_test {tn setup sql res} {
|
||||
reset_db
|
||||
if {[info exists ::set_main_db_name]} {
|
||||
dbconfig_maindbname_icecube db
|
||||
}
|
||||
db eval $setup
|
||||
uplevel [list do_rec_test $tn $sql $res]
|
||||
}
|
||||
|
||||
foreach {tn setup} {
|
||||
1 {
|
||||
if {![file executable $CMD]} { continue }
|
||||
|
||||
proc do_rec_test {tn sql res} {
|
||||
set res [squish [string trim $res]]
|
||||
set tst [subst -nocommands {
|
||||
squish [string trim [exec $::CMD -verbose 0 -sql {$sql;} test.db]]
|
||||
}]
|
||||
uplevel [list do_test $tn $tst $res]
|
||||
}
|
||||
}
|
||||
2 {
|
||||
if {[info commands sqlite3_expert_new]==""} { continue }
|
||||
|
||||
proc do_rec_test {tn sql res} {
|
||||
set expert [sqlite3_expert_new db]
|
||||
$expert sql $sql
|
||||
$expert analyze
|
||||
|
||||
set result [list]
|
||||
for {set i 0} {$i < [$expert count]} {incr i} {
|
||||
set idx [string trim [$expert report $i indexes]]
|
||||
if {$idx==""} {set idx "(no new indexes)"}
|
||||
lappend result $idx
|
||||
lappend result [string trim [$expert report $i plan]]
|
||||
}
|
||||
|
||||
$expert destroy
|
||||
|
||||
set tst [subst -nocommands {set {} [squish [join {$result}]]}]
|
||||
uplevel [list do_test $tn $tst [string trim [squish $res]]]
|
||||
}
|
||||
}
|
||||
3 {
|
||||
if {[info commands sqlite3_expert_new]==""} { continue }
|
||||
set ::set_main_db_name 1
|
||||
}
|
||||
4 {
|
||||
if {![file executable $CLI]} { continue }
|
||||
|
||||
proc do_rec_test {tn sql res} {
|
||||
set res [squish [string trim $res]]
|
||||
set tst [subst -nocommands {
|
||||
squish [string trim [exec $::CLI test.db ".expert" {$sql;}]]
|
||||
}]
|
||||
uplevel [list do_test $tn $tst $res]
|
||||
}
|
||||
}
|
||||
} {
|
||||
|
||||
eval $setup
|
||||
|
||||
|
||||
do_setup_rec_test $tn.1 { CREATE TABLE t1(a, b, c) } {
|
||||
SELECT * FROM t1
|
||||
} {
|
||||
(no new indexes)
|
||||
SCAN t1
|
||||
}
|
||||
|
||||
do_setup_rec_test $tn.2 {
|
||||
CREATE TABLE t1(a, b, c);
|
||||
} {
|
||||
SELECT * FROM t1 WHERE b>?;
|
||||
} {
|
||||
CREATE INDEX t1_idx_00000062 ON t1(b);
|
||||
SEARCH t1 USING INDEX t1_idx_00000062 (b>?)
|
||||
}
|
||||
|
||||
do_setup_rec_test $tn.3 {
|
||||
CREATE TABLE t1(a, b, c);
|
||||
} {
|
||||
SELECT * FROM t1 WHERE b COLLATE nocase BETWEEN ? AND ?
|
||||
} {
|
||||
CREATE INDEX t1_idx_3e094c27 ON t1(b COLLATE NOCASE);
|
||||
SEARCH t1 USING INDEX t1_idx_3e094c27 (b>? AND b<?)
|
||||
}
|
||||
|
||||
do_setup_rec_test $tn.4 {
|
||||
CREATE TABLE t1(a, b, c);
|
||||
} {
|
||||
SELECT a FROM t1 ORDER BY b;
|
||||
} {
|
||||
CREATE INDEX t1_idx_00000062 ON t1(b);
|
||||
SCAN t1 USING INDEX t1_idx_00000062
|
||||
}
|
||||
|
||||
do_setup_rec_test $tn.5 {
|
||||
CREATE TABLE t1(a, b, c);
|
||||
} {
|
||||
SELECT a FROM t1 WHERE a=? ORDER BY b;
|
||||
} {
|
||||
CREATE INDEX t1_idx_000123a7 ON t1(a, b);
|
||||
SEARCH t1 USING COVERING INDEX t1_idx_000123a7 (a=?)
|
||||
}
|
||||
|
||||
if 0 {
|
||||
do_setup_rec_test $tn.6 {
|
||||
CREATE TABLE t1(a, b, c);
|
||||
} {
|
||||
SELECT min(a) FROM t1
|
||||
} {
|
||||
CREATE INDEX t1_idx_00000061 ON t1(a);
|
||||
SEARCH t1 USING COVERING INDEX t1_idx_00000061
|
||||
}
|
||||
}
|
||||
|
||||
do_setup_rec_test $tn.7 {
|
||||
CREATE TABLE t1(a, b, c);
|
||||
} {
|
||||
SELECT * FROM t1 ORDER BY a, b, c;
|
||||
} {
|
||||
CREATE INDEX t1_idx_033e95fe ON t1(a, b, c);
|
||||
SCAN t1 USING COVERING INDEX t1_idx_033e95fe
|
||||
}
|
||||
|
||||
#do_setup_rec_test $tn.1.8 {
|
||||
# CREATE TABLE t1(a, b, c);
|
||||
#} {
|
||||
# SELECT * FROM t1 ORDER BY a ASC, b COLLATE nocase DESC, c ASC;
|
||||
#} {
|
||||
# CREATE INDEX t1_idx_5be6e222 ON t1(a, b COLLATE NOCASE DESC, c);
|
||||
# 0|0|0|SCAN t1 USING COVERING INDEX t1_idx_5be6e222
|
||||
#}
|
||||
|
||||
do_setup_rec_test $tn.8.1 {
|
||||
CREATE TABLE t1(a COLLATE NOCase, b, c);
|
||||
} {
|
||||
SELECT * FROM t1 WHERE a=?
|
||||
} {
|
||||
CREATE INDEX t1_idx_00000061 ON t1(a);
|
||||
SEARCH t1 USING INDEX t1_idx_00000061 (a=?)
|
||||
}
|
||||
do_setup_rec_test $tn.8.2 {
|
||||
CREATE TABLE t1(a, b COLLATE nocase, c);
|
||||
} {
|
||||
SELECT * FROM t1 ORDER BY a ASC, b DESC, c ASC;
|
||||
} {
|
||||
CREATE INDEX t1_idx_5cb97285 ON t1(a, b DESC, c);
|
||||
SCAN t1 USING COVERING INDEX t1_idx_5cb97285
|
||||
}
|
||||
|
||||
|
||||
# Tables with names that require quotes.
|
||||
#
|
||||
do_setup_rec_test $tn.9.1 {
|
||||
CREATE TABLE "t t"(a, b, c);
|
||||
} {
|
||||
SELECT * FROM "t t" WHERE a=?
|
||||
} {
|
||||
CREATE INDEX "t t_idx_00000061" ON "t t"(a);
|
||||
SEARCH t t USING INDEX t t_idx_00000061 (a=?)
|
||||
}
|
||||
|
||||
do_setup_rec_test $tn.9.2 {
|
||||
CREATE TABLE "t t"(a, b, c);
|
||||
} {
|
||||
SELECT * FROM "t t" WHERE b BETWEEN ? AND ?
|
||||
} {
|
||||
CREATE INDEX "t t_idx_00000062" ON "t t"(b);
|
||||
SEARCH t t USING INDEX t t_idx_00000062 (b>? AND b<?)
|
||||
}
|
||||
|
||||
# Columns with names that require quotes.
|
||||
#
|
||||
do_setup_rec_test $tn.10.1 {
|
||||
CREATE TABLE t3(a, "b b", c);
|
||||
} {
|
||||
SELECT * FROM t3 WHERE "b b" = ?
|
||||
} {
|
||||
CREATE INDEX t3_idx_00050c52 ON t3('b b');
|
||||
SEARCH t3 USING INDEX t3_idx_00050c52 (b b=?)
|
||||
}
|
||||
|
||||
do_setup_rec_test $tn.10.2 {
|
||||
CREATE TABLE t3(a, "b b", c);
|
||||
} {
|
||||
SELECT * FROM t3 ORDER BY "b b"
|
||||
} {
|
||||
CREATE INDEX t3_idx_00050c52 ON t3('b b');
|
||||
SCAN t3 USING INDEX t3_idx_00050c52
|
||||
}
|
||||
|
||||
# Transitive constraints
|
||||
#
|
||||
do_setup_rec_test $tn.11.1 {
|
||||
CREATE TABLE t5(a, b);
|
||||
CREATE TABLE t6(c, d);
|
||||
} {
|
||||
SELECT * FROM t5, t6 WHERE a=? AND b=c AND c=?
|
||||
} {
|
||||
CREATE INDEX t5_idx_000123a7 ON t5(a, b);
|
||||
CREATE INDEX t6_idx_00000063 ON t6(c);
|
||||
SEARCH t6 USING INDEX t6_idx_00000063 (c=?)
|
||||
SEARCH t5 USING COVERING INDEX t5_idx_000123a7 (a=? AND b=?)
|
||||
}
|
||||
|
||||
# OR terms.
|
||||
#
|
||||
do_setup_rec_test $tn.12.1 {
|
||||
CREATE TABLE t7(a, b);
|
||||
} {
|
||||
SELECT * FROM t7 WHERE a=? OR b=?
|
||||
} {
|
||||
CREATE INDEX t7_idx_00000062 ON t7(b);
|
||||
CREATE INDEX t7_idx_00000061 ON t7(a);
|
||||
MULTI-INDEX OR
|
||||
INDEX 1
|
||||
SEARCH t7 USING INDEX t7_idx_00000061 (a=?)
|
||||
INDEX 2
|
||||
SEARCH t7 USING INDEX t7_idx_00000062 (b=?)
|
||||
}
|
||||
|
||||
# rowid terms.
|
||||
#
|
||||
do_setup_rec_test $tn.13.1 {
|
||||
CREATE TABLE t8(a, b);
|
||||
} {
|
||||
SELECT * FROM t8 WHERE rowid=?
|
||||
} {
|
||||
(no new indexes)
|
||||
SEARCH t8 USING INTEGER PRIMARY KEY (rowid=?)
|
||||
}
|
||||
do_setup_rec_test $tn.13.2 {
|
||||
CREATE TABLE t8(a, b);
|
||||
} {
|
||||
SELECT * FROM t8 ORDER BY rowid
|
||||
} {
|
||||
(no new indexes)
|
||||
SCAN t8
|
||||
}
|
||||
do_setup_rec_test $tn.13.3 {
|
||||
CREATE TABLE t8(a, b);
|
||||
} {
|
||||
SELECT * FROM t8 WHERE a=? ORDER BY rowid
|
||||
} {
|
||||
CREATE INDEX t8_idx_00000061 ON t8(a);
|
||||
SEARCH t8 USING INDEX t8_idx_00000061 (a=?)
|
||||
}
|
||||
|
||||
# Triggers
|
||||
#
|
||||
do_setup_rec_test $tn.14 {
|
||||
CREATE TABLE t9(a, b, c);
|
||||
CREATE TABLE t10(a, b, c);
|
||||
CREATE TRIGGER t9t AFTER INSERT ON t9 BEGIN
|
||||
UPDATE t10 SET a=new.a WHERE b = new.b;
|
||||
END;
|
||||
} {
|
||||
INSERT INTO t9 VALUES(?, ?, ?);
|
||||
} {
|
||||
CREATE INDEX t10_idx_00000062 ON t10(b);
|
||||
SEARCH t10 USING INDEX t10_idx_00000062 (b=?)
|
||||
}
|
||||
|
||||
do_setup_rec_test $tn.15 {
|
||||
CREATE TABLE t1(a, b);
|
||||
CREATE TABLE t2(c, d);
|
||||
|
||||
WITH s(i) AS ( VALUES(1) UNION ALL SELECT i+1 FROM s WHERE i<100)
|
||||
INSERT INTO t1 SELECT (i-1)/50, (i-1)/20 FROM s;
|
||||
|
||||
WITH s(i) AS ( VALUES(1) UNION ALL SELECT i+1 FROM s WHERE i<100)
|
||||
INSERT INTO t2 SELECT (i-1)/20, (i-1)/5 FROM s;
|
||||
} {
|
||||
SELECT * FROM t2, t1 WHERE b=? AND d=? AND t2.rowid=t1.rowid
|
||||
} {
|
||||
CREATE INDEX t2_idx_00000064 ON t2(d);
|
||||
SEARCH t2 USING INDEX t2_idx_00000064 (d=?)
|
||||
SEARCH t1 USING INTEGER PRIMARY KEY (rowid=?)
|
||||
}
|
||||
|
||||
do_setup_rec_test $tn.16 {
|
||||
CREATE TABLE t1(a, b);
|
||||
} {
|
||||
SELECT * FROM t1 WHERE b IS NOT NULL;
|
||||
} {
|
||||
(no new indexes)
|
||||
SCAN t1
|
||||
}
|
||||
|
||||
do_setup_rec_test $tn.17.1 {
|
||||
CREATE TABLE example (A INTEGER, B INTEGER, C INTEGER, PRIMARY KEY (A,B));
|
||||
} {
|
||||
SELECT * FROM example WHERE a=?
|
||||
} {
|
||||
(no new indexes)
|
||||
SEARCH example USING INDEX sqlite_autoindex_example_1 (A=?)
|
||||
}
|
||||
do_setup_rec_test $tn.17.2 {
|
||||
CREATE TABLE example (A INTEGER, B INTEGER, C INTEGER, PRIMARY KEY (A,B));
|
||||
} {
|
||||
SELECT * FROM example WHERE b=?
|
||||
} {
|
||||
CREATE INDEX example_idx_00000042 ON example(B);
|
||||
SEARCH example USING INDEX example_idx_00000042 (B=?)
|
||||
}
|
||||
do_setup_rec_test $tn.17.3 {
|
||||
CREATE TABLE example (A INTEGER, B INTEGER, C INTEGER, PRIMARY KEY (A,B));
|
||||
} {
|
||||
SELECT * FROM example WHERE a=? AND b=?
|
||||
} {
|
||||
(no new indexes)
|
||||
SEARCH example USING INDEX sqlite_autoindex_example_1 (A=? AND B=?)
|
||||
}
|
||||
do_setup_rec_test $tn.17.4 {
|
||||
CREATE TABLE example (A INTEGER, B INTEGER, C INTEGER, PRIMARY KEY (A,B));
|
||||
} {
|
||||
SELECT * FROM example WHERE a=? AND b>?
|
||||
} {
|
||||
(no new indexes)
|
||||
SEARCH example USING INDEX sqlite_autoindex_example_1 (A=? AND B>?)
|
||||
}
|
||||
do_setup_rec_test $tn.17.5 {
|
||||
CREATE TABLE example (A INTEGER, B INTEGER, C INTEGER, PRIMARY KEY (A,B));
|
||||
} {
|
||||
SELECT * FROM example WHERE a>? AND b=?
|
||||
} {
|
||||
CREATE INDEX example_idx_0000cb3f ON example(B, A);
|
||||
SEARCH example USING INDEX example_idx_0000cb3f (B=? AND A>?)
|
||||
}
|
||||
|
||||
do_setup_rec_test $tn.18.0 {
|
||||
CREATE TABLE SomeObject (
|
||||
a INTEGER PRIMARY KEY,
|
||||
x TEXT GENERATED ALWAYS AS(HEX(a)) VIRTUAL
|
||||
);
|
||||
} {
|
||||
SELECT x FROM SomeObject;
|
||||
} {
|
||||
(no new indexes)
|
||||
SCAN SomeObject
|
||||
}
|
||||
do_setup_rec_test $tn.18.1 {
|
||||
CREATE TABLE SomeObject (
|
||||
a INTEGER PRIMARY KEY,
|
||||
x TEXT GENERATED ALWAYS AS(HEX(a)) VIRTUAL
|
||||
);
|
||||
} {
|
||||
SELECT * FROM SomeObject WHERE x=?;
|
||||
} {
|
||||
CREATE INDEX SomeObject_idx_00000078 ON SomeObject(x);
|
||||
SEARCH SomeObject USING COVERING INDEX SomeObject_idx_00000078 (x=?)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
proc do_candidates_test {tn sql res} {
|
||||
set res [squish [string trim $res]]
|
||||
|
||||
set expert [sqlite3_expert_new db]
|
||||
$expert sql $sql
|
||||
$expert analyze
|
||||
|
||||
set candidates [squish [string trim [$expert report 0 candidates]]]
|
||||
$expert destroy
|
||||
|
||||
uplevel [list do_test $tn [list set {} $candidates] $res]
|
||||
}
|
||||
|
||||
|
||||
reset_db
|
||||
do_execsql_test 5.0 {
|
||||
CREATE TABLE t1(a, b);
|
||||
CREATE TABLE t2(c, d);
|
||||
|
||||
WITH s(i) AS ( VALUES(1) UNION ALL SELECT i+1 FROM s WHERE i<100)
|
||||
INSERT INTO t1 SELECT (i-1)/50, (i-1)/20 FROM s;
|
||||
|
||||
WITH s(i) AS ( VALUES(1) UNION ALL SELECT i+1 FROM s WHERE i<100)
|
||||
INSERT INTO t2 SELECT (i-1)/20, (i-1)/5 FROM s;
|
||||
}
|
||||
do_candidates_test 5.1 {
|
||||
SELECT * FROM t1,t2 WHERE (b=? OR a=?) AND (c=? OR d=?)
|
||||
} {
|
||||
CREATE INDEX t1_idx_00000062 ON t1(b); -- stat1: 100 20
|
||||
CREATE INDEX t1_idx_00000061 ON t1(a); -- stat1: 100 50
|
||||
CREATE INDEX t2_idx_00000063 ON t2(c); -- stat1: 100 20
|
||||
CREATE INDEX t2_idx_00000064 ON t2(d); -- stat1: 100 5
|
||||
}
|
||||
|
||||
do_candidates_test 5.2 {
|
||||
SELECT * FROM t1,t2 WHERE a=? AND b=? AND c=? AND d=?
|
||||
} {
|
||||
CREATE INDEX t1_idx_000123a7 ON t1(a, b); -- stat1: 100 50 17
|
||||
CREATE INDEX t2_idx_0001295b ON t2(c, d); -- stat1: 100 20 5
|
||||
}
|
||||
|
||||
do_execsql_test 5.3 {
|
||||
CREATE INDEX t1_idx_00000061 ON t1(a); -- stat1: 100 50
|
||||
CREATE INDEX t1_idx_00000062 ON t1(b); -- stat1: 100 20
|
||||
CREATE INDEX t1_idx_000123a7 ON t1(a, b); -- stat1: 100 50 16
|
||||
|
||||
CREATE INDEX t2_idx_00000063 ON t2(c); -- stat1: 100 20
|
||||
CREATE INDEX t2_idx_00000064 ON t2(d); -- stat1: 100 5
|
||||
CREATE INDEX t2_idx_0001295b ON t2(c, d); -- stat1: 100 20 5
|
||||
|
||||
ANALYZE;
|
||||
SELECT * FROM sqlite_stat1 ORDER BY 1, 2;
|
||||
} {
|
||||
t1 t1_idx_00000061 {100 50}
|
||||
t1 t1_idx_00000062 {100 20}
|
||||
t1 t1_idx_000123a7 {100 50 17}
|
||||
t2 t2_idx_00000063 {100 20}
|
||||
t2 t2_idx_00000064 {100 5}
|
||||
t2 t2_idx_0001295b {100 20 5}
|
||||
}
|
||||
|
||||
finish_test
|
||||
2027
ext/expert/sqlite3expert.c
Normal file
2027
ext/expert/sqlite3expert.c
Normal file
File diff suppressed because it is too large
Load diff
168
ext/expert/sqlite3expert.h
Normal file
168
ext/expert/sqlite3expert.h
Normal file
|
|
@ -0,0 +1,168 @@
|
|||
/*
|
||||
** 2017 April 07
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
*************************************************************************
|
||||
*/
|
||||
#if !defined(SQLITEEXPERT_H)
|
||||
#define SQLITEEXPERT_H 1
|
||||
#include "sqlite3.h"
|
||||
|
||||
typedef struct sqlite3expert sqlite3expert;
|
||||
|
||||
/*
|
||||
** Create a new sqlite3expert object.
|
||||
**
|
||||
** If successful, a pointer to the new object is returned and (*pzErr) set
|
||||
** to NULL. Or, if an error occurs, NULL is returned and (*pzErr) set to
|
||||
** an English-language error message. In this case it is the responsibility
|
||||
** of the caller to eventually free the error message buffer using
|
||||
** sqlite3_free().
|
||||
*/
|
||||
sqlite3expert *sqlite3_expert_new(sqlite3 *db, char **pzErr);
|
||||
|
||||
/*
|
||||
** Configure an sqlite3expert object.
|
||||
**
|
||||
** EXPERT_CONFIG_SAMPLE:
|
||||
** By default, sqlite3_expert_analyze() generates sqlite_stat1 data for
|
||||
** each candidate index. This involves scanning and sorting the entire
|
||||
** contents of each user database table once for each candidate index
|
||||
** associated with the table. For large databases, this can be
|
||||
** prohibitively slow. This option allows the sqlite3expert object to
|
||||
** be configured so that sqlite_stat1 data is instead generated based on a
|
||||
** subset of each table, or so that no sqlite_stat1 data is used at all.
|
||||
**
|
||||
** A single integer argument is passed to this option. If the value is less
|
||||
** than or equal to zero, then no sqlite_stat1 data is generated or used by
|
||||
** the analysis - indexes are recommended based on the database schema only.
|
||||
** Or, if the value is 100 or greater, complete sqlite_stat1 data is
|
||||
** generated for each candidate index (this is the default). Finally, if the
|
||||
** value falls between 0 and 100, then it represents the percentage of user
|
||||
** table rows that should be considered when generating sqlite_stat1 data.
|
||||
**
|
||||
** Examples:
|
||||
**
|
||||
** // Do not generate any sqlite_stat1 data
|
||||
** sqlite3_expert_config(pExpert, EXPERT_CONFIG_SAMPLE, 0);
|
||||
**
|
||||
** // Generate sqlite_stat1 data based on 10% of the rows in each table.
|
||||
** sqlite3_expert_config(pExpert, EXPERT_CONFIG_SAMPLE, 10);
|
||||
*/
|
||||
int sqlite3_expert_config(sqlite3expert *p, int op, ...);
|
||||
|
||||
#define EXPERT_CONFIG_SAMPLE 1 /* int */
|
||||
|
||||
/*
|
||||
** Specify zero or more SQL statements to be included in the analysis.
|
||||
**
|
||||
** Buffer zSql must contain zero or more complete SQL statements. This
|
||||
** function parses all statements contained in the buffer and adds them
|
||||
** to the internal list of statements to analyze. If successful, SQLITE_OK
|
||||
** is returned and (*pzErr) set to NULL. Or, if an error occurs - for example
|
||||
** due to a error in the SQL - an SQLite error code is returned and (*pzErr)
|
||||
** may be set to point to an English language error message. In this case
|
||||
** the caller is responsible for eventually freeing the error message buffer
|
||||
** using sqlite3_free().
|
||||
**
|
||||
** If an error does occur while processing one of the statements in the
|
||||
** buffer passed as the second argument, none of the statements in the
|
||||
** buffer are added to the analysis.
|
||||
**
|
||||
** This function must be called before sqlite3_expert_analyze(). If a call
|
||||
** to this function is made on an sqlite3expert object that has already
|
||||
** been passed to sqlite3_expert_analyze() SQLITE_MISUSE is returned
|
||||
** immediately and no statements are added to the analysis.
|
||||
*/
|
||||
int sqlite3_expert_sql(
|
||||
sqlite3expert *p, /* From a successful sqlite3_expert_new() */
|
||||
const char *zSql, /* SQL statement(s) to add */
|
||||
char **pzErr /* OUT: Error message (if any) */
|
||||
);
|
||||
|
||||
|
||||
/*
|
||||
** This function is called after the sqlite3expert object has been configured
|
||||
** with all SQL statements using sqlite3_expert_sql() to actually perform
|
||||
** the analysis. Once this function has been called, it is not possible to
|
||||
** add further SQL statements to the analysis.
|
||||
**
|
||||
** If successful, SQLITE_OK is returned and (*pzErr) is set to NULL. Or, if
|
||||
** an error occurs, an SQLite error code is returned and (*pzErr) set to
|
||||
** point to a buffer containing an English language error message. In this
|
||||
** case it is the responsibility of the caller to eventually free the buffer
|
||||
** using sqlite3_free().
|
||||
**
|
||||
** If an error does occur within this function, the sqlite3expert object
|
||||
** is no longer useful for any purpose. At that point it is no longer
|
||||
** possible to add further SQL statements to the object or to re-attempt
|
||||
** the analysis. The sqlite3expert object must still be freed using a call
|
||||
** sqlite3_expert_destroy().
|
||||
*/
|
||||
int sqlite3_expert_analyze(sqlite3expert *p, char **pzErr);
|
||||
|
||||
/*
|
||||
** Return the total number of statements loaded using sqlite3_expert_sql().
|
||||
** The total number of SQL statements may be different from the total number
|
||||
** to calls to sqlite3_expert_sql().
|
||||
*/
|
||||
int sqlite3_expert_count(sqlite3expert*);
|
||||
|
||||
/*
|
||||
** Return a component of the report.
|
||||
**
|
||||
** This function is called after sqlite3_expert_analyze() to extract the
|
||||
** results of the analysis. Each call to this function returns either a
|
||||
** NULL pointer or a pointer to a buffer containing a nul-terminated string.
|
||||
** The value passed as the third argument must be one of the EXPERT_REPORT_*
|
||||
** #define constants defined below.
|
||||
**
|
||||
** For some EXPERT_REPORT_* parameters, the buffer returned contains
|
||||
** information relating to a specific SQL statement. In these cases that
|
||||
** SQL statement is identified by the value passed as the second argument.
|
||||
** SQL statements are numbered from 0 in the order in which they are parsed.
|
||||
** If an out-of-range value (less than zero or equal to or greater than the
|
||||
** value returned by sqlite3_expert_count()) is passed as the second argument
|
||||
** along with such an EXPERT_REPORT_* parameter, NULL is always returned.
|
||||
**
|
||||
** EXPERT_REPORT_SQL:
|
||||
** Return the text of SQL statement iStmt.
|
||||
**
|
||||
** EXPERT_REPORT_INDEXES:
|
||||
** Return a buffer containing the CREATE INDEX statements for all recommended
|
||||
** indexes for statement iStmt. If there are no new recommeded indexes, NULL
|
||||
** is returned.
|
||||
**
|
||||
** EXPERT_REPORT_PLAN:
|
||||
** Return a buffer containing the EXPLAIN QUERY PLAN output for SQL query
|
||||
** iStmt after the proposed indexes have been added to the database schema.
|
||||
**
|
||||
** EXPERT_REPORT_CANDIDATES:
|
||||
** Return a pointer to a buffer containing the CREATE INDEX statements
|
||||
** for all indexes that were tested (for all SQL statements). The iStmt
|
||||
** parameter is ignored for EXPERT_REPORT_CANDIDATES calls.
|
||||
*/
|
||||
const char *sqlite3_expert_report(sqlite3expert*, int iStmt, int eReport);
|
||||
|
||||
/*
|
||||
** Values for the third argument passed to sqlite3_expert_report().
|
||||
*/
|
||||
#define EXPERT_REPORT_SQL 1
|
||||
#define EXPERT_REPORT_INDEXES 2
|
||||
#define EXPERT_REPORT_PLAN 3
|
||||
#define EXPERT_REPORT_CANDIDATES 4
|
||||
|
||||
/*
|
||||
** Free an (sqlite3expert*) handle and all associated resources. There
|
||||
** should be one call to this function for each successful call to
|
||||
** sqlite3-expert_new().
|
||||
*/
|
||||
void sqlite3_expert_destroy(sqlite3expert*);
|
||||
|
||||
#endif /* !defined(SQLITEEXPERT_H) */
|
||||
220
ext/expert/test_expert.c
Normal file
220
ext/expert/test_expert.c
Normal file
|
|
@ -0,0 +1,220 @@
|
|||
/*
|
||||
** 2017 April 07
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
*************************************************************************
|
||||
*/
|
||||
|
||||
#if defined(SQLITE_TEST)
|
||||
|
||||
#include "sqlite3expert.h"
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
|
||||
#if defined(INCLUDE_SQLITE_TCL_H)
|
||||
# include "sqlite_tcl.h"
|
||||
#else
|
||||
# include "tcl.h"
|
||||
# ifndef SQLITE_TCLAPI
|
||||
# define SQLITE_TCLAPI
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifndef SQLITE_OMIT_VIRTUALTABLE
|
||||
|
||||
/*
|
||||
** Extract an sqlite3* db handle from the object passed as the second
|
||||
** argument. If successful, set *pDb to point to the db handle and return
|
||||
** TCL_OK. Otherwise, return TCL_ERROR.
|
||||
*/
|
||||
static int dbHandleFromObj(Tcl_Interp *interp, Tcl_Obj *pObj, sqlite3 **pDb){
|
||||
Tcl_CmdInfo info;
|
||||
if( 0==Tcl_GetCommandInfo(interp, Tcl_GetString(pObj), &info) ){
|
||||
Tcl_AppendResult(interp, "no such handle: ", Tcl_GetString(pObj), 0);
|
||||
return TCL_ERROR;
|
||||
}
|
||||
|
||||
*pDb = *(sqlite3 **)info.objClientData;
|
||||
return TCL_OK;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Tclcmd: $expert sql SQL
|
||||
** $expert analyze
|
||||
** $expert count
|
||||
** $expert report STMT EREPORT
|
||||
** $expert destroy
|
||||
*/
|
||||
static int SQLITE_TCLAPI testExpertCmd(
|
||||
void *clientData,
|
||||
Tcl_Interp *interp,
|
||||
int objc,
|
||||
Tcl_Obj *CONST objv[]
|
||||
){
|
||||
sqlite3expert *pExpert = (sqlite3expert*)clientData;
|
||||
struct Subcmd {
|
||||
const char *zSub;
|
||||
int nArg;
|
||||
const char *zMsg;
|
||||
} aSub[] = {
|
||||
{ "sql", 1, "TABLE", }, /* 0 */
|
||||
{ "analyze", 0, "", }, /* 1 */
|
||||
{ "count", 0, "", }, /* 2 */
|
||||
{ "report", 2, "STMT EREPORT", }, /* 3 */
|
||||
{ "destroy", 0, "", }, /* 4 */
|
||||
{ 0 }
|
||||
};
|
||||
int iSub;
|
||||
int rc = TCL_OK;
|
||||
char *zErr = 0;
|
||||
|
||||
if( objc<2 ){
|
||||
Tcl_WrongNumArgs(interp, 1, objv, "SUBCOMMAND ...");
|
||||
return TCL_ERROR;
|
||||
}
|
||||
rc = Tcl_GetIndexFromObjStruct(interp,
|
||||
objv[1], aSub, sizeof(aSub[0]), "sub-command", 0, &iSub
|
||||
);
|
||||
if( rc!=TCL_OK ) return rc;
|
||||
if( objc!=2+aSub[iSub].nArg ){
|
||||
Tcl_WrongNumArgs(interp, 2, objv, aSub[iSub].zMsg);
|
||||
return TCL_ERROR;
|
||||
}
|
||||
|
||||
switch( iSub ){
|
||||
case 0: { /* sql */
|
||||
char *zArg = Tcl_GetString(objv[2]);
|
||||
rc = sqlite3_expert_sql(pExpert, zArg, &zErr);
|
||||
break;
|
||||
}
|
||||
|
||||
case 1: { /* analyze */
|
||||
rc = sqlite3_expert_analyze(pExpert, &zErr);
|
||||
break;
|
||||
}
|
||||
|
||||
case 2: { /* count */
|
||||
int n = sqlite3_expert_count(pExpert);
|
||||
Tcl_SetObjResult(interp, Tcl_NewIntObj(n));
|
||||
break;
|
||||
}
|
||||
|
||||
case 3: { /* report */
|
||||
const char *aEnum[] = {
|
||||
"sql", "indexes", "plan", "candidates", 0
|
||||
};
|
||||
int iEnum;
|
||||
int iStmt;
|
||||
const char *zReport;
|
||||
|
||||
if( Tcl_GetIntFromObj(interp, objv[2], &iStmt)
|
||||
|| Tcl_GetIndexFromObj(interp, objv[3], aEnum, "report", 0, &iEnum)
|
||||
){
|
||||
return TCL_ERROR;
|
||||
}
|
||||
|
||||
assert( EXPERT_REPORT_SQL==1 );
|
||||
assert( EXPERT_REPORT_INDEXES==2 );
|
||||
assert( EXPERT_REPORT_PLAN==3 );
|
||||
assert( EXPERT_REPORT_CANDIDATES==4 );
|
||||
zReport = sqlite3_expert_report(pExpert, iStmt, 1+iEnum);
|
||||
Tcl_SetObjResult(interp, Tcl_NewStringObj(zReport, -1));
|
||||
break;
|
||||
}
|
||||
|
||||
default: /* destroy */
|
||||
assert( iSub==4 );
|
||||
Tcl_DeleteCommand(interp, Tcl_GetString(objv[0]));
|
||||
break;
|
||||
}
|
||||
|
||||
if( rc!=TCL_OK ){
|
||||
if( zErr ){
|
||||
Tcl_SetObjResult(interp, Tcl_NewStringObj(zErr, -1));
|
||||
}else{
|
||||
extern const char *sqlite3ErrName(int);
|
||||
Tcl_SetObjResult(interp, Tcl_NewStringObj(sqlite3ErrName(rc), -1));
|
||||
}
|
||||
}
|
||||
sqlite3_free(zErr);
|
||||
return rc;
|
||||
}
|
||||
|
||||
static void SQLITE_TCLAPI testExpertDel(void *clientData){
|
||||
sqlite3expert *pExpert = (sqlite3expert*)clientData;
|
||||
sqlite3_expert_destroy(pExpert);
|
||||
}
|
||||
|
||||
/*
|
||||
** sqlite3_expert_new DB
|
||||
*/
|
||||
static int SQLITE_TCLAPI test_sqlite3_expert_new(
|
||||
void * clientData,
|
||||
Tcl_Interp *interp,
|
||||
int objc,
|
||||
Tcl_Obj *CONST objv[]
|
||||
){
|
||||
static int iCmd = 0;
|
||||
sqlite3 *db;
|
||||
char *zCmd = 0;
|
||||
char *zErr = 0;
|
||||
sqlite3expert *pExpert;
|
||||
int rc = TCL_OK;
|
||||
|
||||
if( objc!=2 ){
|
||||
Tcl_WrongNumArgs(interp, 1, objv, "DB");
|
||||
return TCL_ERROR;
|
||||
}
|
||||
if( dbHandleFromObj(interp, objv[1], &db) ){
|
||||
return TCL_ERROR;
|
||||
}
|
||||
|
||||
zCmd = sqlite3_mprintf("sqlite3expert%d", ++iCmd);
|
||||
if( zCmd==0 ){
|
||||
Tcl_AppendResult(interp, "out of memory", (char*)0);
|
||||
return TCL_ERROR;
|
||||
}
|
||||
|
||||
pExpert = sqlite3_expert_new(db, &zErr);
|
||||
if( pExpert==0 ){
|
||||
Tcl_AppendResult(interp, zErr, (char*)0);
|
||||
rc = TCL_ERROR;
|
||||
}else{
|
||||
void *p = (void*)pExpert;
|
||||
Tcl_CreateObjCommand(interp, zCmd, testExpertCmd, p, testExpertDel);
|
||||
Tcl_SetObjResult(interp, Tcl_NewStringObj(zCmd, -1));
|
||||
}
|
||||
|
||||
sqlite3_free(zCmd);
|
||||
sqlite3_free(zErr);
|
||||
return rc;
|
||||
}
|
||||
|
||||
#endif /* ifndef SQLITE_OMIT_VIRTUALTABLE */
|
||||
|
||||
int TestExpert_Init(Tcl_Interp *interp){
|
||||
#ifndef SQLITE_OMIT_VIRTUALTABLE
|
||||
struct Cmd {
|
||||
const char *zCmd;
|
||||
Tcl_ObjCmdProc *xProc;
|
||||
} aCmd[] = {
|
||||
{ "sqlite3_expert_new", test_sqlite3_expert_new },
|
||||
};
|
||||
int i;
|
||||
|
||||
for(i=0; i<sizeof(aCmd)/sizeof(struct Cmd); i++){
|
||||
struct Cmd *p = &aCmd[i];
|
||||
Tcl_CreateObjCommand(interp, p->zCmd, p->xProc, 0, 0);
|
||||
}
|
||||
#endif
|
||||
return TCL_OK;
|
||||
}
|
||||
|
||||
#endif
|
||||
2
ext/fts1/README.txt
Normal file
2
ext/fts1/README.txt
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
This folder contains source code to the first full-text search
|
||||
extension for SQLite.
|
||||
404
ext/fts1/ft_hash.c
Normal file
404
ext/fts1/ft_hash.c
Normal file
|
|
@ -0,0 +1,404 @@
|
|||
/*
|
||||
** 2001 September 22
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
*************************************************************************
|
||||
** This is the implementation of generic hash-tables used in SQLite.
|
||||
** We've modified it slightly to serve as a standalone hash table
|
||||
** implementation for the full-text indexing module.
|
||||
*/
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "ft_hash.h"
|
||||
|
||||
void *malloc_and_zero(int n){
|
||||
void *p = malloc(n);
|
||||
if( p ){
|
||||
memset(p, 0, n);
|
||||
}
|
||||
return p;
|
||||
}
|
||||
|
||||
/* Turn bulk memory into a hash table object by initializing the
|
||||
** fields of the Hash structure.
|
||||
**
|
||||
** "pNew" is a pointer to the hash table that is to be initialized.
|
||||
** keyClass is one of the constants HASH_INT, HASH_POINTER,
|
||||
** HASH_BINARY, or HASH_STRING. The value of keyClass
|
||||
** determines what kind of key the hash table will use. "copyKey" is
|
||||
** true if the hash table should make its own private copy of keys and
|
||||
** false if it should just use the supplied pointer. CopyKey only makes
|
||||
** sense for HASH_STRING and HASH_BINARY and is ignored
|
||||
** for other key classes.
|
||||
*/
|
||||
void HashInit(Hash *pNew, int keyClass, int copyKey){
|
||||
assert( pNew!=0 );
|
||||
assert( keyClass>=HASH_STRING && keyClass<=HASH_BINARY );
|
||||
pNew->keyClass = keyClass;
|
||||
#if 0
|
||||
if( keyClass==HASH_POINTER || keyClass==HASH_INT ) copyKey = 0;
|
||||
#endif
|
||||
pNew->copyKey = copyKey;
|
||||
pNew->first = 0;
|
||||
pNew->count = 0;
|
||||
pNew->htsize = 0;
|
||||
pNew->ht = 0;
|
||||
pNew->xMalloc = malloc_and_zero;
|
||||
pNew->xFree = free;
|
||||
}
|
||||
|
||||
/* Remove all entries from a hash table. Reclaim all memory.
|
||||
** Call this routine to delete a hash table or to reset a hash table
|
||||
** to the empty state.
|
||||
*/
|
||||
void HashClear(Hash *pH){
|
||||
HashElem *elem; /* For looping over all elements of the table */
|
||||
|
||||
assert( pH!=0 );
|
||||
elem = pH->first;
|
||||
pH->first = 0;
|
||||
if( pH->ht ) pH->xFree(pH->ht);
|
||||
pH->ht = 0;
|
||||
pH->htsize = 0;
|
||||
while( elem ){
|
||||
HashElem *next_elem = elem->next;
|
||||
if( pH->copyKey && elem->pKey ){
|
||||
pH->xFree(elem->pKey);
|
||||
}
|
||||
pH->xFree(elem);
|
||||
elem = next_elem;
|
||||
}
|
||||
pH->count = 0;
|
||||
}
|
||||
|
||||
#if 0 /* NOT USED */
|
||||
/*
|
||||
** Hash and comparison functions when the mode is HASH_INT
|
||||
*/
|
||||
static int intHash(const void *pKey, int nKey){
|
||||
return nKey ^ (nKey<<8) ^ (nKey>>8);
|
||||
}
|
||||
static int intCompare(const void *pKey1, int n1, const void *pKey2, int n2){
|
||||
return n2 - n1;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if 0 /* NOT USED */
|
||||
/*
|
||||
** Hash and comparison functions when the mode is HASH_POINTER
|
||||
*/
|
||||
static int ptrHash(const void *pKey, int nKey){
|
||||
uptr x = Addr(pKey);
|
||||
return x ^ (x<<8) ^ (x>>8);
|
||||
}
|
||||
static int ptrCompare(const void *pKey1, int n1, const void *pKey2, int n2){
|
||||
if( pKey1==pKey2 ) return 0;
|
||||
if( pKey1<pKey2 ) return -1;
|
||||
return 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
** Hash and comparison functions when the mode is HASH_STRING
|
||||
*/
|
||||
static int strHash(const void *pKey, int nKey){
|
||||
const char *z = (const char *)pKey;
|
||||
int h = 0;
|
||||
if( nKey<=0 ) nKey = (int) strlen(z);
|
||||
while( nKey > 0 ){
|
||||
h = (h<<3) ^ h ^ *z++;
|
||||
nKey--;
|
||||
}
|
||||
return h & 0x7fffffff;
|
||||
}
|
||||
static int strCompare(const void *pKey1, int n1, const void *pKey2, int n2){
|
||||
if( n1!=n2 ) return 1;
|
||||
return strncmp((const char*)pKey1,(const char*)pKey2,n1);
|
||||
}
|
||||
|
||||
/*
|
||||
** Hash and comparison functions when the mode is HASH_BINARY
|
||||
*/
|
||||
static int binHash(const void *pKey, int nKey){
|
||||
int h = 0;
|
||||
const char *z = (const char *)pKey;
|
||||
while( nKey-- > 0 ){
|
||||
h = (h<<3) ^ h ^ *(z++);
|
||||
}
|
||||
return h & 0x7fffffff;
|
||||
}
|
||||
static int binCompare(const void *pKey1, int n1, const void *pKey2, int n2){
|
||||
if( n1!=n2 ) return 1;
|
||||
return memcmp(pKey1,pKey2,n1);
|
||||
}
|
||||
|
||||
/*
|
||||
** Return a pointer to the appropriate hash function given the key class.
|
||||
**
|
||||
** The C syntax in this function definition may be unfamilar to some
|
||||
** programmers, so we provide the following additional explanation:
|
||||
**
|
||||
** The name of the function is "hashFunction". The function takes a
|
||||
** single parameter "keyClass". The return value of hashFunction()
|
||||
** is a pointer to another function. Specifically, the return value
|
||||
** of hashFunction() is a pointer to a function that takes two parameters
|
||||
** with types "const void*" and "int" and returns an "int".
|
||||
*/
|
||||
static int (*hashFunction(int keyClass))(const void*,int){
|
||||
#if 0 /* HASH_INT and HASH_POINTER are never used */
|
||||
switch( keyClass ){
|
||||
case HASH_INT: return &intHash;
|
||||
case HASH_POINTER: return &ptrHash;
|
||||
case HASH_STRING: return &strHash;
|
||||
case HASH_BINARY: return &binHash;;
|
||||
default: break;
|
||||
}
|
||||
return 0;
|
||||
#else
|
||||
if( keyClass==HASH_STRING ){
|
||||
return &strHash;
|
||||
}else{
|
||||
assert( keyClass==HASH_BINARY );
|
||||
return &binHash;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
** Return a pointer to the appropriate hash function given the key class.
|
||||
**
|
||||
** For help in interpreted the obscure C code in the function definition,
|
||||
** see the header comment on the previous function.
|
||||
*/
|
||||
static int (*compareFunction(int keyClass))(const void*,int,const void*,int){
|
||||
#if 0 /* HASH_INT and HASH_POINTER are never used */
|
||||
switch( keyClass ){
|
||||
case HASH_INT: return &intCompare;
|
||||
case HASH_POINTER: return &ptrCompare;
|
||||
case HASH_STRING: return &strCompare;
|
||||
case HASH_BINARY: return &binCompare;
|
||||
default: break;
|
||||
}
|
||||
return 0;
|
||||
#else
|
||||
if( keyClass==HASH_STRING ){
|
||||
return &strCompare;
|
||||
}else{
|
||||
assert( keyClass==HASH_BINARY );
|
||||
return &binCompare;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Link an element into the hash table
|
||||
*/
|
||||
static void insertElement(
|
||||
Hash *pH, /* The complete hash table */
|
||||
struct _ht *pEntry, /* The entry into which pNew is inserted */
|
||||
HashElem *pNew /* The element to be inserted */
|
||||
){
|
||||
HashElem *pHead; /* First element already in pEntry */
|
||||
pHead = pEntry->chain;
|
||||
if( pHead ){
|
||||
pNew->next = pHead;
|
||||
pNew->prev = pHead->prev;
|
||||
if( pHead->prev ){ pHead->prev->next = pNew; }
|
||||
else { pH->first = pNew; }
|
||||
pHead->prev = pNew;
|
||||
}else{
|
||||
pNew->next = pH->first;
|
||||
if( pH->first ){ pH->first->prev = pNew; }
|
||||
pNew->prev = 0;
|
||||
pH->first = pNew;
|
||||
}
|
||||
pEntry->count++;
|
||||
pEntry->chain = pNew;
|
||||
}
|
||||
|
||||
|
||||
/* Resize the hash table so that it cantains "new_size" buckets.
|
||||
** "new_size" must be a power of 2. The hash table might fail
|
||||
** to resize if sqliteMalloc() fails.
|
||||
*/
|
||||
static void rehash(Hash *pH, int new_size){
|
||||
struct _ht *new_ht; /* The new hash table */
|
||||
HashElem *elem, *next_elem; /* For looping over existing elements */
|
||||
int (*xHash)(const void*,int); /* The hash function */
|
||||
|
||||
assert( (new_size & (new_size-1))==0 );
|
||||
new_ht = (struct _ht *)pH->xMalloc( new_size*sizeof(struct _ht) );
|
||||
if( new_ht==0 ) return;
|
||||
if( pH->ht ) pH->xFree(pH->ht);
|
||||
pH->ht = new_ht;
|
||||
pH->htsize = new_size;
|
||||
xHash = hashFunction(pH->keyClass);
|
||||
for(elem=pH->first, pH->first=0; elem; elem = next_elem){
|
||||
int h = (*xHash)(elem->pKey, elem->nKey) & (new_size-1);
|
||||
next_elem = elem->next;
|
||||
insertElement(pH, &new_ht[h], elem);
|
||||
}
|
||||
}
|
||||
|
||||
/* This function (for internal use only) locates an element in an
|
||||
** hash table that matches the given key. The hash for this key has
|
||||
** already been computed and is passed as the 4th parameter.
|
||||
*/
|
||||
static HashElem *findElementGivenHash(
|
||||
const Hash *pH, /* The pH to be searched */
|
||||
const void *pKey, /* The key we are searching for */
|
||||
int nKey,
|
||||
int h /* The hash for this key. */
|
||||
){
|
||||
HashElem *elem; /* Used to loop thru the element list */
|
||||
int count; /* Number of elements left to test */
|
||||
int (*xCompare)(const void*,int,const void*,int); /* comparison function */
|
||||
|
||||
if( pH->ht ){
|
||||
struct _ht *pEntry = &pH->ht[h];
|
||||
elem = pEntry->chain;
|
||||
count = pEntry->count;
|
||||
xCompare = compareFunction(pH->keyClass);
|
||||
while( count-- && elem ){
|
||||
if( (*xCompare)(elem->pKey,elem->nKey,pKey,nKey)==0 ){
|
||||
return elem;
|
||||
}
|
||||
elem = elem->next;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Remove a single entry from the hash table given a pointer to that
|
||||
** element and a hash on the element's key.
|
||||
*/
|
||||
static void removeElementGivenHash(
|
||||
Hash *pH, /* The pH containing "elem" */
|
||||
HashElem* elem, /* The element to be removed from the pH */
|
||||
int h /* Hash value for the element */
|
||||
){
|
||||
struct _ht *pEntry;
|
||||
if( elem->prev ){
|
||||
elem->prev->next = elem->next;
|
||||
}else{
|
||||
pH->first = elem->next;
|
||||
}
|
||||
if( elem->next ){
|
||||
elem->next->prev = elem->prev;
|
||||
}
|
||||
pEntry = &pH->ht[h];
|
||||
if( pEntry->chain==elem ){
|
||||
pEntry->chain = elem->next;
|
||||
}
|
||||
pEntry->count--;
|
||||
if( pEntry->count<=0 ){
|
||||
pEntry->chain = 0;
|
||||
}
|
||||
if( pH->copyKey && elem->pKey ){
|
||||
pH->xFree(elem->pKey);
|
||||
}
|
||||
pH->xFree( elem );
|
||||
pH->count--;
|
||||
if( pH->count<=0 ){
|
||||
assert( pH->first==0 );
|
||||
assert( pH->count==0 );
|
||||
HashClear(pH);
|
||||
}
|
||||
}
|
||||
|
||||
/* Attempt to locate an element of the hash table pH with a key
|
||||
** that matches pKey,nKey. Return the data for this element if it is
|
||||
** found, or NULL if there is no match.
|
||||
*/
|
||||
void *HashFind(const Hash *pH, const void *pKey, int nKey){
|
||||
int h; /* A hash on key */
|
||||
HashElem *elem; /* The element that matches key */
|
||||
int (*xHash)(const void*,int); /* The hash function */
|
||||
|
||||
if( pH==0 || pH->ht==0 ) return 0;
|
||||
xHash = hashFunction(pH->keyClass);
|
||||
assert( xHash!=0 );
|
||||
h = (*xHash)(pKey,nKey);
|
||||
assert( (pH->htsize & (pH->htsize-1))==0 );
|
||||
elem = findElementGivenHash(pH,pKey,nKey, h & (pH->htsize-1));
|
||||
return elem ? elem->data : 0;
|
||||
}
|
||||
|
||||
/* Insert an element into the hash table pH. The key is pKey,nKey
|
||||
** and the data is "data".
|
||||
**
|
||||
** If no element exists with a matching key, then a new
|
||||
** element is created. A copy of the key is made if the copyKey
|
||||
** flag is set. NULL is returned.
|
||||
**
|
||||
** If another element already exists with the same key, then the
|
||||
** new data replaces the old data and the old data is returned.
|
||||
** The key is not copied in this instance. If a malloc fails, then
|
||||
** the new data is returned and the hash table is unchanged.
|
||||
**
|
||||
** If the "data" parameter to this function is NULL, then the
|
||||
** element corresponding to "key" is removed from the hash table.
|
||||
*/
|
||||
void *HashInsert(Hash *pH, const void *pKey, int nKey, void *data){
|
||||
int hraw; /* Raw hash value of the key */
|
||||
int h; /* the hash of the key modulo hash table size */
|
||||
HashElem *elem; /* Used to loop thru the element list */
|
||||
HashElem *new_elem; /* New element added to the pH */
|
||||
int (*xHash)(const void*,int); /* The hash function */
|
||||
|
||||
assert( pH!=0 );
|
||||
xHash = hashFunction(pH->keyClass);
|
||||
assert( xHash!=0 );
|
||||
hraw = (*xHash)(pKey, nKey);
|
||||
assert( (pH->htsize & (pH->htsize-1))==0 );
|
||||
h = hraw & (pH->htsize-1);
|
||||
elem = findElementGivenHash(pH,pKey,nKey,h);
|
||||
if( elem ){
|
||||
void *old_data = elem->data;
|
||||
if( data==0 ){
|
||||
removeElementGivenHash(pH,elem,h);
|
||||
}else{
|
||||
elem->data = data;
|
||||
}
|
||||
return old_data;
|
||||
}
|
||||
if( data==0 ) return 0;
|
||||
new_elem = (HashElem*)pH->xMalloc( sizeof(HashElem) );
|
||||
if( new_elem==0 ) return data;
|
||||
if( pH->copyKey && pKey!=0 ){
|
||||
new_elem->pKey = pH->xMalloc( nKey );
|
||||
if( new_elem->pKey==0 ){
|
||||
pH->xFree(new_elem);
|
||||
return data;
|
||||
}
|
||||
memcpy((void*)new_elem->pKey, pKey, nKey);
|
||||
}else{
|
||||
new_elem->pKey = (void*)pKey;
|
||||
}
|
||||
new_elem->nKey = nKey;
|
||||
pH->count++;
|
||||
if( pH->htsize==0 ){
|
||||
rehash(pH,8);
|
||||
if( pH->htsize==0 ){
|
||||
pH->count = 0;
|
||||
pH->xFree(new_elem);
|
||||
return data;
|
||||
}
|
||||
}
|
||||
if( pH->count > pH->htsize ){
|
||||
rehash(pH,pH->htsize*2);
|
||||
}
|
||||
assert( pH->htsize>0 );
|
||||
assert( (pH->htsize & (pH->htsize-1))==0 );
|
||||
h = hraw & (pH->htsize-1);
|
||||
insertElement(pH, &pH->ht[h], new_elem);
|
||||
new_elem->data = data;
|
||||
return 0;
|
||||
}
|
||||
111
ext/fts1/ft_hash.h
Normal file
111
ext/fts1/ft_hash.h
Normal file
|
|
@ -0,0 +1,111 @@
|
|||
/*
|
||||
** 2001 September 22
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
*************************************************************************
|
||||
** This is the header file for the generic hash-table implementation
|
||||
** used in SQLite. We've modified it slightly to serve as a standalone
|
||||
** hash table implementation for the full-text indexing module.
|
||||
**
|
||||
*/
|
||||
#ifndef _HASH_H_
|
||||
#define _HASH_H_
|
||||
|
||||
/* Forward declarations of structures. */
|
||||
typedef struct Hash Hash;
|
||||
typedef struct HashElem HashElem;
|
||||
|
||||
/* A complete hash table is an instance of the following structure.
|
||||
** The internals of this structure are intended to be opaque -- client
|
||||
** code should not attempt to access or modify the fields of this structure
|
||||
** directly. Change this structure only by using the routines below.
|
||||
** However, many of the "procedures" and "functions" for modifying and
|
||||
** accessing this structure are really macros, so we can't really make
|
||||
** this structure opaque.
|
||||
*/
|
||||
struct Hash {
|
||||
char keyClass; /* HASH_INT, _POINTER, _STRING, _BINARY */
|
||||
char copyKey; /* True if copy of key made on insert */
|
||||
int count; /* Number of entries in this table */
|
||||
HashElem *first; /* The first element of the array */
|
||||
void *(*xMalloc)(int); /* malloc() function to use */
|
||||
void (*xFree)(void *); /* free() function to use */
|
||||
int htsize; /* Number of buckets in the hash table */
|
||||
struct _ht { /* the hash table */
|
||||
int count; /* Number of entries with this hash */
|
||||
HashElem *chain; /* Pointer to first entry with this hash */
|
||||
} *ht;
|
||||
};
|
||||
|
||||
/* Each element in the hash table is an instance of the following
|
||||
** structure. All elements are stored on a single doubly-linked list.
|
||||
**
|
||||
** Again, this structure is intended to be opaque, but it can't really
|
||||
** be opaque because it is used by macros.
|
||||
*/
|
||||
struct HashElem {
|
||||
HashElem *next, *prev; /* Next and previous elements in the table */
|
||||
void *data; /* Data associated with this element */
|
||||
void *pKey; int nKey; /* Key associated with this element */
|
||||
};
|
||||
|
||||
/*
|
||||
** There are 4 different modes of operation for a hash table:
|
||||
**
|
||||
** HASH_INT nKey is used as the key and pKey is ignored.
|
||||
**
|
||||
** HASH_POINTER pKey is used as the key and nKey is ignored.
|
||||
**
|
||||
** HASH_STRING pKey points to a string that is nKey bytes long
|
||||
** (including the null-terminator, if any). Case
|
||||
** is respected in comparisons.
|
||||
**
|
||||
** HASH_BINARY pKey points to binary data nKey bytes long.
|
||||
** memcmp() is used to compare keys.
|
||||
**
|
||||
** A copy of the key is made for HASH_STRING and HASH_BINARY
|
||||
** if the copyKey parameter to HashInit is 1.
|
||||
*/
|
||||
/* #define HASH_INT 1 // NOT USED */
|
||||
/* #define HASH_POINTER 2 // NOT USED */
|
||||
#define HASH_STRING 3
|
||||
#define HASH_BINARY 4
|
||||
|
||||
/*
|
||||
** Access routines. To delete, insert a NULL pointer.
|
||||
*/
|
||||
void HashInit(Hash*, int keytype, int copyKey);
|
||||
void *HashInsert(Hash*, const void *pKey, int nKey, void *pData);
|
||||
void *HashFind(const Hash*, const void *pKey, int nKey);
|
||||
void HashClear(Hash*);
|
||||
|
||||
/*
|
||||
** Macros for looping over all elements of a hash table. The idiom is
|
||||
** like this:
|
||||
**
|
||||
** Hash h;
|
||||
** HashElem *p;
|
||||
** ...
|
||||
** for(p=HashFirst(&h); p; p=HashNext(p)){
|
||||
** SomeStructure *pData = HashData(p);
|
||||
** // do something with pData
|
||||
** }
|
||||
*/
|
||||
#define HashFirst(H) ((H)->first)
|
||||
#define HashNext(E) ((E)->next)
|
||||
#define HashData(E) ((E)->data)
|
||||
#define HashKey(E) ((E)->pKey)
|
||||
#define HashKeysize(E) ((E)->nKey)
|
||||
|
||||
/*
|
||||
** Number of entries in a hash table
|
||||
*/
|
||||
#define HashCount(H) ((H)->count)
|
||||
|
||||
#endif /* _HASH_H_ */
|
||||
3348
ext/fts1/fts1.c
Normal file
3348
ext/fts1/fts1.c
Normal file
File diff suppressed because it is too large
Load diff
11
ext/fts1/fts1.h
Normal file
11
ext/fts1/fts1.h
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
#include "sqlite3.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif /* __cplusplus */
|
||||
|
||||
int sqlite3Fts1Init(sqlite3 *db);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif /* __cplusplus */
|
||||
369
ext/fts1/fts1_hash.c
Normal file
369
ext/fts1/fts1_hash.c
Normal file
|
|
@ -0,0 +1,369 @@
|
|||
/*
|
||||
** 2001 September 22
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
*************************************************************************
|
||||
** This is the implementation of generic hash-tables used in SQLite.
|
||||
** We've modified it slightly to serve as a standalone hash table
|
||||
** implementation for the full-text indexing module.
|
||||
*/
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
|
||||
** The code in this file is only compiled if:
|
||||
**
|
||||
** * The FTS1 module is being built as an extension
|
||||
** (in which case SQLITE_CORE is not defined), or
|
||||
**
|
||||
** * The FTS1 module is being built into the core of
|
||||
** SQLite (in which case SQLITE_ENABLE_FTS1 is defined).
|
||||
*/
|
||||
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1)
|
||||
|
||||
|
||||
#include "fts1_hash.h"
|
||||
|
||||
static void *malloc_and_zero(int n){
|
||||
void *p = malloc(n);
|
||||
if( p ){
|
||||
memset(p, 0, n);
|
||||
}
|
||||
return p;
|
||||
}
|
||||
|
||||
/* Turn bulk memory into a hash table object by initializing the
|
||||
** fields of the Hash structure.
|
||||
**
|
||||
** "pNew" is a pointer to the hash table that is to be initialized.
|
||||
** keyClass is one of the constants
|
||||
** FTS1_HASH_BINARY or FTS1_HASH_STRING. The value of keyClass
|
||||
** determines what kind of key the hash table will use. "copyKey" is
|
||||
** true if the hash table should make its own private copy of keys and
|
||||
** false if it should just use the supplied pointer.
|
||||
*/
|
||||
void sqlite3Fts1HashInit(fts1Hash *pNew, int keyClass, int copyKey){
|
||||
assert( pNew!=0 );
|
||||
assert( keyClass>=FTS1_HASH_STRING && keyClass<=FTS1_HASH_BINARY );
|
||||
pNew->keyClass = keyClass;
|
||||
pNew->copyKey = copyKey;
|
||||
pNew->first = 0;
|
||||
pNew->count = 0;
|
||||
pNew->htsize = 0;
|
||||
pNew->ht = 0;
|
||||
pNew->xMalloc = malloc_and_zero;
|
||||
pNew->xFree = free;
|
||||
}
|
||||
|
||||
/* Remove all entries from a hash table. Reclaim all memory.
|
||||
** Call this routine to delete a hash table or to reset a hash table
|
||||
** to the empty state.
|
||||
*/
|
||||
void sqlite3Fts1HashClear(fts1Hash *pH){
|
||||
fts1HashElem *elem; /* For looping over all elements of the table */
|
||||
|
||||
assert( pH!=0 );
|
||||
elem = pH->first;
|
||||
pH->first = 0;
|
||||
if( pH->ht ) pH->xFree(pH->ht);
|
||||
pH->ht = 0;
|
||||
pH->htsize = 0;
|
||||
while( elem ){
|
||||
fts1HashElem *next_elem = elem->next;
|
||||
if( pH->copyKey && elem->pKey ){
|
||||
pH->xFree(elem->pKey);
|
||||
}
|
||||
pH->xFree(elem);
|
||||
elem = next_elem;
|
||||
}
|
||||
pH->count = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
** Hash and comparison functions when the mode is FTS1_HASH_STRING
|
||||
*/
|
||||
static int strHash(const void *pKey, int nKey){
|
||||
const char *z = (const char *)pKey;
|
||||
int h = 0;
|
||||
if( nKey<=0 ) nKey = (int) strlen(z);
|
||||
while( nKey > 0 ){
|
||||
h = (h<<3) ^ h ^ *z++;
|
||||
nKey--;
|
||||
}
|
||||
return h & 0x7fffffff;
|
||||
}
|
||||
static int strCompare(const void *pKey1, int n1, const void *pKey2, int n2){
|
||||
if( n1!=n2 ) return 1;
|
||||
return strncmp((const char*)pKey1,(const char*)pKey2,n1);
|
||||
}
|
||||
|
||||
/*
|
||||
** Hash and comparison functions when the mode is FTS1_HASH_BINARY
|
||||
*/
|
||||
static int binHash(const void *pKey, int nKey){
|
||||
int h = 0;
|
||||
const char *z = (const char *)pKey;
|
||||
while( nKey-- > 0 ){
|
||||
h = (h<<3) ^ h ^ *(z++);
|
||||
}
|
||||
return h & 0x7fffffff;
|
||||
}
|
||||
static int binCompare(const void *pKey1, int n1, const void *pKey2, int n2){
|
||||
if( n1!=n2 ) return 1;
|
||||
return memcmp(pKey1,pKey2,n1);
|
||||
}
|
||||
|
||||
/*
|
||||
** Return a pointer to the appropriate hash function given the key class.
|
||||
**
|
||||
** The C syntax in this function definition may be unfamilar to some
|
||||
** programmers, so we provide the following additional explanation:
|
||||
**
|
||||
** The name of the function is "hashFunction". The function takes a
|
||||
** single parameter "keyClass". The return value of hashFunction()
|
||||
** is a pointer to another function. Specifically, the return value
|
||||
** of hashFunction() is a pointer to a function that takes two parameters
|
||||
** with types "const void*" and "int" and returns an "int".
|
||||
*/
|
||||
static int (*hashFunction(int keyClass))(const void*,int){
|
||||
if( keyClass==FTS1_HASH_STRING ){
|
||||
return &strHash;
|
||||
}else{
|
||||
assert( keyClass==FTS1_HASH_BINARY );
|
||||
return &binHash;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
** Return a pointer to the appropriate hash function given the key class.
|
||||
**
|
||||
** For help in interpreted the obscure C code in the function definition,
|
||||
** see the header comment on the previous function.
|
||||
*/
|
||||
static int (*compareFunction(int keyClass))(const void*,int,const void*,int){
|
||||
if( keyClass==FTS1_HASH_STRING ){
|
||||
return &strCompare;
|
||||
}else{
|
||||
assert( keyClass==FTS1_HASH_BINARY );
|
||||
return &binCompare;
|
||||
}
|
||||
}
|
||||
|
||||
/* Link an element into the hash table
|
||||
*/
|
||||
static void insertElement(
|
||||
fts1Hash *pH, /* The complete hash table */
|
||||
struct _fts1ht *pEntry, /* The entry into which pNew is inserted */
|
||||
fts1HashElem *pNew /* The element to be inserted */
|
||||
){
|
||||
fts1HashElem *pHead; /* First element already in pEntry */
|
||||
pHead = pEntry->chain;
|
||||
if( pHead ){
|
||||
pNew->next = pHead;
|
||||
pNew->prev = pHead->prev;
|
||||
if( pHead->prev ){ pHead->prev->next = pNew; }
|
||||
else { pH->first = pNew; }
|
||||
pHead->prev = pNew;
|
||||
}else{
|
||||
pNew->next = pH->first;
|
||||
if( pH->first ){ pH->first->prev = pNew; }
|
||||
pNew->prev = 0;
|
||||
pH->first = pNew;
|
||||
}
|
||||
pEntry->count++;
|
||||
pEntry->chain = pNew;
|
||||
}
|
||||
|
||||
|
||||
/* Resize the hash table so that it cantains "new_size" buckets.
|
||||
** "new_size" must be a power of 2. The hash table might fail
|
||||
** to resize if sqliteMalloc() fails.
|
||||
*/
|
||||
static void rehash(fts1Hash *pH, int new_size){
|
||||
struct _fts1ht *new_ht; /* The new hash table */
|
||||
fts1HashElem *elem, *next_elem; /* For looping over existing elements */
|
||||
int (*xHash)(const void*,int); /* The hash function */
|
||||
|
||||
assert( (new_size & (new_size-1))==0 );
|
||||
new_ht = (struct _fts1ht *)pH->xMalloc( new_size*sizeof(struct _fts1ht) );
|
||||
if( new_ht==0 ) return;
|
||||
if( pH->ht ) pH->xFree(pH->ht);
|
||||
pH->ht = new_ht;
|
||||
pH->htsize = new_size;
|
||||
xHash = hashFunction(pH->keyClass);
|
||||
for(elem=pH->first, pH->first=0; elem; elem = next_elem){
|
||||
int h = (*xHash)(elem->pKey, elem->nKey) & (new_size-1);
|
||||
next_elem = elem->next;
|
||||
insertElement(pH, &new_ht[h], elem);
|
||||
}
|
||||
}
|
||||
|
||||
/* This function (for internal use only) locates an element in an
|
||||
** hash table that matches the given key. The hash for this key has
|
||||
** already been computed and is passed as the 4th parameter.
|
||||
*/
|
||||
static fts1HashElem *findElementGivenHash(
|
||||
const fts1Hash *pH, /* The pH to be searched */
|
||||
const void *pKey, /* The key we are searching for */
|
||||
int nKey,
|
||||
int h /* The hash for this key. */
|
||||
){
|
||||
fts1HashElem *elem; /* Used to loop thru the element list */
|
||||
int count; /* Number of elements left to test */
|
||||
int (*xCompare)(const void*,int,const void*,int); /* comparison function */
|
||||
|
||||
if( pH->ht ){
|
||||
struct _fts1ht *pEntry = &pH->ht[h];
|
||||
elem = pEntry->chain;
|
||||
count = pEntry->count;
|
||||
xCompare = compareFunction(pH->keyClass);
|
||||
while( count-- && elem ){
|
||||
if( (*xCompare)(elem->pKey,elem->nKey,pKey,nKey)==0 ){
|
||||
return elem;
|
||||
}
|
||||
elem = elem->next;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Remove a single entry from the hash table given a pointer to that
|
||||
** element and a hash on the element's key.
|
||||
*/
|
||||
static void removeElementGivenHash(
|
||||
fts1Hash *pH, /* The pH containing "elem" */
|
||||
fts1HashElem* elem, /* The element to be removed from the pH */
|
||||
int h /* Hash value for the element */
|
||||
){
|
||||
struct _fts1ht *pEntry;
|
||||
if( elem->prev ){
|
||||
elem->prev->next = elem->next;
|
||||
}else{
|
||||
pH->first = elem->next;
|
||||
}
|
||||
if( elem->next ){
|
||||
elem->next->prev = elem->prev;
|
||||
}
|
||||
pEntry = &pH->ht[h];
|
||||
if( pEntry->chain==elem ){
|
||||
pEntry->chain = elem->next;
|
||||
}
|
||||
pEntry->count--;
|
||||
if( pEntry->count<=0 ){
|
||||
pEntry->chain = 0;
|
||||
}
|
||||
if( pH->copyKey && elem->pKey ){
|
||||
pH->xFree(elem->pKey);
|
||||
}
|
||||
pH->xFree( elem );
|
||||
pH->count--;
|
||||
if( pH->count<=0 ){
|
||||
assert( pH->first==0 );
|
||||
assert( pH->count==0 );
|
||||
fts1HashClear(pH);
|
||||
}
|
||||
}
|
||||
|
||||
/* Attempt to locate an element of the hash table pH with a key
|
||||
** that matches pKey,nKey. Return the data for this element if it is
|
||||
** found, or NULL if there is no match.
|
||||
*/
|
||||
void *sqlite3Fts1HashFind(const fts1Hash *pH, const void *pKey, int nKey){
|
||||
int h; /* A hash on key */
|
||||
fts1HashElem *elem; /* The element that matches key */
|
||||
int (*xHash)(const void*,int); /* The hash function */
|
||||
|
||||
if( pH==0 || pH->ht==0 ) return 0;
|
||||
xHash = hashFunction(pH->keyClass);
|
||||
assert( xHash!=0 );
|
||||
h = (*xHash)(pKey,nKey);
|
||||
assert( (pH->htsize & (pH->htsize-1))==0 );
|
||||
elem = findElementGivenHash(pH,pKey,nKey, h & (pH->htsize-1));
|
||||
return elem ? elem->data : 0;
|
||||
}
|
||||
|
||||
/* Insert an element into the hash table pH. The key is pKey,nKey
|
||||
** and the data is "data".
|
||||
**
|
||||
** If no element exists with a matching key, then a new
|
||||
** element is created. A copy of the key is made if the copyKey
|
||||
** flag is set. NULL is returned.
|
||||
**
|
||||
** If another element already exists with the same key, then the
|
||||
** new data replaces the old data and the old data is returned.
|
||||
** The key is not copied in this instance. If a malloc fails, then
|
||||
** the new data is returned and the hash table is unchanged.
|
||||
**
|
||||
** If the "data" parameter to this function is NULL, then the
|
||||
** element corresponding to "key" is removed from the hash table.
|
||||
*/
|
||||
void *sqlite3Fts1HashInsert(
|
||||
fts1Hash *pH, /* The hash table to insert into */
|
||||
const void *pKey, /* The key */
|
||||
int nKey, /* Number of bytes in the key */
|
||||
void *data /* The data */
|
||||
){
|
||||
int hraw; /* Raw hash value of the key */
|
||||
int h; /* the hash of the key modulo hash table size */
|
||||
fts1HashElem *elem; /* Used to loop thru the element list */
|
||||
fts1HashElem *new_elem; /* New element added to the pH */
|
||||
int (*xHash)(const void*,int); /* The hash function */
|
||||
|
||||
assert( pH!=0 );
|
||||
xHash = hashFunction(pH->keyClass);
|
||||
assert( xHash!=0 );
|
||||
hraw = (*xHash)(pKey, nKey);
|
||||
assert( (pH->htsize & (pH->htsize-1))==0 );
|
||||
h = hraw & (pH->htsize-1);
|
||||
elem = findElementGivenHash(pH,pKey,nKey,h);
|
||||
if( elem ){
|
||||
void *old_data = elem->data;
|
||||
if( data==0 ){
|
||||
removeElementGivenHash(pH,elem,h);
|
||||
}else{
|
||||
elem->data = data;
|
||||
}
|
||||
return old_data;
|
||||
}
|
||||
if( data==0 ) return 0;
|
||||
new_elem = (fts1HashElem*)pH->xMalloc( sizeof(fts1HashElem) );
|
||||
if( new_elem==0 ) return data;
|
||||
if( pH->copyKey && pKey!=0 ){
|
||||
new_elem->pKey = pH->xMalloc( nKey );
|
||||
if( new_elem->pKey==0 ){
|
||||
pH->xFree(new_elem);
|
||||
return data;
|
||||
}
|
||||
memcpy((void*)new_elem->pKey, pKey, nKey);
|
||||
}else{
|
||||
new_elem->pKey = (void*)pKey;
|
||||
}
|
||||
new_elem->nKey = nKey;
|
||||
pH->count++;
|
||||
if( pH->htsize==0 ){
|
||||
rehash(pH,8);
|
||||
if( pH->htsize==0 ){
|
||||
pH->count = 0;
|
||||
pH->xFree(new_elem);
|
||||
return data;
|
||||
}
|
||||
}
|
||||
if( pH->count > pH->htsize ){
|
||||
rehash(pH,pH->htsize*2);
|
||||
}
|
||||
assert( pH->htsize>0 );
|
||||
assert( (pH->htsize & (pH->htsize-1))==0 );
|
||||
h = hraw & (pH->htsize-1);
|
||||
insertElement(pH, &pH->ht[h], new_elem);
|
||||
new_elem->data = data;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1) */
|
||||
112
ext/fts1/fts1_hash.h
Normal file
112
ext/fts1/fts1_hash.h
Normal file
|
|
@ -0,0 +1,112 @@
|
|||
/*
|
||||
** 2001 September 22
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
*************************************************************************
|
||||
** This is the header file for the generic hash-table implementation
|
||||
** used in SQLite. We've modified it slightly to serve as a standalone
|
||||
** hash table implementation for the full-text indexing module.
|
||||
**
|
||||
*/
|
||||
#ifndef _FTS1_HASH_H_
|
||||
#define _FTS1_HASH_H_
|
||||
|
||||
/* Forward declarations of structures. */
|
||||
typedef struct fts1Hash fts1Hash;
|
||||
typedef struct fts1HashElem fts1HashElem;
|
||||
|
||||
/* A complete hash table is an instance of the following structure.
|
||||
** The internals of this structure are intended to be opaque -- client
|
||||
** code should not attempt to access or modify the fields of this structure
|
||||
** directly. Change this structure only by using the routines below.
|
||||
** However, many of the "procedures" and "functions" for modifying and
|
||||
** accessing this structure are really macros, so we can't really make
|
||||
** this structure opaque.
|
||||
*/
|
||||
struct fts1Hash {
|
||||
char keyClass; /* HASH_INT, _POINTER, _STRING, _BINARY */
|
||||
char copyKey; /* True if copy of key made on insert */
|
||||
int count; /* Number of entries in this table */
|
||||
fts1HashElem *first; /* The first element of the array */
|
||||
void *(*xMalloc)(int); /* malloc() function to use */
|
||||
void (*xFree)(void *); /* free() function to use */
|
||||
int htsize; /* Number of buckets in the hash table */
|
||||
struct _fts1ht { /* the hash table */
|
||||
int count; /* Number of entries with this hash */
|
||||
fts1HashElem *chain; /* Pointer to first entry with this hash */
|
||||
} *ht;
|
||||
};
|
||||
|
||||
/* Each element in the hash table is an instance of the following
|
||||
** structure. All elements are stored on a single doubly-linked list.
|
||||
**
|
||||
** Again, this structure is intended to be opaque, but it can't really
|
||||
** be opaque because it is used by macros.
|
||||
*/
|
||||
struct fts1HashElem {
|
||||
fts1HashElem *next, *prev; /* Next and previous elements in the table */
|
||||
void *data; /* Data associated with this element */
|
||||
void *pKey; int nKey; /* Key associated with this element */
|
||||
};
|
||||
|
||||
/*
|
||||
** There are 2 different modes of operation for a hash table:
|
||||
**
|
||||
** FTS1_HASH_STRING pKey points to a string that is nKey bytes long
|
||||
** (including the null-terminator, if any). Case
|
||||
** is respected in comparisons.
|
||||
**
|
||||
** FTS1_HASH_BINARY pKey points to binary data nKey bytes long.
|
||||
** memcmp() is used to compare keys.
|
||||
**
|
||||
** A copy of the key is made if the copyKey parameter to fts1HashInit is 1.
|
||||
*/
|
||||
#define FTS1_HASH_STRING 1
|
||||
#define FTS1_HASH_BINARY 2
|
||||
|
||||
/*
|
||||
** Access routines. To delete, insert a NULL pointer.
|
||||
*/
|
||||
void sqlite3Fts1HashInit(fts1Hash*, int keytype, int copyKey);
|
||||
void *sqlite3Fts1HashInsert(fts1Hash*, const void *pKey, int nKey, void *pData);
|
||||
void *sqlite3Fts1HashFind(const fts1Hash*, const void *pKey, int nKey);
|
||||
void sqlite3Fts1HashClear(fts1Hash*);
|
||||
|
||||
/*
|
||||
** Shorthand for the functions above
|
||||
*/
|
||||
#define fts1HashInit sqlite3Fts1HashInit
|
||||
#define fts1HashInsert sqlite3Fts1HashInsert
|
||||
#define fts1HashFind sqlite3Fts1HashFind
|
||||
#define fts1HashClear sqlite3Fts1HashClear
|
||||
|
||||
/*
|
||||
** Macros for looping over all elements of a hash table. The idiom is
|
||||
** like this:
|
||||
**
|
||||
** fts1Hash h;
|
||||
** fts1HashElem *p;
|
||||
** ...
|
||||
** for(p=fts1HashFirst(&h); p; p=fts1HashNext(p)){
|
||||
** SomeStructure *pData = fts1HashData(p);
|
||||
** // do something with pData
|
||||
** }
|
||||
*/
|
||||
#define fts1HashFirst(H) ((H)->first)
|
||||
#define fts1HashNext(E) ((E)->next)
|
||||
#define fts1HashData(E) ((E)->data)
|
||||
#define fts1HashKey(E) ((E)->pKey)
|
||||
#define fts1HashKeysize(E) ((E)->nKey)
|
||||
|
||||
/*
|
||||
** Number of entries in a hash table
|
||||
*/
|
||||
#define fts1HashCount(H) ((H)->count)
|
||||
|
||||
#endif /* _FTS1_HASH_H_ */
|
||||
643
ext/fts1/fts1_porter.c
Normal file
643
ext/fts1/fts1_porter.c
Normal file
|
|
@ -0,0 +1,643 @@
|
|||
/*
|
||||
** 2006 September 30
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
*************************************************************************
|
||||
** Implementation of the full-text-search tokenizer that implements
|
||||
** a Porter stemmer.
|
||||
*/
|
||||
|
||||
/*
|
||||
** The code in this file is only compiled if:
|
||||
**
|
||||
** * The FTS1 module is being built as an extension
|
||||
** (in which case SQLITE_CORE is not defined), or
|
||||
**
|
||||
** * The FTS1 module is being built into the core of
|
||||
** SQLite (in which case SQLITE_ENABLE_FTS1 is defined).
|
||||
*/
|
||||
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1)
|
||||
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
|
||||
#include "fts1_tokenizer.h"
|
||||
|
||||
/*
|
||||
** Class derived from sqlite3_tokenizer
|
||||
*/
|
||||
typedef struct porter_tokenizer {
|
||||
sqlite3_tokenizer base; /* Base class */
|
||||
} porter_tokenizer;
|
||||
|
||||
/*
|
||||
** Class derived from sqlit3_tokenizer_cursor
|
||||
*/
|
||||
typedef struct porter_tokenizer_cursor {
|
||||
sqlite3_tokenizer_cursor base;
|
||||
const char *zInput; /* input we are tokenizing */
|
||||
int nInput; /* size of the input */
|
||||
int iOffset; /* current position in zInput */
|
||||
int iToken; /* index of next token to be returned */
|
||||
char *zToken; /* storage for current token */
|
||||
int nAllocated; /* space allocated to zToken buffer */
|
||||
} porter_tokenizer_cursor;
|
||||
|
||||
|
||||
/* Forward declaration */
|
||||
static const sqlite3_tokenizer_module porterTokenizerModule;
|
||||
|
||||
|
||||
/*
|
||||
** Create a new tokenizer instance.
|
||||
*/
|
||||
static int porterCreate(
|
||||
int argc, const char * const *argv,
|
||||
sqlite3_tokenizer **ppTokenizer
|
||||
){
|
||||
porter_tokenizer *t;
|
||||
t = (porter_tokenizer *) calloc(sizeof(*t), 1);
|
||||
if( t==NULL ) return SQLITE_NOMEM;
|
||||
|
||||
*ppTokenizer = &t->base;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Destroy a tokenizer
|
||||
*/
|
||||
static int porterDestroy(sqlite3_tokenizer *pTokenizer){
|
||||
free(pTokenizer);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Prepare to begin tokenizing a particular string. The input
|
||||
** string to be tokenized is zInput[0..nInput-1]. A cursor
|
||||
** used to incrementally tokenize this string is returned in
|
||||
** *ppCursor.
|
||||
*/
|
||||
static int porterOpen(
|
||||
sqlite3_tokenizer *pTokenizer, /* The tokenizer */
|
||||
const char *zInput, int nInput, /* String to be tokenized */
|
||||
sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */
|
||||
){
|
||||
porter_tokenizer_cursor *c;
|
||||
|
||||
c = (porter_tokenizer_cursor *) malloc(sizeof(*c));
|
||||
if( c==NULL ) return SQLITE_NOMEM;
|
||||
|
||||
c->zInput = zInput;
|
||||
if( zInput==0 ){
|
||||
c->nInput = 0;
|
||||
}else if( nInput<0 ){
|
||||
c->nInput = (int)strlen(zInput);
|
||||
}else{
|
||||
c->nInput = nInput;
|
||||
}
|
||||
c->iOffset = 0; /* start tokenizing at the beginning */
|
||||
c->iToken = 0;
|
||||
c->zToken = NULL; /* no space allocated, yet. */
|
||||
c->nAllocated = 0;
|
||||
|
||||
*ppCursor = &c->base;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Close a tokenization cursor previously opened by a call to
|
||||
** porterOpen() above.
|
||||
*/
|
||||
static int porterClose(sqlite3_tokenizer_cursor *pCursor){
|
||||
porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor;
|
||||
free(c->zToken);
|
||||
free(c);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
/*
|
||||
** Vowel or consonant
|
||||
*/
|
||||
static const char cType[] = {
|
||||
0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0,
|
||||
1, 1, 1, 2, 1
|
||||
};
|
||||
|
||||
/*
|
||||
** isConsonant() and isVowel() determine if their first character in
|
||||
** the string they point to is a consonant or a vowel, according
|
||||
** to Porter ruls.
|
||||
**
|
||||
** A consonate is any letter other than 'a', 'e', 'i', 'o', or 'u'.
|
||||
** 'Y' is a consonant unless it follows another consonant,
|
||||
** in which case it is a vowel.
|
||||
**
|
||||
** In these routine, the letters are in reverse order. So the 'y' rule
|
||||
** is that 'y' is a consonant unless it is followed by another
|
||||
** consonent.
|
||||
*/
|
||||
static int isVowel(const char*);
|
||||
static int isConsonant(const char *z){
|
||||
int j;
|
||||
char x = *z;
|
||||
if( x==0 ) return 0;
|
||||
assert( x>='a' && x<='z' );
|
||||
j = cType[x-'a'];
|
||||
if( j<2 ) return j;
|
||||
return z[1]==0 || isVowel(z + 1);
|
||||
}
|
||||
static int isVowel(const char *z){
|
||||
int j;
|
||||
char x = *z;
|
||||
if( x==0 ) return 0;
|
||||
assert( x>='a' && x<='z' );
|
||||
j = cType[x-'a'];
|
||||
if( j<2 ) return 1-j;
|
||||
return isConsonant(z + 1);
|
||||
}
|
||||
|
||||
/*
|
||||
** Let any sequence of one or more vowels be represented by V and let
|
||||
** C be sequence of one or more consonants. Then every word can be
|
||||
** represented as:
|
||||
**
|
||||
** [C] (VC){m} [V]
|
||||
**
|
||||
** In prose: A word is an optional consonant followed by zero or
|
||||
** vowel-consonant pairs followed by an optional vowel. "m" is the
|
||||
** number of vowel consonant pairs. This routine computes the value
|
||||
** of m for the first i bytes of a word.
|
||||
**
|
||||
** Return true if the m-value for z is 1 or more. In other words,
|
||||
** return true if z contains at least one vowel that is followed
|
||||
** by a consonant.
|
||||
**
|
||||
** In this routine z[] is in reverse order. So we are really looking
|
||||
** for an instance of of a consonant followed by a vowel.
|
||||
*/
|
||||
static int m_gt_0(const char *z){
|
||||
while( isVowel(z) ){ z++; }
|
||||
if( *z==0 ) return 0;
|
||||
while( isConsonant(z) ){ z++; }
|
||||
return *z!=0;
|
||||
}
|
||||
|
||||
/* Like mgt0 above except we are looking for a value of m which is
|
||||
** exactly 1
|
||||
*/
|
||||
static int m_eq_1(const char *z){
|
||||
while( isVowel(z) ){ z++; }
|
||||
if( *z==0 ) return 0;
|
||||
while( isConsonant(z) ){ z++; }
|
||||
if( *z==0 ) return 0;
|
||||
while( isVowel(z) ){ z++; }
|
||||
if( *z==0 ) return 1;
|
||||
while( isConsonant(z) ){ z++; }
|
||||
return *z==0;
|
||||
}
|
||||
|
||||
/* Like mgt0 above except we are looking for a value of m>1 instead
|
||||
** or m>0
|
||||
*/
|
||||
static int m_gt_1(const char *z){
|
||||
while( isVowel(z) ){ z++; }
|
||||
if( *z==0 ) return 0;
|
||||
while( isConsonant(z) ){ z++; }
|
||||
if( *z==0 ) return 0;
|
||||
while( isVowel(z) ){ z++; }
|
||||
if( *z==0 ) return 0;
|
||||
while( isConsonant(z) ){ z++; }
|
||||
return *z!=0;
|
||||
}
|
||||
|
||||
/*
|
||||
** Return TRUE if there is a vowel anywhere within z[0..n-1]
|
||||
*/
|
||||
static int hasVowel(const char *z){
|
||||
while( isConsonant(z) ){ z++; }
|
||||
return *z!=0;
|
||||
}
|
||||
|
||||
/*
|
||||
** Return TRUE if the word ends in a double consonant.
|
||||
**
|
||||
** The text is reversed here. So we are really looking at
|
||||
** the first two characters of z[].
|
||||
*/
|
||||
static int doubleConsonant(const char *z){
|
||||
return isConsonant(z) && z[0]==z[1] && isConsonant(z+1);
|
||||
}
|
||||
|
||||
/*
|
||||
** Return TRUE if the word ends with three letters which
|
||||
** are consonant-vowel-consonent and where the final consonant
|
||||
** is not 'w', 'x', or 'y'.
|
||||
**
|
||||
** The word is reversed here. So we are really checking the
|
||||
** first three letters and the first one cannot be in [wxy].
|
||||
*/
|
||||
static int star_oh(const char *z){
|
||||
return
|
||||
z[0]!=0 && isConsonant(z) &&
|
||||
z[0]!='w' && z[0]!='x' && z[0]!='y' &&
|
||||
z[1]!=0 && isVowel(z+1) &&
|
||||
z[2]!=0 && isConsonant(z+2);
|
||||
}
|
||||
|
||||
/*
|
||||
** If the word ends with zFrom and xCond() is true for the stem
|
||||
** of the word that preceeds the zFrom ending, then change the
|
||||
** ending to zTo.
|
||||
**
|
||||
** The input word *pz and zFrom are both in reverse order. zTo
|
||||
** is in normal order.
|
||||
**
|
||||
** Return TRUE if zFrom matches. Return FALSE if zFrom does not
|
||||
** match. Not that TRUE is returned even if xCond() fails and
|
||||
** no substitution occurs.
|
||||
*/
|
||||
static int stem(
|
||||
char **pz, /* The word being stemmed (Reversed) */
|
||||
const char *zFrom, /* If the ending matches this... (Reversed) */
|
||||
const char *zTo, /* ... change the ending to this (not reversed) */
|
||||
int (*xCond)(const char*) /* Condition that must be true */
|
||||
){
|
||||
char *z = *pz;
|
||||
while( *zFrom && *zFrom==*z ){ z++; zFrom++; }
|
||||
if( *zFrom!=0 ) return 0;
|
||||
if( xCond && !xCond(z) ) return 1;
|
||||
while( *zTo ){
|
||||
*(--z) = *(zTo++);
|
||||
}
|
||||
*pz = z;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
** This is the fallback stemmer used when the porter stemmer is
|
||||
** inappropriate. The input word is copied into the output with
|
||||
** US-ASCII case folding. If the input word is too long (more
|
||||
** than 20 bytes if it contains no digits or more than 6 bytes if
|
||||
** it contains digits) then word is truncated to 20 or 6 bytes
|
||||
** by taking 10 or 3 bytes from the beginning and end.
|
||||
*/
|
||||
static void copy_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){
|
||||
int i, mx, j;
|
||||
int hasDigit = 0;
|
||||
for(i=0; i<nIn; i++){
|
||||
int c = zIn[i];
|
||||
if( c>='A' && c<='Z' ){
|
||||
zOut[i] = c - 'A' + 'a';
|
||||
}else{
|
||||
if( c>='0' && c<='9' ) hasDigit = 1;
|
||||
zOut[i] = c;
|
||||
}
|
||||
}
|
||||
mx = hasDigit ? 3 : 10;
|
||||
if( nIn>mx*2 ){
|
||||
for(j=mx, i=nIn-mx; i<nIn; i++, j++){
|
||||
zOut[j] = zOut[i];
|
||||
}
|
||||
i = j;
|
||||
}
|
||||
zOut[i] = 0;
|
||||
*pnOut = i;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Stem the input word zIn[0..nIn-1]. Store the output in zOut.
|
||||
** zOut is at least big enough to hold nIn bytes. Write the actual
|
||||
** size of the output word (exclusive of the '\0' terminator) into *pnOut.
|
||||
**
|
||||
** Any upper-case characters in the US-ASCII character set ([A-Z])
|
||||
** are converted to lower case. Upper-case UTF characters are
|
||||
** unchanged.
|
||||
**
|
||||
** Words that are longer than about 20 bytes are stemmed by retaining
|
||||
** a few bytes from the beginning and the end of the word. If the
|
||||
** word contains digits, 3 bytes are taken from the beginning and
|
||||
** 3 bytes from the end. For long words without digits, 10 bytes
|
||||
** are taken from each end. US-ASCII case folding still applies.
|
||||
**
|
||||
** If the input word contains not digits but does characters not
|
||||
** in [a-zA-Z] then no stemming is attempted and this routine just
|
||||
** copies the input into the input into the output with US-ASCII
|
||||
** case folding.
|
||||
**
|
||||
** Stemming never increases the length of the word. So there is
|
||||
** no chance of overflowing the zOut buffer.
|
||||
*/
|
||||
static void porter_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){
|
||||
int i, j, c;
|
||||
char zReverse[28];
|
||||
char *z, *z2;
|
||||
if( nIn<3 || nIn>=sizeof(zReverse)-7 ){
|
||||
/* The word is too big or too small for the porter stemmer.
|
||||
** Fallback to the copy stemmer */
|
||||
copy_stemmer(zIn, nIn, zOut, pnOut);
|
||||
return;
|
||||
}
|
||||
for(i=0, j=sizeof(zReverse)-6; i<nIn; i++, j--){
|
||||
c = zIn[i];
|
||||
if( c>='A' && c<='Z' ){
|
||||
zReverse[j] = c + 'a' - 'A';
|
||||
}else if( c>='a' && c<='z' ){
|
||||
zReverse[j] = c;
|
||||
}else{
|
||||
/* The use of a character not in [a-zA-Z] means that we fallback
|
||||
** to the copy stemmer */
|
||||
copy_stemmer(zIn, nIn, zOut, pnOut);
|
||||
return;
|
||||
}
|
||||
}
|
||||
memset(&zReverse[sizeof(zReverse)-5], 0, 5);
|
||||
z = &zReverse[j+1];
|
||||
|
||||
|
||||
/* Step 1a */
|
||||
if( z[0]=='s' ){
|
||||
if(
|
||||
!stem(&z, "sess", "ss", 0) &&
|
||||
!stem(&z, "sei", "i", 0) &&
|
||||
!stem(&z, "ss", "ss", 0)
|
||||
){
|
||||
z++;
|
||||
}
|
||||
}
|
||||
|
||||
/* Step 1b */
|
||||
z2 = z;
|
||||
if( stem(&z, "dee", "ee", m_gt_0) ){
|
||||
/* Do nothing. The work was all in the test */
|
||||
}else if(
|
||||
(stem(&z, "gni", "", hasVowel) || stem(&z, "de", "", hasVowel))
|
||||
&& z!=z2
|
||||
){
|
||||
if( stem(&z, "ta", "ate", 0) ||
|
||||
stem(&z, "lb", "ble", 0) ||
|
||||
stem(&z, "zi", "ize", 0) ){
|
||||
/* Do nothing. The work was all in the test */
|
||||
}else if( doubleConsonant(z) && (*z!='l' && *z!='s' && *z!='z') ){
|
||||
z++;
|
||||
}else if( m_eq_1(z) && star_oh(z) ){
|
||||
*(--z) = 'e';
|
||||
}
|
||||
}
|
||||
|
||||
/* Step 1c */
|
||||
if( z[0]=='y' && hasVowel(z+1) ){
|
||||
z[0] = 'i';
|
||||
}
|
||||
|
||||
/* Step 2 */
|
||||
switch( z[1] ){
|
||||
case 'a':
|
||||
stem(&z, "lanoita", "ate", m_gt_0) ||
|
||||
stem(&z, "lanoit", "tion", m_gt_0);
|
||||
break;
|
||||
case 'c':
|
||||
stem(&z, "icne", "ence", m_gt_0) ||
|
||||
stem(&z, "icna", "ance", m_gt_0);
|
||||
break;
|
||||
case 'e':
|
||||
stem(&z, "rezi", "ize", m_gt_0);
|
||||
break;
|
||||
case 'g':
|
||||
stem(&z, "igol", "log", m_gt_0);
|
||||
break;
|
||||
case 'l':
|
||||
stem(&z, "ilb", "ble", m_gt_0) ||
|
||||
stem(&z, "illa", "al", m_gt_0) ||
|
||||
stem(&z, "iltne", "ent", m_gt_0) ||
|
||||
stem(&z, "ile", "e", m_gt_0) ||
|
||||
stem(&z, "ilsuo", "ous", m_gt_0);
|
||||
break;
|
||||
case 'o':
|
||||
stem(&z, "noitazi", "ize", m_gt_0) ||
|
||||
stem(&z, "noita", "ate", m_gt_0) ||
|
||||
stem(&z, "rota", "ate", m_gt_0);
|
||||
break;
|
||||
case 's':
|
||||
stem(&z, "msila", "al", m_gt_0) ||
|
||||
stem(&z, "ssenevi", "ive", m_gt_0) ||
|
||||
stem(&z, "ssenluf", "ful", m_gt_0) ||
|
||||
stem(&z, "ssensuo", "ous", m_gt_0);
|
||||
break;
|
||||
case 't':
|
||||
stem(&z, "itila", "al", m_gt_0) ||
|
||||
stem(&z, "itivi", "ive", m_gt_0) ||
|
||||
stem(&z, "itilib", "ble", m_gt_0);
|
||||
break;
|
||||
}
|
||||
|
||||
/* Step 3 */
|
||||
switch( z[0] ){
|
||||
case 'e':
|
||||
stem(&z, "etaci", "ic", m_gt_0) ||
|
||||
stem(&z, "evita", "", m_gt_0) ||
|
||||
stem(&z, "ezila", "al", m_gt_0);
|
||||
break;
|
||||
case 'i':
|
||||
stem(&z, "itici", "ic", m_gt_0);
|
||||
break;
|
||||
case 'l':
|
||||
stem(&z, "laci", "ic", m_gt_0) ||
|
||||
stem(&z, "luf", "", m_gt_0);
|
||||
break;
|
||||
case 's':
|
||||
stem(&z, "ssen", "", m_gt_0);
|
||||
break;
|
||||
}
|
||||
|
||||
/* Step 4 */
|
||||
switch( z[1] ){
|
||||
case 'a':
|
||||
if( z[0]=='l' && m_gt_1(z+2) ){
|
||||
z += 2;
|
||||
}
|
||||
break;
|
||||
case 'c':
|
||||
if( z[0]=='e' && z[2]=='n' && (z[3]=='a' || z[3]=='e') && m_gt_1(z+4) ){
|
||||
z += 4;
|
||||
}
|
||||
break;
|
||||
case 'e':
|
||||
if( z[0]=='r' && m_gt_1(z+2) ){
|
||||
z += 2;
|
||||
}
|
||||
break;
|
||||
case 'i':
|
||||
if( z[0]=='c' && m_gt_1(z+2) ){
|
||||
z += 2;
|
||||
}
|
||||
break;
|
||||
case 'l':
|
||||
if( z[0]=='e' && z[2]=='b' && (z[3]=='a' || z[3]=='i') && m_gt_1(z+4) ){
|
||||
z += 4;
|
||||
}
|
||||
break;
|
||||
case 'n':
|
||||
if( z[0]=='t' ){
|
||||
if( z[2]=='a' ){
|
||||
if( m_gt_1(z+3) ){
|
||||
z += 3;
|
||||
}
|
||||
}else if( z[2]=='e' ){
|
||||
stem(&z, "tneme", "", m_gt_1) ||
|
||||
stem(&z, "tnem", "", m_gt_1) ||
|
||||
stem(&z, "tne", "", m_gt_1);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case 'o':
|
||||
if( z[0]=='u' ){
|
||||
if( m_gt_1(z+2) ){
|
||||
z += 2;
|
||||
}
|
||||
}else if( z[3]=='s' || z[3]=='t' ){
|
||||
stem(&z, "noi", "", m_gt_1);
|
||||
}
|
||||
break;
|
||||
case 's':
|
||||
if( z[0]=='m' && z[2]=='i' && m_gt_1(z+3) ){
|
||||
z += 3;
|
||||
}
|
||||
break;
|
||||
case 't':
|
||||
stem(&z, "eta", "", m_gt_1) ||
|
||||
stem(&z, "iti", "", m_gt_1);
|
||||
break;
|
||||
case 'u':
|
||||
if( z[0]=='s' && z[2]=='o' && m_gt_1(z+3) ){
|
||||
z += 3;
|
||||
}
|
||||
break;
|
||||
case 'v':
|
||||
case 'z':
|
||||
if( z[0]=='e' && z[2]=='i' && m_gt_1(z+3) ){
|
||||
z += 3;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
/* Step 5a */
|
||||
if( z[0]=='e' ){
|
||||
if( m_gt_1(z+1) ){
|
||||
z++;
|
||||
}else if( m_eq_1(z+1) && !star_oh(z+1) ){
|
||||
z++;
|
||||
}
|
||||
}
|
||||
|
||||
/* Step 5b */
|
||||
if( m_gt_1(z) && z[0]=='l' && z[1]=='l' ){
|
||||
z++;
|
||||
}
|
||||
|
||||
/* z[] is now the stemmed word in reverse order. Flip it back
|
||||
** around into forward order and return.
|
||||
*/
|
||||
*pnOut = i = strlen(z);
|
||||
zOut[i] = 0;
|
||||
while( *z ){
|
||||
zOut[--i] = *(z++);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
** Characters that can be part of a token. We assume any character
|
||||
** whose value is greater than 0x80 (any UTF character) can be
|
||||
** part of a token. In other words, delimiters all must have
|
||||
** values of 0x7f or lower.
|
||||
*/
|
||||
static const char isIdChar[] = {
|
||||
/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */
|
||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */
|
||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */
|
||||
};
|
||||
#define idChar(C) (((ch=C)&0x80)!=0 || (ch>0x2f && isIdChar[ch-0x30]))
|
||||
#define isDelim(C) (((ch=C)&0x80)==0 && (ch<0x30 || !isIdChar[ch-0x30]))
|
||||
|
||||
/*
|
||||
** Extract the next token from a tokenization cursor. The cursor must
|
||||
** have been opened by a prior call to porterOpen().
|
||||
*/
|
||||
static int porterNext(
|
||||
sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by porterOpen */
|
||||
const char **pzToken, /* OUT: *pzToken is the token text */
|
||||
int *pnBytes, /* OUT: Number of bytes in token */
|
||||
int *piStartOffset, /* OUT: Starting offset of token */
|
||||
int *piEndOffset, /* OUT: Ending offset of token */
|
||||
int *piPosition /* OUT: Position integer of token */
|
||||
){
|
||||
porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor;
|
||||
const char *z = c->zInput;
|
||||
|
||||
while( c->iOffset<c->nInput ){
|
||||
int iStartOffset, ch;
|
||||
|
||||
/* Scan past delimiter characters */
|
||||
while( c->iOffset<c->nInput && isDelim(z[c->iOffset]) ){
|
||||
c->iOffset++;
|
||||
}
|
||||
|
||||
/* Count non-delimiter characters. */
|
||||
iStartOffset = c->iOffset;
|
||||
while( c->iOffset<c->nInput && !isDelim(z[c->iOffset]) ){
|
||||
c->iOffset++;
|
||||
}
|
||||
|
||||
if( c->iOffset>iStartOffset ){
|
||||
int n = c->iOffset-iStartOffset;
|
||||
if( n>c->nAllocated ){
|
||||
c->nAllocated = n+20;
|
||||
c->zToken = realloc(c->zToken, c->nAllocated);
|
||||
if( c->zToken==NULL ) return SQLITE_NOMEM;
|
||||
}
|
||||
porter_stemmer(&z[iStartOffset], n, c->zToken, pnBytes);
|
||||
*pzToken = c->zToken;
|
||||
*piStartOffset = iStartOffset;
|
||||
*piEndOffset = c->iOffset;
|
||||
*piPosition = c->iToken++;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
}
|
||||
return SQLITE_DONE;
|
||||
}
|
||||
|
||||
/*
|
||||
** The set of routines that implement the porter-stemmer tokenizer
|
||||
*/
|
||||
static const sqlite3_tokenizer_module porterTokenizerModule = {
|
||||
0,
|
||||
porterCreate,
|
||||
porterDestroy,
|
||||
porterOpen,
|
||||
porterClose,
|
||||
porterNext,
|
||||
};
|
||||
|
||||
/*
|
||||
** Allocate a new porter tokenizer. Return a pointer to the new
|
||||
** tokenizer in *ppModule
|
||||
*/
|
||||
void sqlite3Fts1PorterTokenizerModule(
|
||||
sqlite3_tokenizer_module const**ppModule
|
||||
){
|
||||
*ppModule = &porterTokenizerModule;
|
||||
}
|
||||
|
||||
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1) */
|
||||
90
ext/fts1/fts1_tokenizer.h
Normal file
90
ext/fts1/fts1_tokenizer.h
Normal file
|
|
@ -0,0 +1,90 @@
|
|||
/*
|
||||
** 2006 July 10
|
||||
**
|
||||
** The author disclaims copyright to this source code.
|
||||
**
|
||||
*************************************************************************
|
||||
** Defines the interface to tokenizers used by fulltext-search. There
|
||||
** are three basic components:
|
||||
**
|
||||
** sqlite3_tokenizer_module is a singleton defining the tokenizer
|
||||
** interface functions. This is essentially the class structure for
|
||||
** tokenizers.
|
||||
**
|
||||
** sqlite3_tokenizer is used to define a particular tokenizer, perhaps
|
||||
** including customization information defined at creation time.
|
||||
**
|
||||
** sqlite3_tokenizer_cursor is generated by a tokenizer to generate
|
||||
** tokens from a particular input.
|
||||
*/
|
||||
#ifndef _FTS1_TOKENIZER_H_
|
||||
#define _FTS1_TOKENIZER_H_
|
||||
|
||||
/* TODO(shess) Only used for SQLITE_OK and SQLITE_DONE at this time.
|
||||
** If tokenizers are to be allowed to call sqlite3_*() functions, then
|
||||
** we will need a way to register the API consistently.
|
||||
*/
|
||||
#include "sqlite3.h"
|
||||
|
||||
/*
|
||||
** Structures used by the tokenizer interface.
|
||||
*/
|
||||
typedef struct sqlite3_tokenizer sqlite3_tokenizer;
|
||||
typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor;
|
||||
typedef struct sqlite3_tokenizer_module sqlite3_tokenizer_module;
|
||||
|
||||
struct sqlite3_tokenizer_module {
|
||||
int iVersion; /* currently 0 */
|
||||
|
||||
/*
|
||||
** Create and destroy a tokenizer. argc/argv are passed down from
|
||||
** the fulltext virtual table creation to allow customization.
|
||||
*/
|
||||
int (*xCreate)(int argc, const char *const*argv,
|
||||
sqlite3_tokenizer **ppTokenizer);
|
||||
int (*xDestroy)(sqlite3_tokenizer *pTokenizer);
|
||||
|
||||
/*
|
||||
** Tokenize a particular input. Call xOpen() to prepare to
|
||||
** tokenize, xNext() repeatedly until it returns SQLITE_DONE, then
|
||||
** xClose() to free any internal state. The pInput passed to
|
||||
** xOpen() must exist until the cursor is closed. The ppToken
|
||||
** result from xNext() is only valid until the next call to xNext()
|
||||
** or until xClose() is called.
|
||||
*/
|
||||
/* TODO(shess) current implementation requires pInput to be
|
||||
** nul-terminated. This should either be fixed, or pInput/nBytes
|
||||
** should be converted to zInput.
|
||||
*/
|
||||
int (*xOpen)(sqlite3_tokenizer *pTokenizer,
|
||||
const char *pInput, int nBytes,
|
||||
sqlite3_tokenizer_cursor **ppCursor);
|
||||
int (*xClose)(sqlite3_tokenizer_cursor *pCursor);
|
||||
int (*xNext)(sqlite3_tokenizer_cursor *pCursor,
|
||||
const char **ppToken, int *pnBytes,
|
||||
int *piStartOffset, int *piEndOffset, int *piPosition);
|
||||
};
|
||||
|
||||
struct sqlite3_tokenizer {
|
||||
const sqlite3_tokenizer_module *pModule; /* The module for this tokenizer */
|
||||
/* Tokenizer implementations will typically add additional fields */
|
||||
};
|
||||
|
||||
struct sqlite3_tokenizer_cursor {
|
||||
sqlite3_tokenizer *pTokenizer; /* Tokenizer for this cursor. */
|
||||
/* Tokenizer implementations will typically add additional fields */
|
||||
};
|
||||
|
||||
/*
|
||||
** Get the module for a tokenizer which generates tokens based on a
|
||||
** set of non-token characters. The default is to break tokens at any
|
||||
** non-alnum character, though the set of delimiters can also be
|
||||
** specified by the first argv argument to xCreate().
|
||||
*/
|
||||
/* TODO(shess) This doesn't belong here. Need some sort of
|
||||
** registration process.
|
||||
*/
|
||||
void sqlite3Fts1SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule);
|
||||
void sqlite3Fts1PorterTokenizerModule(sqlite3_tokenizer_module const**ppModule);
|
||||
|
||||
#endif /* _FTS1_TOKENIZER_H_ */
|
||||
221
ext/fts1/fts1_tokenizer1.c
Normal file
221
ext/fts1/fts1_tokenizer1.c
Normal file
|
|
@ -0,0 +1,221 @@
|
|||
/*
|
||||
** The author disclaims copyright to this source code.
|
||||
**
|
||||
*************************************************************************
|
||||
** Implementation of the "simple" full-text-search tokenizer.
|
||||
*/
|
||||
|
||||
/*
|
||||
** The code in this file is only compiled if:
|
||||
**
|
||||
** * The FTS1 module is being built as an extension
|
||||
** (in which case SQLITE_CORE is not defined), or
|
||||
**
|
||||
** * The FTS1 module is being built into the core of
|
||||
** SQLite (in which case SQLITE_ENABLE_FTS1 is defined).
|
||||
*/
|
||||
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1)
|
||||
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
|
||||
#include "fts1_tokenizer.h"
|
||||
|
||||
typedef struct simple_tokenizer {
|
||||
sqlite3_tokenizer base;
|
||||
char delim[128]; /* flag ASCII delimiters */
|
||||
} simple_tokenizer;
|
||||
|
||||
typedef struct simple_tokenizer_cursor {
|
||||
sqlite3_tokenizer_cursor base;
|
||||
const char *pInput; /* input we are tokenizing */
|
||||
int nBytes; /* size of the input */
|
||||
int iOffset; /* current position in pInput */
|
||||
int iToken; /* index of next token to be returned */
|
||||
char *pToken; /* storage for current token */
|
||||
int nTokenAllocated; /* space allocated to zToken buffer */
|
||||
} simple_tokenizer_cursor;
|
||||
|
||||
|
||||
/* Forward declaration */
|
||||
static const sqlite3_tokenizer_module simpleTokenizerModule;
|
||||
|
||||
static int isDelim(simple_tokenizer *t, unsigned char c){
|
||||
return c<0x80 && t->delim[c];
|
||||
}
|
||||
|
||||
/*
|
||||
** Create a new tokenizer instance.
|
||||
*/
|
||||
static int simpleCreate(
|
||||
int argc, const char * const *argv,
|
||||
sqlite3_tokenizer **ppTokenizer
|
||||
){
|
||||
simple_tokenizer *t;
|
||||
|
||||
t = (simple_tokenizer *) calloc(sizeof(*t), 1);
|
||||
if( t==NULL ) return SQLITE_NOMEM;
|
||||
|
||||
/* TODO(shess) Delimiters need to remain the same from run to run,
|
||||
** else we need to reindex. One solution would be a meta-table to
|
||||
** track such information in the database, then we'd only want this
|
||||
** information on the initial create.
|
||||
*/
|
||||
if( argc>1 ){
|
||||
int i, n = strlen(argv[1]);
|
||||
for(i=0; i<n; i++){
|
||||
unsigned char ch = argv[1][i];
|
||||
/* We explicitly don't support UTF-8 delimiters for now. */
|
||||
if( ch>=0x80 ){
|
||||
free(t);
|
||||
return SQLITE_ERROR;
|
||||
}
|
||||
t->delim[ch] = 1;
|
||||
}
|
||||
} else {
|
||||
/* Mark non-alphanumeric ASCII characters as delimiters */
|
||||
int i;
|
||||
for(i=1; i<0x80; i++){
|
||||
t->delim[i] = !isalnum(i);
|
||||
}
|
||||
}
|
||||
|
||||
*ppTokenizer = &t->base;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Destroy a tokenizer
|
||||
*/
|
||||
static int simpleDestroy(sqlite3_tokenizer *pTokenizer){
|
||||
free(pTokenizer);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Prepare to begin tokenizing a particular string. The input
|
||||
** string to be tokenized is pInput[0..nBytes-1]. A cursor
|
||||
** used to incrementally tokenize this string is returned in
|
||||
** *ppCursor.
|
||||
*/
|
||||
static int simpleOpen(
|
||||
sqlite3_tokenizer *pTokenizer, /* The tokenizer */
|
||||
const char *pInput, int nBytes, /* String to be tokenized */
|
||||
sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */
|
||||
){
|
||||
simple_tokenizer_cursor *c;
|
||||
|
||||
c = (simple_tokenizer_cursor *) malloc(sizeof(*c));
|
||||
if( c==NULL ) return SQLITE_NOMEM;
|
||||
|
||||
c->pInput = pInput;
|
||||
if( pInput==0 ){
|
||||
c->nBytes = 0;
|
||||
}else if( nBytes<0 ){
|
||||
c->nBytes = (int)strlen(pInput);
|
||||
}else{
|
||||
c->nBytes = nBytes;
|
||||
}
|
||||
c->iOffset = 0; /* start tokenizing at the beginning */
|
||||
c->iToken = 0;
|
||||
c->pToken = NULL; /* no space allocated, yet. */
|
||||
c->nTokenAllocated = 0;
|
||||
|
||||
*ppCursor = &c->base;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Close a tokenization cursor previously opened by a call to
|
||||
** simpleOpen() above.
|
||||
*/
|
||||
static int simpleClose(sqlite3_tokenizer_cursor *pCursor){
|
||||
simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor;
|
||||
free(c->pToken);
|
||||
free(c);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Extract the next token from a tokenization cursor. The cursor must
|
||||
** have been opened by a prior call to simpleOpen().
|
||||
*/
|
||||
static int simpleNext(
|
||||
sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by simpleOpen */
|
||||
const char **ppToken, /* OUT: *ppToken is the token text */
|
||||
int *pnBytes, /* OUT: Number of bytes in token */
|
||||
int *piStartOffset, /* OUT: Starting offset of token */
|
||||
int *piEndOffset, /* OUT: Ending offset of token */
|
||||
int *piPosition /* OUT: Position integer of token */
|
||||
){
|
||||
simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor;
|
||||
simple_tokenizer *t = (simple_tokenizer *) pCursor->pTokenizer;
|
||||
unsigned char *p = (unsigned char *)c->pInput;
|
||||
|
||||
while( c->iOffset<c->nBytes ){
|
||||
int iStartOffset;
|
||||
|
||||
/* Scan past delimiter characters */
|
||||
while( c->iOffset<c->nBytes && isDelim(t, p[c->iOffset]) ){
|
||||
c->iOffset++;
|
||||
}
|
||||
|
||||
/* Count non-delimiter characters. */
|
||||
iStartOffset = c->iOffset;
|
||||
while( c->iOffset<c->nBytes && !isDelim(t, p[c->iOffset]) ){
|
||||
c->iOffset++;
|
||||
}
|
||||
|
||||
if( c->iOffset>iStartOffset ){
|
||||
int i, n = c->iOffset-iStartOffset;
|
||||
if( n>c->nTokenAllocated ){
|
||||
c->nTokenAllocated = n+20;
|
||||
c->pToken = realloc(c->pToken, c->nTokenAllocated);
|
||||
if( c->pToken==NULL ) return SQLITE_NOMEM;
|
||||
}
|
||||
for(i=0; i<n; i++){
|
||||
/* TODO(shess) This needs expansion to handle UTF-8
|
||||
** case-insensitivity.
|
||||
*/
|
||||
unsigned char ch = p[iStartOffset+i];
|
||||
c->pToken[i] = ch<0x80 ? tolower(ch) : ch;
|
||||
}
|
||||
*ppToken = c->pToken;
|
||||
*pnBytes = n;
|
||||
*piStartOffset = iStartOffset;
|
||||
*piEndOffset = c->iOffset;
|
||||
*piPosition = c->iToken++;
|
||||
|
||||
return SQLITE_OK;
|
||||
}
|
||||
}
|
||||
return SQLITE_DONE;
|
||||
}
|
||||
|
||||
/*
|
||||
** The set of routines that implement the simple tokenizer
|
||||
*/
|
||||
static const sqlite3_tokenizer_module simpleTokenizerModule = {
|
||||
0,
|
||||
simpleCreate,
|
||||
simpleDestroy,
|
||||
simpleOpen,
|
||||
simpleClose,
|
||||
simpleNext,
|
||||
};
|
||||
|
||||
/*
|
||||
** Allocate a new simple tokenizer. Return a pointer to the new
|
||||
** tokenizer in *ppModule
|
||||
*/
|
||||
void sqlite3Fts1SimpleTokenizerModule(
|
||||
sqlite3_tokenizer_module const**ppModule
|
||||
){
|
||||
*ppModule = &simpleTokenizerModule;
|
||||
}
|
||||
|
||||
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1) */
|
||||
1511
ext/fts1/fulltext.c
Normal file
1511
ext/fts1/fulltext.c
Normal file
File diff suppressed because it is too large
Load diff
11
ext/fts1/fulltext.h
Normal file
11
ext/fts1/fulltext.h
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
#include "sqlite3.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif /* __cplusplus */
|
||||
|
||||
int fulltext_init(sqlite3 *db);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif /* __cplusplus */
|
||||
174
ext/fts1/simple_tokenizer.c
Normal file
174
ext/fts1/simple_tokenizer.c
Normal file
|
|
@ -0,0 +1,174 @@
|
|||
/*
|
||||
** The author disclaims copyright to this source code.
|
||||
**
|
||||
*************************************************************************
|
||||
** Implementation of the "simple" full-text-search tokenizer.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#if !defined(__APPLE__)
|
||||
#include <malloc.h>
|
||||
#else
|
||||
#include <stdlib.h>
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
|
||||
#include "tokenizer.h"
|
||||
|
||||
/* Duplicate a string; the caller must free() the returned string.
|
||||
* (We don't use strdup() since it's not part of the standard C library and
|
||||
* may not be available everywhere.) */
|
||||
/* TODO(shess) Copied from fulltext.c, consider util.c for such
|
||||
** things. */
|
||||
static char *string_dup(const char *s){
|
||||
char *str = malloc(strlen(s) + 1);
|
||||
strcpy(str, s);
|
||||
return str;
|
||||
}
|
||||
|
||||
typedef struct simple_tokenizer {
|
||||
sqlite3_tokenizer base;
|
||||
const char *zDelim; /* token delimiters */
|
||||
} simple_tokenizer;
|
||||
|
||||
typedef struct simple_tokenizer_cursor {
|
||||
sqlite3_tokenizer_cursor base;
|
||||
const char *pInput; /* input we are tokenizing */
|
||||
int nBytes; /* size of the input */
|
||||
const char *pCurrent; /* current position in pInput */
|
||||
int iToken; /* index of next token to be returned */
|
||||
char *zToken; /* storage for current token */
|
||||
int nTokenBytes; /* actual size of current token */
|
||||
int nTokenAllocated; /* space allocated to zToken buffer */
|
||||
} simple_tokenizer_cursor;
|
||||
|
||||
static sqlite3_tokenizer_module simpleTokenizerModule;/* forward declaration */
|
||||
|
||||
static int simpleCreate(
|
||||
int argc, const char **argv,
|
||||
sqlite3_tokenizer **ppTokenizer
|
||||
){
|
||||
simple_tokenizer *t;
|
||||
|
||||
t = (simple_tokenizer *) malloc(sizeof(simple_tokenizer));
|
||||
/* TODO(shess) Delimiters need to remain the same from run to run,
|
||||
** else we need to reindex. One solution would be a meta-table to
|
||||
** track such information in the database, then we'd only want this
|
||||
** information on the initial create.
|
||||
*/
|
||||
if( argc>1 ){
|
||||
t->zDelim = string_dup(argv[1]);
|
||||
} else {
|
||||
/* Build a string excluding alphanumeric ASCII characters */
|
||||
char zDelim[0x80]; /* nul-terminated, so nul not a member */
|
||||
int i, j;
|
||||
for(i=1, j=0; i<0x80; i++){
|
||||
if( !isalnum(i) ){
|
||||
zDelim[j++] = i;
|
||||
}
|
||||
}
|
||||
zDelim[j++] = '\0';
|
||||
assert( j<=sizeof(zDelim) );
|
||||
t->zDelim = string_dup(zDelim);
|
||||
}
|
||||
|
||||
*ppTokenizer = &t->base;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
static int simpleDestroy(sqlite3_tokenizer *pTokenizer){
|
||||
simple_tokenizer *t = (simple_tokenizer *) pTokenizer;
|
||||
|
||||
free((void *) t->zDelim);
|
||||
free(t);
|
||||
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
static int simpleOpen(
|
||||
sqlite3_tokenizer *pTokenizer,
|
||||
const char *pInput, int nBytes,
|
||||
sqlite3_tokenizer_cursor **ppCursor
|
||||
){
|
||||
simple_tokenizer_cursor *c;
|
||||
|
||||
c = (simple_tokenizer_cursor *) malloc(sizeof(simple_tokenizer_cursor));
|
||||
c->pInput = pInput;
|
||||
c->nBytes = nBytes<0 ? (int) strlen(pInput) : nBytes;
|
||||
c->pCurrent = c->pInput; /* start tokenizing at the beginning */
|
||||
c->iToken = 0;
|
||||
c->zToken = NULL; /* no space allocated, yet. */
|
||||
c->nTokenBytes = 0;
|
||||
c->nTokenAllocated = 0;
|
||||
|
||||
*ppCursor = &c->base;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
static int simpleClose(sqlite3_tokenizer_cursor *pCursor){
|
||||
simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor;
|
||||
|
||||
if( NULL!=c->zToken ){
|
||||
free(c->zToken);
|
||||
}
|
||||
free(c);
|
||||
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
static int simpleNext(
|
||||
sqlite3_tokenizer_cursor *pCursor,
|
||||
const char **ppToken, int *pnBytes,
|
||||
int *piStartOffset, int *piEndOffset, int *piPosition
|
||||
){
|
||||
simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor;
|
||||
simple_tokenizer *t = (simple_tokenizer *) pCursor->pTokenizer;
|
||||
int ii;
|
||||
|
||||
while( c->pCurrent-c->pInput<c->nBytes ){
|
||||
int n = (int) strcspn(c->pCurrent, t->zDelim);
|
||||
if( n>0 ){
|
||||
if( n+1>c->nTokenAllocated ){
|
||||
c->zToken = realloc(c->zToken, n+1);
|
||||
}
|
||||
for(ii=0; ii<n; ii++){
|
||||
/* TODO(shess) This needs expansion to handle UTF-8
|
||||
** case-insensitivity.
|
||||
*/
|
||||
char ch = c->pCurrent[ii];
|
||||
c->zToken[ii] = (unsigned char)ch<0x80 ? tolower((unsigned char)ch):ch;
|
||||
}
|
||||
c->zToken[n] = '\0';
|
||||
*ppToken = c->zToken;
|
||||
*pnBytes = n;
|
||||
*piStartOffset = (int) (c->pCurrent-c->pInput);
|
||||
*piEndOffset = *piStartOffset+n;
|
||||
*piPosition = c->iToken++;
|
||||
c->pCurrent += n + 1;
|
||||
|
||||
return SQLITE_OK;
|
||||
}
|
||||
c->pCurrent += n + 1;
|
||||
/* TODO(shess) could strspn() to skip delimiters en masse. Needs
|
||||
** to happen in two places, though, which is annoying.
|
||||
*/
|
||||
}
|
||||
return SQLITE_DONE;
|
||||
}
|
||||
|
||||
static sqlite3_tokenizer_module simpleTokenizerModule = {
|
||||
0,
|
||||
simpleCreate,
|
||||
simpleDestroy,
|
||||
simpleOpen,
|
||||
simpleClose,
|
||||
simpleNext,
|
||||
};
|
||||
|
||||
void get_simple_tokenizer_module(
|
||||
sqlite3_tokenizer_module **ppModule
|
||||
){
|
||||
*ppModule = &simpleTokenizerModule;
|
||||
}
|
||||
89
ext/fts1/tokenizer.h
Normal file
89
ext/fts1/tokenizer.h
Normal file
|
|
@ -0,0 +1,89 @@
|
|||
/*
|
||||
** 2006 July 10
|
||||
**
|
||||
** The author disclaims copyright to this source code.
|
||||
**
|
||||
*************************************************************************
|
||||
** Defines the interface to tokenizers used by fulltext-search. There
|
||||
** are three basic components:
|
||||
**
|
||||
** sqlite3_tokenizer_module is a singleton defining the tokenizer
|
||||
** interface functions. This is essentially the class structure for
|
||||
** tokenizers.
|
||||
**
|
||||
** sqlite3_tokenizer is used to define a particular tokenizer, perhaps
|
||||
** including customization information defined at creation time.
|
||||
**
|
||||
** sqlite3_tokenizer_cursor is generated by a tokenizer to generate
|
||||
** tokens from a particular input.
|
||||
*/
|
||||
#ifndef _TOKENIZER_H_
|
||||
#define _TOKENIZER_H_
|
||||
|
||||
/* TODO(shess) Only used for SQLITE_OK and SQLITE_DONE at this time.
|
||||
** If tokenizers are to be allowed to call sqlite3_*() functions, then
|
||||
** we will need a way to register the API consistently.
|
||||
*/
|
||||
#include "sqlite3.h"
|
||||
|
||||
/*
|
||||
** Structures used by the tokenizer interface.
|
||||
*/
|
||||
typedef struct sqlite3_tokenizer sqlite3_tokenizer;
|
||||
typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor;
|
||||
typedef struct sqlite3_tokenizer_module sqlite3_tokenizer_module;
|
||||
|
||||
struct sqlite3_tokenizer_module {
|
||||
int iVersion; /* currently 0 */
|
||||
|
||||
/*
|
||||
** Create and destroy a tokenizer. argc/argv are passed down from
|
||||
** the fulltext virtual table creation to allow customization.
|
||||
*/
|
||||
int (*xCreate)(int argc, const char **argv,
|
||||
sqlite3_tokenizer **ppTokenizer);
|
||||
int (*xDestroy)(sqlite3_tokenizer *pTokenizer);
|
||||
|
||||
/*
|
||||
** Tokenize a particular input. Call xOpen() to prepare to
|
||||
** tokenize, xNext() repeatedly until it returns SQLITE_DONE, then
|
||||
** xClose() to free any internal state. The pInput passed to
|
||||
** xOpen() must exist until the cursor is closed. The ppToken
|
||||
** result from xNext() is only valid until the next call to xNext()
|
||||
** or until xClose() is called.
|
||||
*/
|
||||
/* TODO(shess) current implementation requires pInput to be
|
||||
** nul-terminated. This should either be fixed, or pInput/nBytes
|
||||
** should be converted to zInput.
|
||||
*/
|
||||
int (*xOpen)(sqlite3_tokenizer *pTokenizer,
|
||||
const char *pInput, int nBytes,
|
||||
sqlite3_tokenizer_cursor **ppCursor);
|
||||
int (*xClose)(sqlite3_tokenizer_cursor *pCursor);
|
||||
int (*xNext)(sqlite3_tokenizer_cursor *pCursor,
|
||||
const char **ppToken, int *pnBytes,
|
||||
int *piStartOffset, int *piEndOffset, int *piPosition);
|
||||
};
|
||||
|
||||
struct sqlite3_tokenizer {
|
||||
sqlite3_tokenizer_module *pModule; /* The module for this tokenizer */
|
||||
/* Tokenizer implementations will typically add additional fields */
|
||||
};
|
||||
|
||||
struct sqlite3_tokenizer_cursor {
|
||||
sqlite3_tokenizer *pTokenizer; /* Tokenizer for this cursor. */
|
||||
/* Tokenizer implementations will typically add additional fields */
|
||||
};
|
||||
|
||||
/*
|
||||
** Get the module for a tokenizer which generates tokens based on a
|
||||
** set of non-token characters. The default is to break tokens at any
|
||||
** non-alnum character, though the set of delimiters can also be
|
||||
** specified by the first argv argument to xCreate().
|
||||
*/
|
||||
/* TODO(shess) This doesn't belong here. Need some sort of
|
||||
** registration process.
|
||||
*/
|
||||
void get_simple_tokenizer_module(sqlite3_tokenizer_module **ppModule);
|
||||
|
||||
#endif /* _TOKENIZER_H_ */
|
||||
133
ext/fts2/README.tokenizers
Normal file
133
ext/fts2/README.tokenizers
Normal file
|
|
@ -0,0 +1,133 @@
|
|||
|
||||
1. FTS2 Tokenizers
|
||||
|
||||
When creating a new full-text table, FTS2 allows the user to select
|
||||
the text tokenizer implementation to be used when indexing text
|
||||
by specifying a "tokenizer" clause as part of the CREATE VIRTUAL TABLE
|
||||
statement:
|
||||
|
||||
CREATE VIRTUAL TABLE <table-name> USING fts2(
|
||||
<columns ...> [, tokenizer <tokenizer-name> [<tokenizer-args>]]
|
||||
);
|
||||
|
||||
The built-in tokenizers (valid values to pass as <tokenizer name>) are
|
||||
"simple" and "porter".
|
||||
|
||||
<tokenizer-args> should consist of zero or more white-space separated
|
||||
arguments to pass to the selected tokenizer implementation. The
|
||||
interpretation of the arguments, if any, depends on the individual
|
||||
tokenizer.
|
||||
|
||||
2. Custom Tokenizers
|
||||
|
||||
FTS2 allows users to provide custom tokenizer implementations. The
|
||||
interface used to create a new tokenizer is defined and described in
|
||||
the fts2_tokenizer.h source file.
|
||||
|
||||
Registering a new FTS2 tokenizer is similar to registering a new
|
||||
virtual table module with SQLite. The user passes a pointer to a
|
||||
structure containing pointers to various callback functions that
|
||||
make up the implementation of the new tokenizer type. For tokenizers,
|
||||
the structure (defined in fts2_tokenizer.h) is called
|
||||
"sqlite3_tokenizer_module".
|
||||
|
||||
FTS2 does not expose a C-function that users call to register new
|
||||
tokenizer types with a database handle. Instead, the pointer must
|
||||
be encoded as an SQL blob value and passed to FTS2 through the SQL
|
||||
engine by evaluating a special scalar function, "fts2_tokenizer()".
|
||||
The fts2_tokenizer() function may be called with one or two arguments,
|
||||
as follows:
|
||||
|
||||
SELECT fts2_tokenizer(<tokenizer-name>);
|
||||
SELECT fts2_tokenizer(<tokenizer-name>, <sqlite3_tokenizer_module ptr>);
|
||||
|
||||
Where <tokenizer-name> is a string identifying the tokenizer and
|
||||
<sqlite3_tokenizer_module ptr> is a pointer to an sqlite3_tokenizer_module
|
||||
structure encoded as an SQL blob. If the second argument is present,
|
||||
it is registered as tokenizer <tokenizer-name> and a copy of it
|
||||
returned. If only one argument is passed, a pointer to the tokenizer
|
||||
implementation currently registered as <tokenizer-name> is returned,
|
||||
encoded as a blob. Or, if no such tokenizer exists, an SQL exception
|
||||
(error) is raised.
|
||||
|
||||
SECURITY: If the fts2 extension is used in an environment where potentially
|
||||
malicious users may execute arbitrary SQL (i.e. gears), they should be
|
||||
prevented from invoking the fts2_tokenizer() function, possibly using the
|
||||
authorisation callback.
|
||||
|
||||
See "Sample code" below for an example of calling the fts2_tokenizer()
|
||||
function from C code.
|
||||
|
||||
3. ICU Library Tokenizers
|
||||
|
||||
If this extension is compiled with the SQLITE_ENABLE_ICU pre-processor
|
||||
symbol defined, then there exists a built-in tokenizer named "icu"
|
||||
implemented using the ICU library. The first argument passed to the
|
||||
xCreate() method (see fts2_tokenizer.h) of this tokenizer may be
|
||||
an ICU locale identifier. For example "tr_TR" for Turkish as used
|
||||
in Turkey, or "en_AU" for English as used in Australia. For example:
|
||||
|
||||
"CREATE VIRTUAL TABLE thai_text USING fts2(text, tokenizer icu th_TH)"
|
||||
|
||||
The ICU tokenizer implementation is very simple. It splits the input
|
||||
text according to the ICU rules for finding word boundaries and discards
|
||||
any tokens that consist entirely of white-space. This may be suitable
|
||||
for some applications in some locales, but not all. If more complex
|
||||
processing is required, for example to implement stemming or
|
||||
discard punctuation, this can be done by creating a tokenizer
|
||||
implementation that uses the ICU tokenizer as part of its implementation.
|
||||
|
||||
When using the ICU tokenizer this way, it is safe to overwrite the
|
||||
contents of the strings returned by the xNext() method (see
|
||||
fts2_tokenizer.h).
|
||||
|
||||
4. Sample code.
|
||||
|
||||
The following two code samples illustrate the way C code should invoke
|
||||
the fts2_tokenizer() scalar function:
|
||||
|
||||
int registerTokenizer(
|
||||
sqlite3 *db,
|
||||
char *zName,
|
||||
const sqlite3_tokenizer_module *p
|
||||
){
|
||||
int rc;
|
||||
sqlite3_stmt *pStmt;
|
||||
const char zSql[] = "SELECT fts2_tokenizer(?, ?)";
|
||||
|
||||
rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
|
||||
if( rc!=SQLITE_OK ){
|
||||
return rc;
|
||||
}
|
||||
|
||||
sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
|
||||
sqlite3_bind_blob(pStmt, 2, &p, sizeof(p), SQLITE_STATIC);
|
||||
sqlite3_step(pStmt);
|
||||
|
||||
return sqlite3_finalize(pStmt);
|
||||
}
|
||||
|
||||
int queryTokenizer(
|
||||
sqlite3 *db,
|
||||
char *zName,
|
||||
const sqlite3_tokenizer_module **pp
|
||||
){
|
||||
int rc;
|
||||
sqlite3_stmt *pStmt;
|
||||
const char zSql[] = "SELECT fts2_tokenizer(?)";
|
||||
|
||||
*pp = 0;
|
||||
rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
|
||||
if( rc!=SQLITE_OK ){
|
||||
return rc;
|
||||
}
|
||||
|
||||
sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
|
||||
if( SQLITE_ROW==sqlite3_step(pStmt) ){
|
||||
if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){
|
||||
memcpy(pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp));
|
||||
}
|
||||
}
|
||||
|
||||
return sqlite3_finalize(pStmt);
|
||||
}
|
||||
4
ext/fts2/README.txt
Normal file
4
ext/fts2/README.txt
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
This folder contains source code to the second full-text search
|
||||
extension for SQLite. While the API is the same, this version uses a
|
||||
substantially different storage schema from fts1, so tables will need
|
||||
to be rebuilt.
|
||||
6860
ext/fts2/fts2.c
Normal file
6860
ext/fts2/fts2.c
Normal file
File diff suppressed because it is too large
Load diff
26
ext/fts2/fts2.h
Normal file
26
ext/fts2/fts2.h
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
/*
|
||||
** 2006 Oct 10
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
******************************************************************************
|
||||
**
|
||||
** This header file is used by programs that want to link against the
|
||||
** FTS2 library. All it does is declare the sqlite3Fts2Init() interface.
|
||||
*/
|
||||
#include "sqlite3.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif /* __cplusplus */
|
||||
|
||||
int sqlite3Fts2Init(sqlite3 *db);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif /* __cplusplus */
|
||||
376
ext/fts2/fts2_hash.c
Normal file
376
ext/fts2/fts2_hash.c
Normal file
|
|
@ -0,0 +1,376 @@
|
|||
/*
|
||||
** 2001 September 22
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
*************************************************************************
|
||||
** This is the implementation of generic hash-tables used in SQLite.
|
||||
** We've modified it slightly to serve as a standalone hash table
|
||||
** implementation for the full-text indexing module.
|
||||
*/
|
||||
|
||||
/*
|
||||
** The code in this file is only compiled if:
|
||||
**
|
||||
** * The FTS2 module is being built as an extension
|
||||
** (in which case SQLITE_CORE is not defined), or
|
||||
**
|
||||
** * The FTS2 module is being built into the core of
|
||||
** SQLite (in which case SQLITE_ENABLE_FTS2 is defined).
|
||||
*/
|
||||
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2)
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "sqlite3.h"
|
||||
#include "sqlite3ext.h"
|
||||
SQLITE_EXTENSION_INIT3
|
||||
#include "fts2_hash.h"
|
||||
|
||||
/*
|
||||
** Malloc and Free functions
|
||||
*/
|
||||
static void *fts2HashMalloc(int n){
|
||||
void *p = sqlite3_malloc(n);
|
||||
if( p ){
|
||||
memset(p, 0, n);
|
||||
}
|
||||
return p;
|
||||
}
|
||||
static void fts2HashFree(void *p){
|
||||
sqlite3_free(p);
|
||||
}
|
||||
|
||||
/* Turn bulk memory into a hash table object by initializing the
|
||||
** fields of the Hash structure.
|
||||
**
|
||||
** "pNew" is a pointer to the hash table that is to be initialized.
|
||||
** keyClass is one of the constants
|
||||
** FTS2_HASH_BINARY or FTS2_HASH_STRING. The value of keyClass
|
||||
** determines what kind of key the hash table will use. "copyKey" is
|
||||
** true if the hash table should make its own private copy of keys and
|
||||
** false if it should just use the supplied pointer.
|
||||
*/
|
||||
void sqlite3Fts2HashInit(fts2Hash *pNew, int keyClass, int copyKey){
|
||||
assert( pNew!=0 );
|
||||
assert( keyClass>=FTS2_HASH_STRING && keyClass<=FTS2_HASH_BINARY );
|
||||
pNew->keyClass = keyClass;
|
||||
pNew->copyKey = copyKey;
|
||||
pNew->first = 0;
|
||||
pNew->count = 0;
|
||||
pNew->htsize = 0;
|
||||
pNew->ht = 0;
|
||||
}
|
||||
|
||||
/* Remove all entries from a hash table. Reclaim all memory.
|
||||
** Call this routine to delete a hash table or to reset a hash table
|
||||
** to the empty state.
|
||||
*/
|
||||
void sqlite3Fts2HashClear(fts2Hash *pH){
|
||||
fts2HashElem *elem; /* For looping over all elements of the table */
|
||||
|
||||
assert( pH!=0 );
|
||||
elem = pH->first;
|
||||
pH->first = 0;
|
||||
fts2HashFree(pH->ht);
|
||||
pH->ht = 0;
|
||||
pH->htsize = 0;
|
||||
while( elem ){
|
||||
fts2HashElem *next_elem = elem->next;
|
||||
if( pH->copyKey && elem->pKey ){
|
||||
fts2HashFree(elem->pKey);
|
||||
}
|
||||
fts2HashFree(elem);
|
||||
elem = next_elem;
|
||||
}
|
||||
pH->count = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
** Hash and comparison functions when the mode is FTS2_HASH_STRING
|
||||
*/
|
||||
static int strHash(const void *pKey, int nKey){
|
||||
const char *z = (const char *)pKey;
|
||||
int h = 0;
|
||||
if( nKey<=0 ) nKey = (int) strlen(z);
|
||||
while( nKey > 0 ){
|
||||
h = (h<<3) ^ h ^ *z++;
|
||||
nKey--;
|
||||
}
|
||||
return h & 0x7fffffff;
|
||||
}
|
||||
static int strCompare(const void *pKey1, int n1, const void *pKey2, int n2){
|
||||
if( n1!=n2 ) return 1;
|
||||
return strncmp((const char*)pKey1,(const char*)pKey2,n1);
|
||||
}
|
||||
|
||||
/*
|
||||
** Hash and comparison functions when the mode is FTS2_HASH_BINARY
|
||||
*/
|
||||
static int binHash(const void *pKey, int nKey){
|
||||
int h = 0;
|
||||
const char *z = (const char *)pKey;
|
||||
while( nKey-- > 0 ){
|
||||
h = (h<<3) ^ h ^ *(z++);
|
||||
}
|
||||
return h & 0x7fffffff;
|
||||
}
|
||||
static int binCompare(const void *pKey1, int n1, const void *pKey2, int n2){
|
||||
if( n1!=n2 ) return 1;
|
||||
return memcmp(pKey1,pKey2,n1);
|
||||
}
|
||||
|
||||
/*
|
||||
** Return a pointer to the appropriate hash function given the key class.
|
||||
**
|
||||
** The C syntax in this function definition may be unfamilar to some
|
||||
** programmers, so we provide the following additional explanation:
|
||||
**
|
||||
** The name of the function is "hashFunction". The function takes a
|
||||
** single parameter "keyClass". The return value of hashFunction()
|
||||
** is a pointer to another function. Specifically, the return value
|
||||
** of hashFunction() is a pointer to a function that takes two parameters
|
||||
** with types "const void*" and "int" and returns an "int".
|
||||
*/
|
||||
static int (*hashFunction(int keyClass))(const void*,int){
|
||||
if( keyClass==FTS2_HASH_STRING ){
|
||||
return &strHash;
|
||||
}else{
|
||||
assert( keyClass==FTS2_HASH_BINARY );
|
||||
return &binHash;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
** Return a pointer to the appropriate hash function given the key class.
|
||||
**
|
||||
** For help in interpreted the obscure C code in the function definition,
|
||||
** see the header comment on the previous function.
|
||||
*/
|
||||
static int (*compareFunction(int keyClass))(const void*,int,const void*,int){
|
||||
if( keyClass==FTS2_HASH_STRING ){
|
||||
return &strCompare;
|
||||
}else{
|
||||
assert( keyClass==FTS2_HASH_BINARY );
|
||||
return &binCompare;
|
||||
}
|
||||
}
|
||||
|
||||
/* Link an element into the hash table
|
||||
*/
|
||||
static void insertElement(
|
||||
fts2Hash *pH, /* The complete hash table */
|
||||
struct _fts2ht *pEntry, /* The entry into which pNew is inserted */
|
||||
fts2HashElem *pNew /* The element to be inserted */
|
||||
){
|
||||
fts2HashElem *pHead; /* First element already in pEntry */
|
||||
pHead = pEntry->chain;
|
||||
if( pHead ){
|
||||
pNew->next = pHead;
|
||||
pNew->prev = pHead->prev;
|
||||
if( pHead->prev ){ pHead->prev->next = pNew; }
|
||||
else { pH->first = pNew; }
|
||||
pHead->prev = pNew;
|
||||
}else{
|
||||
pNew->next = pH->first;
|
||||
if( pH->first ){ pH->first->prev = pNew; }
|
||||
pNew->prev = 0;
|
||||
pH->first = pNew;
|
||||
}
|
||||
pEntry->count++;
|
||||
pEntry->chain = pNew;
|
||||
}
|
||||
|
||||
|
||||
/* Resize the hash table so that it cantains "new_size" buckets.
|
||||
** "new_size" must be a power of 2. The hash table might fail
|
||||
** to resize if sqliteMalloc() fails.
|
||||
*/
|
||||
static void rehash(fts2Hash *pH, int new_size){
|
||||
struct _fts2ht *new_ht; /* The new hash table */
|
||||
fts2HashElem *elem, *next_elem; /* For looping over existing elements */
|
||||
int (*xHash)(const void*,int); /* The hash function */
|
||||
|
||||
assert( (new_size & (new_size-1))==0 );
|
||||
new_ht = (struct _fts2ht *)fts2HashMalloc( new_size*sizeof(struct _fts2ht) );
|
||||
if( new_ht==0 ) return;
|
||||
fts2HashFree(pH->ht);
|
||||
pH->ht = new_ht;
|
||||
pH->htsize = new_size;
|
||||
xHash = hashFunction(pH->keyClass);
|
||||
for(elem=pH->first, pH->first=0; elem; elem = next_elem){
|
||||
int h = (*xHash)(elem->pKey, elem->nKey) & (new_size-1);
|
||||
next_elem = elem->next;
|
||||
insertElement(pH, &new_ht[h], elem);
|
||||
}
|
||||
}
|
||||
|
||||
/* This function (for internal use only) locates an element in an
|
||||
** hash table that matches the given key. The hash for this key has
|
||||
** already been computed and is passed as the 4th parameter.
|
||||
*/
|
||||
static fts2HashElem *findElementGivenHash(
|
||||
const fts2Hash *pH, /* The pH to be searched */
|
||||
const void *pKey, /* The key we are searching for */
|
||||
int nKey,
|
||||
int h /* The hash for this key. */
|
||||
){
|
||||
fts2HashElem *elem; /* Used to loop thru the element list */
|
||||
int count; /* Number of elements left to test */
|
||||
int (*xCompare)(const void*,int,const void*,int); /* comparison function */
|
||||
|
||||
if( pH->ht ){
|
||||
struct _fts2ht *pEntry = &pH->ht[h];
|
||||
elem = pEntry->chain;
|
||||
count = pEntry->count;
|
||||
xCompare = compareFunction(pH->keyClass);
|
||||
while( count-- && elem ){
|
||||
if( (*xCompare)(elem->pKey,elem->nKey,pKey,nKey)==0 ){
|
||||
return elem;
|
||||
}
|
||||
elem = elem->next;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Remove a single entry from the hash table given a pointer to that
|
||||
** element and a hash on the element's key.
|
||||
*/
|
||||
static void removeElementGivenHash(
|
||||
fts2Hash *pH, /* The pH containing "elem" */
|
||||
fts2HashElem* elem, /* The element to be removed from the pH */
|
||||
int h /* Hash value for the element */
|
||||
){
|
||||
struct _fts2ht *pEntry;
|
||||
if( elem->prev ){
|
||||
elem->prev->next = elem->next;
|
||||
}else{
|
||||
pH->first = elem->next;
|
||||
}
|
||||
if( elem->next ){
|
||||
elem->next->prev = elem->prev;
|
||||
}
|
||||
pEntry = &pH->ht[h];
|
||||
if( pEntry->chain==elem ){
|
||||
pEntry->chain = elem->next;
|
||||
}
|
||||
pEntry->count--;
|
||||
if( pEntry->count<=0 ){
|
||||
pEntry->chain = 0;
|
||||
}
|
||||
if( pH->copyKey && elem->pKey ){
|
||||
fts2HashFree(elem->pKey);
|
||||
}
|
||||
fts2HashFree( elem );
|
||||
pH->count--;
|
||||
if( pH->count<=0 ){
|
||||
assert( pH->first==0 );
|
||||
assert( pH->count==0 );
|
||||
fts2HashClear(pH);
|
||||
}
|
||||
}
|
||||
|
||||
/* Attempt to locate an element of the hash table pH with a key
|
||||
** that matches pKey,nKey. Return the data for this element if it is
|
||||
** found, or NULL if there is no match.
|
||||
*/
|
||||
void *sqlite3Fts2HashFind(const fts2Hash *pH, const void *pKey, int nKey){
|
||||
int h; /* A hash on key */
|
||||
fts2HashElem *elem; /* The element that matches key */
|
||||
int (*xHash)(const void*,int); /* The hash function */
|
||||
|
||||
if( pH==0 || pH->ht==0 ) return 0;
|
||||
xHash = hashFunction(pH->keyClass);
|
||||
assert( xHash!=0 );
|
||||
h = (*xHash)(pKey,nKey);
|
||||
assert( (pH->htsize & (pH->htsize-1))==0 );
|
||||
elem = findElementGivenHash(pH,pKey,nKey, h & (pH->htsize-1));
|
||||
return elem ? elem->data : 0;
|
||||
}
|
||||
|
||||
/* Insert an element into the hash table pH. The key is pKey,nKey
|
||||
** and the data is "data".
|
||||
**
|
||||
** If no element exists with a matching key, then a new
|
||||
** element is created. A copy of the key is made if the copyKey
|
||||
** flag is set. NULL is returned.
|
||||
**
|
||||
** If another element already exists with the same key, then the
|
||||
** new data replaces the old data and the old data is returned.
|
||||
** The key is not copied in this instance. If a malloc fails, then
|
||||
** the new data is returned and the hash table is unchanged.
|
||||
**
|
||||
** If the "data" parameter to this function is NULL, then the
|
||||
** element corresponding to "key" is removed from the hash table.
|
||||
*/
|
||||
void *sqlite3Fts2HashInsert(
|
||||
fts2Hash *pH, /* The hash table to insert into */
|
||||
const void *pKey, /* The key */
|
||||
int nKey, /* Number of bytes in the key */
|
||||
void *data /* The data */
|
||||
){
|
||||
int hraw; /* Raw hash value of the key */
|
||||
int h; /* the hash of the key modulo hash table size */
|
||||
fts2HashElem *elem; /* Used to loop thru the element list */
|
||||
fts2HashElem *new_elem; /* New element added to the pH */
|
||||
int (*xHash)(const void*,int); /* The hash function */
|
||||
|
||||
assert( pH!=0 );
|
||||
xHash = hashFunction(pH->keyClass);
|
||||
assert( xHash!=0 );
|
||||
hraw = (*xHash)(pKey, nKey);
|
||||
assert( (pH->htsize & (pH->htsize-1))==0 );
|
||||
h = hraw & (pH->htsize-1);
|
||||
elem = findElementGivenHash(pH,pKey,nKey,h);
|
||||
if( elem ){
|
||||
void *old_data = elem->data;
|
||||
if( data==0 ){
|
||||
removeElementGivenHash(pH,elem,h);
|
||||
}else{
|
||||
elem->data = data;
|
||||
}
|
||||
return old_data;
|
||||
}
|
||||
if( data==0 ) return 0;
|
||||
new_elem = (fts2HashElem*)fts2HashMalloc( sizeof(fts2HashElem) );
|
||||
if( new_elem==0 ) return data;
|
||||
if( pH->copyKey && pKey!=0 ){
|
||||
new_elem->pKey = fts2HashMalloc( nKey );
|
||||
if( new_elem->pKey==0 ){
|
||||
fts2HashFree(new_elem);
|
||||
return data;
|
||||
}
|
||||
memcpy((void*)new_elem->pKey, pKey, nKey);
|
||||
}else{
|
||||
new_elem->pKey = (void*)pKey;
|
||||
}
|
||||
new_elem->nKey = nKey;
|
||||
pH->count++;
|
||||
if( pH->htsize==0 ){
|
||||
rehash(pH,8);
|
||||
if( pH->htsize==0 ){
|
||||
pH->count = 0;
|
||||
fts2HashFree(new_elem);
|
||||
return data;
|
||||
}
|
||||
}
|
||||
if( pH->count > pH->htsize ){
|
||||
rehash(pH,pH->htsize*2);
|
||||
}
|
||||
assert( pH->htsize>0 );
|
||||
assert( (pH->htsize & (pH->htsize-1))==0 );
|
||||
h = hraw & (pH->htsize-1);
|
||||
insertElement(pH, &pH->ht[h], new_elem);
|
||||
new_elem->data = data;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */
|
||||
110
ext/fts2/fts2_hash.h
Normal file
110
ext/fts2/fts2_hash.h
Normal file
|
|
@ -0,0 +1,110 @@
|
|||
/*
|
||||
** 2001 September 22
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
*************************************************************************
|
||||
** This is the header file for the generic hash-table implementation
|
||||
** used in SQLite. We've modified it slightly to serve as a standalone
|
||||
** hash table implementation for the full-text indexing module.
|
||||
**
|
||||
*/
|
||||
#ifndef _FTS2_HASH_H_
|
||||
#define _FTS2_HASH_H_
|
||||
|
||||
/* Forward declarations of structures. */
|
||||
typedef struct fts2Hash fts2Hash;
|
||||
typedef struct fts2HashElem fts2HashElem;
|
||||
|
||||
/* A complete hash table is an instance of the following structure.
|
||||
** The internals of this structure are intended to be opaque -- client
|
||||
** code should not attempt to access or modify the fields of this structure
|
||||
** directly. Change this structure only by using the routines below.
|
||||
** However, many of the "procedures" and "functions" for modifying and
|
||||
** accessing this structure are really macros, so we can't really make
|
||||
** this structure opaque.
|
||||
*/
|
||||
struct fts2Hash {
|
||||
char keyClass; /* HASH_INT, _POINTER, _STRING, _BINARY */
|
||||
char copyKey; /* True if copy of key made on insert */
|
||||
int count; /* Number of entries in this table */
|
||||
fts2HashElem *first; /* The first element of the array */
|
||||
int htsize; /* Number of buckets in the hash table */
|
||||
struct _fts2ht { /* the hash table */
|
||||
int count; /* Number of entries with this hash */
|
||||
fts2HashElem *chain; /* Pointer to first entry with this hash */
|
||||
} *ht;
|
||||
};
|
||||
|
||||
/* Each element in the hash table is an instance of the following
|
||||
** structure. All elements are stored on a single doubly-linked list.
|
||||
**
|
||||
** Again, this structure is intended to be opaque, but it can't really
|
||||
** be opaque because it is used by macros.
|
||||
*/
|
||||
struct fts2HashElem {
|
||||
fts2HashElem *next, *prev; /* Next and previous elements in the table */
|
||||
void *data; /* Data associated with this element */
|
||||
void *pKey; int nKey; /* Key associated with this element */
|
||||
};
|
||||
|
||||
/*
|
||||
** There are 2 different modes of operation for a hash table:
|
||||
**
|
||||
** FTS2_HASH_STRING pKey points to a string that is nKey bytes long
|
||||
** (including the null-terminator, if any). Case
|
||||
** is respected in comparisons.
|
||||
**
|
||||
** FTS2_HASH_BINARY pKey points to binary data nKey bytes long.
|
||||
** memcmp() is used to compare keys.
|
||||
**
|
||||
** A copy of the key is made if the copyKey parameter to fts2HashInit is 1.
|
||||
*/
|
||||
#define FTS2_HASH_STRING 1
|
||||
#define FTS2_HASH_BINARY 2
|
||||
|
||||
/*
|
||||
** Access routines. To delete, insert a NULL pointer.
|
||||
*/
|
||||
void sqlite3Fts2HashInit(fts2Hash*, int keytype, int copyKey);
|
||||
void *sqlite3Fts2HashInsert(fts2Hash*, const void *pKey, int nKey, void *pData);
|
||||
void *sqlite3Fts2HashFind(const fts2Hash*, const void *pKey, int nKey);
|
||||
void sqlite3Fts2HashClear(fts2Hash*);
|
||||
|
||||
/*
|
||||
** Shorthand for the functions above
|
||||
*/
|
||||
#define fts2HashInit sqlite3Fts2HashInit
|
||||
#define fts2HashInsert sqlite3Fts2HashInsert
|
||||
#define fts2HashFind sqlite3Fts2HashFind
|
||||
#define fts2HashClear sqlite3Fts2HashClear
|
||||
|
||||
/*
|
||||
** Macros for looping over all elements of a hash table. The idiom is
|
||||
** like this:
|
||||
**
|
||||
** fts2Hash h;
|
||||
** fts2HashElem *p;
|
||||
** ...
|
||||
** for(p=fts2HashFirst(&h); p; p=fts2HashNext(p)){
|
||||
** SomeStructure *pData = fts2HashData(p);
|
||||
** // do something with pData
|
||||
** }
|
||||
*/
|
||||
#define fts2HashFirst(H) ((H)->first)
|
||||
#define fts2HashNext(E) ((E)->next)
|
||||
#define fts2HashData(E) ((E)->data)
|
||||
#define fts2HashKey(E) ((E)->pKey)
|
||||
#define fts2HashKeysize(E) ((E)->nKey)
|
||||
|
||||
/*
|
||||
** Number of entries in a hash table
|
||||
*/
|
||||
#define fts2HashCount(H) ((H)->count)
|
||||
|
||||
#endif /* _FTS2_HASH_H_ */
|
||||
260
ext/fts2/fts2_icu.c
Normal file
260
ext/fts2/fts2_icu.c
Normal file
|
|
@ -0,0 +1,260 @@
|
|||
/*
|
||||
** 2007 June 22
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
*************************************************************************
|
||||
** This file implements a tokenizer for fts2 based on the ICU library.
|
||||
**
|
||||
** $Id: fts2_icu.c,v 1.3 2008/12/18 05:30:26 danielk1977 Exp $
|
||||
*/
|
||||
|
||||
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2)
|
||||
#ifdef SQLITE_ENABLE_ICU
|
||||
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
#include "fts2_tokenizer.h"
|
||||
|
||||
#include <unicode/ubrk.h>
|
||||
#include <unicode/ucol.h>
|
||||
#include <unicode/ustring.h>
|
||||
#include <unicode/utf16.h>
|
||||
|
||||
typedef struct IcuTokenizer IcuTokenizer;
|
||||
typedef struct IcuCursor IcuCursor;
|
||||
|
||||
struct IcuTokenizer {
|
||||
sqlite3_tokenizer base;
|
||||
char *zLocale;
|
||||
};
|
||||
|
||||
struct IcuCursor {
|
||||
sqlite3_tokenizer_cursor base;
|
||||
|
||||
UBreakIterator *pIter; /* ICU break-iterator object */
|
||||
int nChar; /* Number of UChar elements in pInput */
|
||||
UChar *aChar; /* Copy of input using utf-16 encoding */
|
||||
int *aOffset; /* Offsets of each character in utf-8 input */
|
||||
|
||||
int nBuffer;
|
||||
char *zBuffer;
|
||||
|
||||
int iToken;
|
||||
};
|
||||
|
||||
/*
|
||||
** Create a new tokenizer instance.
|
||||
*/
|
||||
static int icuCreate(
|
||||
int argc, /* Number of entries in argv[] */
|
||||
const char * const *argv, /* Tokenizer creation arguments */
|
||||
sqlite3_tokenizer **ppTokenizer /* OUT: Created tokenizer */
|
||||
){
|
||||
IcuTokenizer *p;
|
||||
int n = 0;
|
||||
|
||||
if( argc>0 ){
|
||||
n = strlen(argv[0])+1;
|
||||
}
|
||||
p = (IcuTokenizer *)sqlite3_malloc(sizeof(IcuTokenizer)+n);
|
||||
if( !p ){
|
||||
return SQLITE_NOMEM;
|
||||
}
|
||||
memset(p, 0, sizeof(IcuTokenizer));
|
||||
|
||||
if( n ){
|
||||
p->zLocale = (char *)&p[1];
|
||||
memcpy(p->zLocale, argv[0], n);
|
||||
}
|
||||
|
||||
*ppTokenizer = (sqlite3_tokenizer *)p;
|
||||
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Destroy a tokenizer
|
||||
*/
|
||||
static int icuDestroy(sqlite3_tokenizer *pTokenizer){
|
||||
IcuTokenizer *p = (IcuTokenizer *)pTokenizer;
|
||||
sqlite3_free(p);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Prepare to begin tokenizing a particular string. The input
|
||||
** string to be tokenized is pInput[0..nBytes-1]. A cursor
|
||||
** used to incrementally tokenize this string is returned in
|
||||
** *ppCursor.
|
||||
*/
|
||||
static int icuOpen(
|
||||
sqlite3_tokenizer *pTokenizer, /* The tokenizer */
|
||||
const char *zInput, /* Input string */
|
||||
int nInput, /* Length of zInput in bytes */
|
||||
sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */
|
||||
){
|
||||
IcuTokenizer *p = (IcuTokenizer *)pTokenizer;
|
||||
IcuCursor *pCsr;
|
||||
|
||||
const int32_t opt = U_FOLD_CASE_DEFAULT;
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
int nChar;
|
||||
|
||||
UChar32 c;
|
||||
int iInput = 0;
|
||||
int iOut = 0;
|
||||
|
||||
*ppCursor = 0;
|
||||
|
||||
if( nInput<0 ){
|
||||
nInput = strlen(zInput);
|
||||
}
|
||||
nChar = nInput+1;
|
||||
pCsr = (IcuCursor *)sqlite3_malloc(
|
||||
sizeof(IcuCursor) + /* IcuCursor */
|
||||
((nChar+3)&~3) * sizeof(UChar) + /* IcuCursor.aChar[] */
|
||||
(nChar+1) * sizeof(int) /* IcuCursor.aOffset[] */
|
||||
);
|
||||
if( !pCsr ){
|
||||
return SQLITE_NOMEM;
|
||||
}
|
||||
memset(pCsr, 0, sizeof(IcuCursor));
|
||||
pCsr->aChar = (UChar *)&pCsr[1];
|
||||
pCsr->aOffset = (int *)&pCsr->aChar[(nChar+3)&~3];
|
||||
|
||||
pCsr->aOffset[iOut] = iInput;
|
||||
U8_NEXT(zInput, iInput, nInput, c);
|
||||
while( c>0 ){
|
||||
int isError = 0;
|
||||
c = u_foldCase(c, opt);
|
||||
U16_APPEND(pCsr->aChar, iOut, nChar, c, isError);
|
||||
if( isError ){
|
||||
sqlite3_free(pCsr);
|
||||
return SQLITE_ERROR;
|
||||
}
|
||||
pCsr->aOffset[iOut] = iInput;
|
||||
|
||||
if( iInput<nInput ){
|
||||
U8_NEXT(zInput, iInput, nInput, c);
|
||||
}else{
|
||||
c = 0;
|
||||
}
|
||||
}
|
||||
|
||||
pCsr->pIter = ubrk_open(UBRK_WORD, p->zLocale, pCsr->aChar, iOut, &status);
|
||||
if( !U_SUCCESS(status) ){
|
||||
sqlite3_free(pCsr);
|
||||
return SQLITE_ERROR;
|
||||
}
|
||||
pCsr->nChar = iOut;
|
||||
|
||||
ubrk_first(pCsr->pIter);
|
||||
*ppCursor = (sqlite3_tokenizer_cursor *)pCsr;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Close a tokenization cursor previously opened by a call to icuOpen().
|
||||
*/
|
||||
static int icuClose(sqlite3_tokenizer_cursor *pCursor){
|
||||
IcuCursor *pCsr = (IcuCursor *)pCursor;
|
||||
ubrk_close(pCsr->pIter);
|
||||
sqlite3_free(pCsr->zBuffer);
|
||||
sqlite3_free(pCsr);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Extract the next token from a tokenization cursor.
|
||||
*/
|
||||
static int icuNext(
|
||||
sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by simpleOpen */
|
||||
const char **ppToken, /* OUT: *ppToken is the token text */
|
||||
int *pnBytes, /* OUT: Number of bytes in token */
|
||||
int *piStartOffset, /* OUT: Starting offset of token */
|
||||
int *piEndOffset, /* OUT: Ending offset of token */
|
||||
int *piPosition /* OUT: Position integer of token */
|
||||
){
|
||||
IcuCursor *pCsr = (IcuCursor *)pCursor;
|
||||
|
||||
int iStart = 0;
|
||||
int iEnd = 0;
|
||||
int nByte = 0;
|
||||
|
||||
while( iStart==iEnd ){
|
||||
UChar32 c;
|
||||
|
||||
iStart = ubrk_current(pCsr->pIter);
|
||||
iEnd = ubrk_next(pCsr->pIter);
|
||||
if( iEnd==UBRK_DONE ){
|
||||
return SQLITE_DONE;
|
||||
}
|
||||
|
||||
while( iStart<iEnd ){
|
||||
int iWhite = iStart;
|
||||
U8_NEXT(pCsr->aChar, iWhite, pCsr->nChar, c);
|
||||
if( u_isspace(c) ){
|
||||
iStart = iWhite;
|
||||
}else{
|
||||
break;
|
||||
}
|
||||
}
|
||||
assert(iStart<=iEnd);
|
||||
}
|
||||
|
||||
do {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
if( nByte ){
|
||||
char *zNew = sqlite3_realloc(pCsr->zBuffer, nByte);
|
||||
if( !zNew ){
|
||||
return SQLITE_NOMEM;
|
||||
}
|
||||
pCsr->zBuffer = zNew;
|
||||
pCsr->nBuffer = nByte;
|
||||
}
|
||||
|
||||
u_strToUTF8(
|
||||
pCsr->zBuffer, pCsr->nBuffer, &nByte, /* Output vars */
|
||||
&pCsr->aChar[iStart], iEnd-iStart, /* Input vars */
|
||||
&status /* Output success/failure */
|
||||
);
|
||||
} while( nByte>pCsr->nBuffer );
|
||||
|
||||
*ppToken = pCsr->zBuffer;
|
||||
*pnBytes = nByte;
|
||||
*piStartOffset = pCsr->aOffset[iStart];
|
||||
*piEndOffset = pCsr->aOffset[iEnd];
|
||||
*piPosition = pCsr->iToken++;
|
||||
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** The set of routines that implement the simple tokenizer
|
||||
*/
|
||||
static const sqlite3_tokenizer_module icuTokenizerModule = {
|
||||
0, /* iVersion */
|
||||
icuCreate, /* xCreate */
|
||||
icuDestroy, /* xCreate */
|
||||
icuOpen, /* xOpen */
|
||||
icuClose, /* xClose */
|
||||
icuNext, /* xNext */
|
||||
};
|
||||
|
||||
/*
|
||||
** Set *ppModule to point at the implementation of the ICU tokenizer.
|
||||
*/
|
||||
void sqlite3Fts2IcuTokenizerModule(
|
||||
sqlite3_tokenizer_module const**ppModule
|
||||
){
|
||||
*ppModule = &icuTokenizerModule;
|
||||
}
|
||||
|
||||
#endif /* defined(SQLITE_ENABLE_ICU) */
|
||||
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */
|
||||
644
ext/fts2/fts2_porter.c
Normal file
644
ext/fts2/fts2_porter.c
Normal file
|
|
@ -0,0 +1,644 @@
|
|||
/*
|
||||
** 2006 September 30
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
*************************************************************************
|
||||
** Implementation of the full-text-search tokenizer that implements
|
||||
** a Porter stemmer.
|
||||
*/
|
||||
|
||||
/*
|
||||
** The code in this file is only compiled if:
|
||||
**
|
||||
** * The FTS2 module is being built as an extension
|
||||
** (in which case SQLITE_CORE is not defined), or
|
||||
**
|
||||
** * The FTS2 module is being built into the core of
|
||||
** SQLite (in which case SQLITE_ENABLE_FTS2 is defined).
|
||||
*/
|
||||
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2)
|
||||
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "sqlite3.h"
|
||||
#include "sqlite3ext.h"
|
||||
SQLITE_EXTENSION_INIT3
|
||||
#include "fts2_tokenizer.h"
|
||||
|
||||
/*
|
||||
** Class derived from sqlite3_tokenizer
|
||||
*/
|
||||
typedef struct porter_tokenizer {
|
||||
sqlite3_tokenizer base; /* Base class */
|
||||
} porter_tokenizer;
|
||||
|
||||
/*
|
||||
** Class derived from sqlit3_tokenizer_cursor
|
||||
*/
|
||||
typedef struct porter_tokenizer_cursor {
|
||||
sqlite3_tokenizer_cursor base;
|
||||
const char *zInput; /* input we are tokenizing */
|
||||
int nInput; /* size of the input */
|
||||
int iOffset; /* current position in zInput */
|
||||
int iToken; /* index of next token to be returned */
|
||||
char *zToken; /* storage for current token */
|
||||
int nAllocated; /* space allocated to zToken buffer */
|
||||
} porter_tokenizer_cursor;
|
||||
|
||||
|
||||
/* Forward declaration */
|
||||
static const sqlite3_tokenizer_module porterTokenizerModule;
|
||||
|
||||
|
||||
/*
|
||||
** Create a new tokenizer instance.
|
||||
*/
|
||||
static int porterCreate(
|
||||
int argc, const char * const *argv,
|
||||
sqlite3_tokenizer **ppTokenizer
|
||||
){
|
||||
porter_tokenizer *t;
|
||||
t = (porter_tokenizer *) sqlite3_malloc(sizeof(*t));
|
||||
if( t==NULL ) return SQLITE_NOMEM;
|
||||
memset(t, 0, sizeof(*t));
|
||||
*ppTokenizer = &t->base;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Destroy a tokenizer
|
||||
*/
|
||||
static int porterDestroy(sqlite3_tokenizer *pTokenizer){
|
||||
sqlite3_free(pTokenizer);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Prepare to begin tokenizing a particular string. The input
|
||||
** string to be tokenized is zInput[0..nInput-1]. A cursor
|
||||
** used to incrementally tokenize this string is returned in
|
||||
** *ppCursor.
|
||||
*/
|
||||
static int porterOpen(
|
||||
sqlite3_tokenizer *pTokenizer, /* The tokenizer */
|
||||
const char *zInput, int nInput, /* String to be tokenized */
|
||||
sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */
|
||||
){
|
||||
porter_tokenizer_cursor *c;
|
||||
|
||||
c = (porter_tokenizer_cursor *) sqlite3_malloc(sizeof(*c));
|
||||
if( c==NULL ) return SQLITE_NOMEM;
|
||||
|
||||
c->zInput = zInput;
|
||||
if( zInput==0 ){
|
||||
c->nInput = 0;
|
||||
}else if( nInput<0 ){
|
||||
c->nInput = (int)strlen(zInput);
|
||||
}else{
|
||||
c->nInput = nInput;
|
||||
}
|
||||
c->iOffset = 0; /* start tokenizing at the beginning */
|
||||
c->iToken = 0;
|
||||
c->zToken = NULL; /* no space allocated, yet. */
|
||||
c->nAllocated = 0;
|
||||
|
||||
*ppCursor = &c->base;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Close a tokenization cursor previously opened by a call to
|
||||
** porterOpen() above.
|
||||
*/
|
||||
static int porterClose(sqlite3_tokenizer_cursor *pCursor){
|
||||
porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor;
|
||||
sqlite3_free(c->zToken);
|
||||
sqlite3_free(c);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
/*
|
||||
** Vowel or consonant
|
||||
*/
|
||||
static const char cType[] = {
|
||||
0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0,
|
||||
1, 1, 1, 2, 1
|
||||
};
|
||||
|
||||
/*
|
||||
** isConsonant() and isVowel() determine if their first character in
|
||||
** the string they point to is a consonant or a vowel, according
|
||||
** to Porter ruls.
|
||||
**
|
||||
** A consonate is any letter other than 'a', 'e', 'i', 'o', or 'u'.
|
||||
** 'Y' is a consonant unless it follows another consonant,
|
||||
** in which case it is a vowel.
|
||||
**
|
||||
** In these routine, the letters are in reverse order. So the 'y' rule
|
||||
** is that 'y' is a consonant unless it is followed by another
|
||||
** consonent.
|
||||
*/
|
||||
static int isVowel(const char*);
|
||||
static int isConsonant(const char *z){
|
||||
int j;
|
||||
char x = *z;
|
||||
if( x==0 ) return 0;
|
||||
assert( x>='a' && x<='z' );
|
||||
j = cType[x-'a'];
|
||||
if( j<2 ) return j;
|
||||
return z[1]==0 || isVowel(z + 1);
|
||||
}
|
||||
static int isVowel(const char *z){
|
||||
int j;
|
||||
char x = *z;
|
||||
if( x==0 ) return 0;
|
||||
assert( x>='a' && x<='z' );
|
||||
j = cType[x-'a'];
|
||||
if( j<2 ) return 1-j;
|
||||
return isConsonant(z + 1);
|
||||
}
|
||||
|
||||
/*
|
||||
** Let any sequence of one or more vowels be represented by V and let
|
||||
** C be sequence of one or more consonants. Then every word can be
|
||||
** represented as:
|
||||
**
|
||||
** [C] (VC){m} [V]
|
||||
**
|
||||
** In prose: A word is an optional consonant followed by zero or
|
||||
** vowel-consonant pairs followed by an optional vowel. "m" is the
|
||||
** number of vowel consonant pairs. This routine computes the value
|
||||
** of m for the first i bytes of a word.
|
||||
**
|
||||
** Return true if the m-value for z is 1 or more. In other words,
|
||||
** return true if z contains at least one vowel that is followed
|
||||
** by a consonant.
|
||||
**
|
||||
** In this routine z[] is in reverse order. So we are really looking
|
||||
** for an instance of of a consonant followed by a vowel.
|
||||
*/
|
||||
static int m_gt_0(const char *z){
|
||||
while( isVowel(z) ){ z++; }
|
||||
if( *z==0 ) return 0;
|
||||
while( isConsonant(z) ){ z++; }
|
||||
return *z!=0;
|
||||
}
|
||||
|
||||
/* Like mgt0 above except we are looking for a value of m which is
|
||||
** exactly 1
|
||||
*/
|
||||
static int m_eq_1(const char *z){
|
||||
while( isVowel(z) ){ z++; }
|
||||
if( *z==0 ) return 0;
|
||||
while( isConsonant(z) ){ z++; }
|
||||
if( *z==0 ) return 0;
|
||||
while( isVowel(z) ){ z++; }
|
||||
if( *z==0 ) return 1;
|
||||
while( isConsonant(z) ){ z++; }
|
||||
return *z==0;
|
||||
}
|
||||
|
||||
/* Like mgt0 above except we are looking for a value of m>1 instead
|
||||
** or m>0
|
||||
*/
|
||||
static int m_gt_1(const char *z){
|
||||
while( isVowel(z) ){ z++; }
|
||||
if( *z==0 ) return 0;
|
||||
while( isConsonant(z) ){ z++; }
|
||||
if( *z==0 ) return 0;
|
||||
while( isVowel(z) ){ z++; }
|
||||
if( *z==0 ) return 0;
|
||||
while( isConsonant(z) ){ z++; }
|
||||
return *z!=0;
|
||||
}
|
||||
|
||||
/*
|
||||
** Return TRUE if there is a vowel anywhere within z[0..n-1]
|
||||
*/
|
||||
static int hasVowel(const char *z){
|
||||
while( isConsonant(z) ){ z++; }
|
||||
return *z!=0;
|
||||
}
|
||||
|
||||
/*
|
||||
** Return TRUE if the word ends in a double consonant.
|
||||
**
|
||||
** The text is reversed here. So we are really looking at
|
||||
** the first two characters of z[].
|
||||
*/
|
||||
static int doubleConsonant(const char *z){
|
||||
return isConsonant(z) && z[0]==z[1] && isConsonant(z+1);
|
||||
}
|
||||
|
||||
/*
|
||||
** Return TRUE if the word ends with three letters which
|
||||
** are consonant-vowel-consonent and where the final consonant
|
||||
** is not 'w', 'x', or 'y'.
|
||||
**
|
||||
** The word is reversed here. So we are really checking the
|
||||
** first three letters and the first one cannot be in [wxy].
|
||||
*/
|
||||
static int star_oh(const char *z){
|
||||
return
|
||||
z[0]!=0 && isConsonant(z) &&
|
||||
z[0]!='w' && z[0]!='x' && z[0]!='y' &&
|
||||
z[1]!=0 && isVowel(z+1) &&
|
||||
z[2]!=0 && isConsonant(z+2);
|
||||
}
|
||||
|
||||
/*
|
||||
** If the word ends with zFrom and xCond() is true for the stem
|
||||
** of the word that preceeds the zFrom ending, then change the
|
||||
** ending to zTo.
|
||||
**
|
||||
** The input word *pz and zFrom are both in reverse order. zTo
|
||||
** is in normal order.
|
||||
**
|
||||
** Return TRUE if zFrom matches. Return FALSE if zFrom does not
|
||||
** match. Not that TRUE is returned even if xCond() fails and
|
||||
** no substitution occurs.
|
||||
*/
|
||||
static int stem(
|
||||
char **pz, /* The word being stemmed (Reversed) */
|
||||
const char *zFrom, /* If the ending matches this... (Reversed) */
|
||||
const char *zTo, /* ... change the ending to this (not reversed) */
|
||||
int (*xCond)(const char*) /* Condition that must be true */
|
||||
){
|
||||
char *z = *pz;
|
||||
while( *zFrom && *zFrom==*z ){ z++; zFrom++; }
|
||||
if( *zFrom!=0 ) return 0;
|
||||
if( xCond && !xCond(z) ) return 1;
|
||||
while( *zTo ){
|
||||
*(--z) = *(zTo++);
|
||||
}
|
||||
*pz = z;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
** This is the fallback stemmer used when the porter stemmer is
|
||||
** inappropriate. The input word is copied into the output with
|
||||
** US-ASCII case folding. If the input word is too long (more
|
||||
** than 20 bytes if it contains no digits or more than 6 bytes if
|
||||
** it contains digits) then word is truncated to 20 or 6 bytes
|
||||
** by taking 10 or 3 bytes from the beginning and end.
|
||||
*/
|
||||
static void copy_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){
|
||||
int i, mx, j;
|
||||
int hasDigit = 0;
|
||||
for(i=0; i<nIn; i++){
|
||||
int c = zIn[i];
|
||||
if( c>='A' && c<='Z' ){
|
||||
zOut[i] = c - 'A' + 'a';
|
||||
}else{
|
||||
if( c>='0' && c<='9' ) hasDigit = 1;
|
||||
zOut[i] = c;
|
||||
}
|
||||
}
|
||||
mx = hasDigit ? 3 : 10;
|
||||
if( nIn>mx*2 ){
|
||||
for(j=mx, i=nIn-mx; i<nIn; i++, j++){
|
||||
zOut[j] = zOut[i];
|
||||
}
|
||||
i = j;
|
||||
}
|
||||
zOut[i] = 0;
|
||||
*pnOut = i;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Stem the input word zIn[0..nIn-1]. Store the output in zOut.
|
||||
** zOut is at least big enough to hold nIn bytes. Write the actual
|
||||
** size of the output word (exclusive of the '\0' terminator) into *pnOut.
|
||||
**
|
||||
** Any upper-case characters in the US-ASCII character set ([A-Z])
|
||||
** are converted to lower case. Upper-case UTF characters are
|
||||
** unchanged.
|
||||
**
|
||||
** Words that are longer than about 20 bytes are stemmed by retaining
|
||||
** a few bytes from the beginning and the end of the word. If the
|
||||
** word contains digits, 3 bytes are taken from the beginning and
|
||||
** 3 bytes from the end. For long words without digits, 10 bytes
|
||||
** are taken from each end. US-ASCII case folding still applies.
|
||||
**
|
||||
** If the input word contains not digits but does characters not
|
||||
** in [a-zA-Z] then no stemming is attempted and this routine just
|
||||
** copies the input into the input into the output with US-ASCII
|
||||
** case folding.
|
||||
**
|
||||
** Stemming never increases the length of the word. So there is
|
||||
** no chance of overflowing the zOut buffer.
|
||||
*/
|
||||
static void porter_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){
|
||||
int i, j, c;
|
||||
char zReverse[28];
|
||||
char *z, *z2;
|
||||
if( nIn<3 || nIn>=sizeof(zReverse)-7 ){
|
||||
/* The word is too big or too small for the porter stemmer.
|
||||
** Fallback to the copy stemmer */
|
||||
copy_stemmer(zIn, nIn, zOut, pnOut);
|
||||
return;
|
||||
}
|
||||
for(i=0, j=sizeof(zReverse)-6; i<nIn; i++, j--){
|
||||
c = zIn[i];
|
||||
if( c>='A' && c<='Z' ){
|
||||
zReverse[j] = c + 'a' - 'A';
|
||||
}else if( c>='a' && c<='z' ){
|
||||
zReverse[j] = c;
|
||||
}else{
|
||||
/* The use of a character not in [a-zA-Z] means that we fallback
|
||||
** to the copy stemmer */
|
||||
copy_stemmer(zIn, nIn, zOut, pnOut);
|
||||
return;
|
||||
}
|
||||
}
|
||||
memset(&zReverse[sizeof(zReverse)-5], 0, 5);
|
||||
z = &zReverse[j+1];
|
||||
|
||||
|
||||
/* Step 1a */
|
||||
if( z[0]=='s' ){
|
||||
if(
|
||||
!stem(&z, "sess", "ss", 0) &&
|
||||
!stem(&z, "sei", "i", 0) &&
|
||||
!stem(&z, "ss", "ss", 0)
|
||||
){
|
||||
z++;
|
||||
}
|
||||
}
|
||||
|
||||
/* Step 1b */
|
||||
z2 = z;
|
||||
if( stem(&z, "dee", "ee", m_gt_0) ){
|
||||
/* Do nothing. The work was all in the test */
|
||||
}else if(
|
||||
(stem(&z, "gni", "", hasVowel) || stem(&z, "de", "", hasVowel))
|
||||
&& z!=z2
|
||||
){
|
||||
if( stem(&z, "ta", "ate", 0) ||
|
||||
stem(&z, "lb", "ble", 0) ||
|
||||
stem(&z, "zi", "ize", 0) ){
|
||||
/* Do nothing. The work was all in the test */
|
||||
}else if( doubleConsonant(z) && (*z!='l' && *z!='s' && *z!='z') ){
|
||||
z++;
|
||||
}else if( m_eq_1(z) && star_oh(z) ){
|
||||
*(--z) = 'e';
|
||||
}
|
||||
}
|
||||
|
||||
/* Step 1c */
|
||||
if( z[0]=='y' && hasVowel(z+1) ){
|
||||
z[0] = 'i';
|
||||
}
|
||||
|
||||
/* Step 2 */
|
||||
switch( z[1] ){
|
||||
case 'a':
|
||||
stem(&z, "lanoita", "ate", m_gt_0) ||
|
||||
stem(&z, "lanoit", "tion", m_gt_0);
|
||||
break;
|
||||
case 'c':
|
||||
stem(&z, "icne", "ence", m_gt_0) ||
|
||||
stem(&z, "icna", "ance", m_gt_0);
|
||||
break;
|
||||
case 'e':
|
||||
stem(&z, "rezi", "ize", m_gt_0);
|
||||
break;
|
||||
case 'g':
|
||||
stem(&z, "igol", "log", m_gt_0);
|
||||
break;
|
||||
case 'l':
|
||||
stem(&z, "ilb", "ble", m_gt_0) ||
|
||||
stem(&z, "illa", "al", m_gt_0) ||
|
||||
stem(&z, "iltne", "ent", m_gt_0) ||
|
||||
stem(&z, "ile", "e", m_gt_0) ||
|
||||
stem(&z, "ilsuo", "ous", m_gt_0);
|
||||
break;
|
||||
case 'o':
|
||||
stem(&z, "noitazi", "ize", m_gt_0) ||
|
||||
stem(&z, "noita", "ate", m_gt_0) ||
|
||||
stem(&z, "rota", "ate", m_gt_0);
|
||||
break;
|
||||
case 's':
|
||||
stem(&z, "msila", "al", m_gt_0) ||
|
||||
stem(&z, "ssenevi", "ive", m_gt_0) ||
|
||||
stem(&z, "ssenluf", "ful", m_gt_0) ||
|
||||
stem(&z, "ssensuo", "ous", m_gt_0);
|
||||
break;
|
||||
case 't':
|
||||
stem(&z, "itila", "al", m_gt_0) ||
|
||||
stem(&z, "itivi", "ive", m_gt_0) ||
|
||||
stem(&z, "itilib", "ble", m_gt_0);
|
||||
break;
|
||||
}
|
||||
|
||||
/* Step 3 */
|
||||
switch( z[0] ){
|
||||
case 'e':
|
||||
stem(&z, "etaci", "ic", m_gt_0) ||
|
||||
stem(&z, "evita", "", m_gt_0) ||
|
||||
stem(&z, "ezila", "al", m_gt_0);
|
||||
break;
|
||||
case 'i':
|
||||
stem(&z, "itici", "ic", m_gt_0);
|
||||
break;
|
||||
case 'l':
|
||||
stem(&z, "laci", "ic", m_gt_0) ||
|
||||
stem(&z, "luf", "", m_gt_0);
|
||||
break;
|
||||
case 's':
|
||||
stem(&z, "ssen", "", m_gt_0);
|
||||
break;
|
||||
}
|
||||
|
||||
/* Step 4 */
|
||||
switch( z[1] ){
|
||||
case 'a':
|
||||
if( z[0]=='l' && m_gt_1(z+2) ){
|
||||
z += 2;
|
||||
}
|
||||
break;
|
||||
case 'c':
|
||||
if( z[0]=='e' && z[2]=='n' && (z[3]=='a' || z[3]=='e') && m_gt_1(z+4) ){
|
||||
z += 4;
|
||||
}
|
||||
break;
|
||||
case 'e':
|
||||
if( z[0]=='r' && m_gt_1(z+2) ){
|
||||
z += 2;
|
||||
}
|
||||
break;
|
||||
case 'i':
|
||||
if( z[0]=='c' && m_gt_1(z+2) ){
|
||||
z += 2;
|
||||
}
|
||||
break;
|
||||
case 'l':
|
||||
if( z[0]=='e' && z[2]=='b' && (z[3]=='a' || z[3]=='i') && m_gt_1(z+4) ){
|
||||
z += 4;
|
||||
}
|
||||
break;
|
||||
case 'n':
|
||||
if( z[0]=='t' ){
|
||||
if( z[2]=='a' ){
|
||||
if( m_gt_1(z+3) ){
|
||||
z += 3;
|
||||
}
|
||||
}else if( z[2]=='e' ){
|
||||
stem(&z, "tneme", "", m_gt_1) ||
|
||||
stem(&z, "tnem", "", m_gt_1) ||
|
||||
stem(&z, "tne", "", m_gt_1);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case 'o':
|
||||
if( z[0]=='u' ){
|
||||
if( m_gt_1(z+2) ){
|
||||
z += 2;
|
||||
}
|
||||
}else if( z[3]=='s' || z[3]=='t' ){
|
||||
stem(&z, "noi", "", m_gt_1);
|
||||
}
|
||||
break;
|
||||
case 's':
|
||||
if( z[0]=='m' && z[2]=='i' && m_gt_1(z+3) ){
|
||||
z += 3;
|
||||
}
|
||||
break;
|
||||
case 't':
|
||||
stem(&z, "eta", "", m_gt_1) ||
|
||||
stem(&z, "iti", "", m_gt_1);
|
||||
break;
|
||||
case 'u':
|
||||
if( z[0]=='s' && z[2]=='o' && m_gt_1(z+3) ){
|
||||
z += 3;
|
||||
}
|
||||
break;
|
||||
case 'v':
|
||||
case 'z':
|
||||
if( z[0]=='e' && z[2]=='i' && m_gt_1(z+3) ){
|
||||
z += 3;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
/* Step 5a */
|
||||
if( z[0]=='e' ){
|
||||
if( m_gt_1(z+1) ){
|
||||
z++;
|
||||
}else if( m_eq_1(z+1) && !star_oh(z+1) ){
|
||||
z++;
|
||||
}
|
||||
}
|
||||
|
||||
/* Step 5b */
|
||||
if( m_gt_1(z) && z[0]=='l' && z[1]=='l' ){
|
||||
z++;
|
||||
}
|
||||
|
||||
/* z[] is now the stemmed word in reverse order. Flip it back
|
||||
** around into forward order and return.
|
||||
*/
|
||||
*pnOut = i = strlen(z);
|
||||
zOut[i] = 0;
|
||||
while( *z ){
|
||||
zOut[--i] = *(z++);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
** Characters that can be part of a token. We assume any character
|
||||
** whose value is greater than 0x80 (any UTF character) can be
|
||||
** part of a token. In other words, delimiters all must have
|
||||
** values of 0x7f or lower.
|
||||
*/
|
||||
static const char porterIdChar[] = {
|
||||
/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */
|
||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */
|
||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */
|
||||
};
|
||||
#define isDelim(C) (((ch=C)&0x80)==0 && (ch<0x30 || !porterIdChar[ch-0x30]))
|
||||
|
||||
/*
|
||||
** Extract the next token from a tokenization cursor. The cursor must
|
||||
** have been opened by a prior call to porterOpen().
|
||||
*/
|
||||
static int porterNext(
|
||||
sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by porterOpen */
|
||||
const char **pzToken, /* OUT: *pzToken is the token text */
|
||||
int *pnBytes, /* OUT: Number of bytes in token */
|
||||
int *piStartOffset, /* OUT: Starting offset of token */
|
||||
int *piEndOffset, /* OUT: Ending offset of token */
|
||||
int *piPosition /* OUT: Position integer of token */
|
||||
){
|
||||
porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor;
|
||||
const char *z = c->zInput;
|
||||
|
||||
while( c->iOffset<c->nInput ){
|
||||
int iStartOffset, ch;
|
||||
|
||||
/* Scan past delimiter characters */
|
||||
while( c->iOffset<c->nInput && isDelim(z[c->iOffset]) ){
|
||||
c->iOffset++;
|
||||
}
|
||||
|
||||
/* Count non-delimiter characters. */
|
||||
iStartOffset = c->iOffset;
|
||||
while( c->iOffset<c->nInput && !isDelim(z[c->iOffset]) ){
|
||||
c->iOffset++;
|
||||
}
|
||||
|
||||
if( c->iOffset>iStartOffset ){
|
||||
int n = c->iOffset-iStartOffset;
|
||||
if( n>c->nAllocated ){
|
||||
c->nAllocated = n+20;
|
||||
c->zToken = sqlite3_realloc(c->zToken, c->nAllocated);
|
||||
if( c->zToken==NULL ) return SQLITE_NOMEM;
|
||||
}
|
||||
porter_stemmer(&z[iStartOffset], n, c->zToken, pnBytes);
|
||||
*pzToken = c->zToken;
|
||||
*piStartOffset = iStartOffset;
|
||||
*piEndOffset = c->iOffset;
|
||||
*piPosition = c->iToken++;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
}
|
||||
return SQLITE_DONE;
|
||||
}
|
||||
|
||||
/*
|
||||
** The set of routines that implement the porter-stemmer tokenizer
|
||||
*/
|
||||
static const sqlite3_tokenizer_module porterTokenizerModule = {
|
||||
0,
|
||||
porterCreate,
|
||||
porterDestroy,
|
||||
porterOpen,
|
||||
porterClose,
|
||||
porterNext,
|
||||
};
|
||||
|
||||
/*
|
||||
** Allocate a new porter tokenizer. Return a pointer to the new
|
||||
** tokenizer in *ppModule
|
||||
*/
|
||||
void sqlite3Fts2PorterTokenizerModule(
|
||||
sqlite3_tokenizer_module const**ppModule
|
||||
){
|
||||
*ppModule = &porterTokenizerModule;
|
||||
}
|
||||
|
||||
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */
|
||||
375
ext/fts2/fts2_tokenizer.c
Normal file
375
ext/fts2/fts2_tokenizer.c
Normal file
|
|
@ -0,0 +1,375 @@
|
|||
/*
|
||||
** 2007 June 22
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
******************************************************************************
|
||||
**
|
||||
** This is part of an SQLite module implementing full-text search.
|
||||
** This particular file implements the generic tokenizer interface.
|
||||
*/
|
||||
|
||||
/*
|
||||
** The code in this file is only compiled if:
|
||||
**
|
||||
** * The FTS2 module is being built as an extension
|
||||
** (in which case SQLITE_CORE is not defined), or
|
||||
**
|
||||
** * The FTS2 module is being built into the core of
|
||||
** SQLite (in which case SQLITE_ENABLE_FTS2 is defined).
|
||||
*/
|
||||
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2)
|
||||
|
||||
|
||||
#include "sqlite3.h"
|
||||
#include "sqlite3ext.h"
|
||||
SQLITE_EXTENSION_INIT3
|
||||
|
||||
#include "fts2_hash.h"
|
||||
#include "fts2_tokenizer.h"
|
||||
#include <assert.h>
|
||||
|
||||
/*
|
||||
** Implementation of the SQL scalar function for accessing the underlying
|
||||
** hash table. This function may be called as follows:
|
||||
**
|
||||
** SELECT <function-name>(<key-name>);
|
||||
** SELECT <function-name>(<key-name>, <pointer>);
|
||||
**
|
||||
** where <function-name> is the name passed as the second argument
|
||||
** to the sqlite3Fts2InitHashTable() function (e.g. 'fts2_tokenizer').
|
||||
**
|
||||
** If the <pointer> argument is specified, it must be a blob value
|
||||
** containing a pointer to be stored as the hash data corresponding
|
||||
** to the string <key-name>. If <pointer> is not specified, then
|
||||
** the string <key-name> must already exist in the has table. Otherwise,
|
||||
** an error is returned.
|
||||
**
|
||||
** Whether or not the <pointer> argument is specified, the value returned
|
||||
** is a blob containing the pointer stored as the hash data corresponding
|
||||
** to string <key-name> (after the hash-table is updated, if applicable).
|
||||
*/
|
||||
static void scalarFunc(
|
||||
sqlite3_context *context,
|
||||
int argc,
|
||||
sqlite3_value **argv
|
||||
){
|
||||
fts2Hash *pHash;
|
||||
void *pPtr = 0;
|
||||
const unsigned char *zName;
|
||||
int nName;
|
||||
|
||||
assert( argc==1 || argc==2 );
|
||||
|
||||
pHash = (fts2Hash *)sqlite3_user_data(context);
|
||||
|
||||
zName = sqlite3_value_text(argv[0]);
|
||||
nName = sqlite3_value_bytes(argv[0])+1;
|
||||
|
||||
if( argc==2 ){
|
||||
void *pOld;
|
||||
int n = sqlite3_value_bytes(argv[1]);
|
||||
if( n!=sizeof(pPtr) ){
|
||||
sqlite3_result_error(context, "argument type mismatch", -1);
|
||||
return;
|
||||
}
|
||||
pPtr = *(void **)sqlite3_value_blob(argv[1]);
|
||||
pOld = sqlite3Fts2HashInsert(pHash, (void *)zName, nName, pPtr);
|
||||
if( pOld==pPtr ){
|
||||
sqlite3_result_error(context, "out of memory", -1);
|
||||
return;
|
||||
}
|
||||
}else{
|
||||
pPtr = sqlite3Fts2HashFind(pHash, zName, nName);
|
||||
if( !pPtr ){
|
||||
char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName);
|
||||
sqlite3_result_error(context, zErr, -1);
|
||||
sqlite3_free(zErr);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
sqlite3_result_blob(context, (void *)&pPtr, sizeof(pPtr), SQLITE_TRANSIENT);
|
||||
}
|
||||
|
||||
#ifdef SQLITE_TEST
|
||||
|
||||
#if defined(INCLUDE_SQLITE_TCL_H)
|
||||
# include "sqlite_tcl.h"
|
||||
#else
|
||||
# include "tcl.h"
|
||||
#endif
|
||||
#include <string.h>
|
||||
|
||||
/*
|
||||
** Implementation of a special SQL scalar function for testing tokenizers
|
||||
** designed to be used in concert with the Tcl testing framework. This
|
||||
** function must be called with two arguments:
|
||||
**
|
||||
** SELECT <function-name>(<key-name>, <input-string>);
|
||||
** SELECT <function-name>(<key-name>, <pointer>);
|
||||
**
|
||||
** where <function-name> is the name passed as the second argument
|
||||
** to the sqlite3Fts2InitHashTable() function (e.g. 'fts2_tokenizer')
|
||||
** concatenated with the string '_test' (e.g. 'fts2_tokenizer_test').
|
||||
**
|
||||
** The return value is a string that may be interpreted as a Tcl
|
||||
** list. For each token in the <input-string>, three elements are
|
||||
** added to the returned list. The first is the token position, the
|
||||
** second is the token text (folded, stemmed, etc.) and the third is the
|
||||
** substring of <input-string> associated with the token. For example,
|
||||
** using the built-in "simple" tokenizer:
|
||||
**
|
||||
** SELECT fts_tokenizer_test('simple', 'I don't see how');
|
||||
**
|
||||
** will return the string:
|
||||
**
|
||||
** "{0 i I 1 dont don't 2 see see 3 how how}"
|
||||
**
|
||||
*/
|
||||
static void testFunc(
|
||||
sqlite3_context *context,
|
||||
int argc,
|
||||
sqlite3_value **argv
|
||||
){
|
||||
fts2Hash *pHash;
|
||||
sqlite3_tokenizer_module *p;
|
||||
sqlite3_tokenizer *pTokenizer = 0;
|
||||
sqlite3_tokenizer_cursor *pCsr = 0;
|
||||
|
||||
const char *zErr = 0;
|
||||
|
||||
const char *zName;
|
||||
int nName;
|
||||
const char *zInput;
|
||||
int nInput;
|
||||
|
||||
const char *zArg = 0;
|
||||
|
||||
const char *zToken;
|
||||
int nToken;
|
||||
int iStart;
|
||||
int iEnd;
|
||||
int iPos;
|
||||
|
||||
Tcl_Obj *pRet;
|
||||
|
||||
assert( argc==2 || argc==3 );
|
||||
|
||||
nName = sqlite3_value_bytes(argv[0]);
|
||||
zName = (const char *)sqlite3_value_text(argv[0]);
|
||||
nInput = sqlite3_value_bytes(argv[argc-1]);
|
||||
zInput = (const char *)sqlite3_value_text(argv[argc-1]);
|
||||
|
||||
if( argc==3 ){
|
||||
zArg = (const char *)sqlite3_value_text(argv[1]);
|
||||
}
|
||||
|
||||
pHash = (fts2Hash *)sqlite3_user_data(context);
|
||||
p = (sqlite3_tokenizer_module *)sqlite3Fts2HashFind(pHash, zName, nName+1);
|
||||
|
||||
if( !p ){
|
||||
char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName);
|
||||
sqlite3_result_error(context, zErr, -1);
|
||||
sqlite3_free(zErr);
|
||||
return;
|
||||
}
|
||||
|
||||
pRet = Tcl_NewObj();
|
||||
Tcl_IncrRefCount(pRet);
|
||||
|
||||
if( SQLITE_OK!=p->xCreate(zArg ? 1 : 0, &zArg, &pTokenizer) ){
|
||||
zErr = "error in xCreate()";
|
||||
goto finish;
|
||||
}
|
||||
pTokenizer->pModule = p;
|
||||
if( SQLITE_OK!=p->xOpen(pTokenizer, zInput, nInput, &pCsr) ){
|
||||
zErr = "error in xOpen()";
|
||||
goto finish;
|
||||
}
|
||||
pCsr->pTokenizer = pTokenizer;
|
||||
|
||||
while( SQLITE_OK==p->xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos) ){
|
||||
Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iPos));
|
||||
Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken));
|
||||
zToken = &zInput[iStart];
|
||||
nToken = iEnd-iStart;
|
||||
Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken));
|
||||
}
|
||||
|
||||
if( SQLITE_OK!=p->xClose(pCsr) ){
|
||||
zErr = "error in xClose()";
|
||||
goto finish;
|
||||
}
|
||||
if( SQLITE_OK!=p->xDestroy(pTokenizer) ){
|
||||
zErr = "error in xDestroy()";
|
||||
goto finish;
|
||||
}
|
||||
|
||||
finish:
|
||||
if( zErr ){
|
||||
sqlite3_result_error(context, zErr, -1);
|
||||
}else{
|
||||
sqlite3_result_text(context, Tcl_GetString(pRet), -1, SQLITE_TRANSIENT);
|
||||
}
|
||||
Tcl_DecrRefCount(pRet);
|
||||
}
|
||||
|
||||
static
|
||||
int registerTokenizer(
|
||||
sqlite3 *db,
|
||||
char *zName,
|
||||
const sqlite3_tokenizer_module *p
|
||||
){
|
||||
int rc;
|
||||
sqlite3_stmt *pStmt;
|
||||
const char zSql[] = "SELECT fts2_tokenizer(?, ?)";
|
||||
|
||||
rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
|
||||
if( rc!=SQLITE_OK ){
|
||||
return rc;
|
||||
}
|
||||
|
||||
sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
|
||||
sqlite3_bind_blob(pStmt, 2, &p, sizeof(p), SQLITE_STATIC);
|
||||
sqlite3_step(pStmt);
|
||||
|
||||
return sqlite3_finalize(pStmt);
|
||||
}
|
||||
|
||||
static
|
||||
int queryFts2Tokenizer(
|
||||
sqlite3 *db,
|
||||
char *zName,
|
||||
const sqlite3_tokenizer_module **pp
|
||||
){
|
||||
int rc;
|
||||
sqlite3_stmt *pStmt;
|
||||
const char zSql[] = "SELECT fts2_tokenizer(?)";
|
||||
|
||||
*pp = 0;
|
||||
rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
|
||||
if( rc!=SQLITE_OK ){
|
||||
return rc;
|
||||
}
|
||||
|
||||
sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
|
||||
if( SQLITE_ROW==sqlite3_step(pStmt) ){
|
||||
if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){
|
||||
memcpy(pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp));
|
||||
}
|
||||
}
|
||||
|
||||
return sqlite3_finalize(pStmt);
|
||||
}
|
||||
|
||||
void sqlite3Fts2SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule);
|
||||
|
||||
/*
|
||||
** Implementation of the scalar function fts2_tokenizer_internal_test().
|
||||
** This function is used for testing only, it is not included in the
|
||||
** build unless SQLITE_TEST is defined.
|
||||
**
|
||||
** The purpose of this is to test that the fts2_tokenizer() function
|
||||
** can be used as designed by the C-code in the queryFts2Tokenizer and
|
||||
** registerTokenizer() functions above. These two functions are repeated
|
||||
** in the README.tokenizer file as an example, so it is important to
|
||||
** test them.
|
||||
**
|
||||
** To run the tests, evaluate the fts2_tokenizer_internal_test() scalar
|
||||
** function with no arguments. An assert() will fail if a problem is
|
||||
** detected. i.e.:
|
||||
**
|
||||
** SELECT fts2_tokenizer_internal_test();
|
||||
**
|
||||
*/
|
||||
static void intTestFunc(
|
||||
sqlite3_context *context,
|
||||
int argc,
|
||||
sqlite3_value **argv
|
||||
){
|
||||
int rc;
|
||||
const sqlite3_tokenizer_module *p1;
|
||||
const sqlite3_tokenizer_module *p2;
|
||||
sqlite3 *db = (sqlite3 *)sqlite3_user_data(context);
|
||||
|
||||
/* Test the query function */
|
||||
sqlite3Fts2SimpleTokenizerModule(&p1);
|
||||
rc = queryFts2Tokenizer(db, "simple", &p2);
|
||||
assert( rc==SQLITE_OK );
|
||||
assert( p1==p2 );
|
||||
rc = queryFts2Tokenizer(db, "nosuchtokenizer", &p2);
|
||||
assert( rc==SQLITE_ERROR );
|
||||
assert( p2==0 );
|
||||
assert( 0==strcmp(sqlite3_errmsg(db), "unknown tokenizer: nosuchtokenizer") );
|
||||
|
||||
/* Test the storage function */
|
||||
rc = registerTokenizer(db, "nosuchtokenizer", p1);
|
||||
assert( rc==SQLITE_OK );
|
||||
rc = queryFts2Tokenizer(db, "nosuchtokenizer", &p2);
|
||||
assert( rc==SQLITE_OK );
|
||||
assert( p2==p1 );
|
||||
|
||||
sqlite3_result_text(context, "ok", -1, SQLITE_STATIC);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
** Set up SQL objects in database db used to access the contents of
|
||||
** the hash table pointed to by argument pHash. The hash table must
|
||||
** been initialized to use string keys, and to take a private copy
|
||||
** of the key when a value is inserted. i.e. by a call similar to:
|
||||
**
|
||||
** sqlite3Fts2HashInit(pHash, FTS2_HASH_STRING, 1);
|
||||
**
|
||||
** This function adds a scalar function (see header comment above
|
||||
** scalarFunc() in this file for details) and, if ENABLE_TABLE is
|
||||
** defined at compilation time, a temporary virtual table (see header
|
||||
** comment above struct HashTableVtab) to the database schema. Both
|
||||
** provide read/write access to the contents of *pHash.
|
||||
**
|
||||
** The third argument to this function, zName, is used as the name
|
||||
** of both the scalar and, if created, the virtual table.
|
||||
*/
|
||||
int sqlite3Fts2InitHashTable(
|
||||
sqlite3 *db,
|
||||
fts2Hash *pHash,
|
||||
const char *zName
|
||||
){
|
||||
int rc = SQLITE_OK;
|
||||
void *p = (void *)pHash;
|
||||
const int any = SQLITE_ANY;
|
||||
char *zTest = 0;
|
||||
char *zTest2 = 0;
|
||||
|
||||
#ifdef SQLITE_TEST
|
||||
void *pdb = (void *)db;
|
||||
zTest = sqlite3_mprintf("%s_test", zName);
|
||||
zTest2 = sqlite3_mprintf("%s_internal_test", zName);
|
||||
if( !zTest || !zTest2 ){
|
||||
rc = SQLITE_NOMEM;
|
||||
}
|
||||
#endif
|
||||
|
||||
if( rc!=SQLITE_OK
|
||||
|| (rc = sqlite3_create_function(db, zName, 1, any, p, scalarFunc, 0, 0))
|
||||
|| (rc = sqlite3_create_function(db, zName, 2, any, p, scalarFunc, 0, 0))
|
||||
#ifdef SQLITE_TEST
|
||||
|| (rc = sqlite3_create_function(db, zTest, 2, any, p, testFunc, 0, 0))
|
||||
|| (rc = sqlite3_create_function(db, zTest, 3, any, p, testFunc, 0, 0))
|
||||
|| (rc = sqlite3_create_function(db, zTest2, 0, any, pdb, intTestFunc, 0, 0))
|
||||
#endif
|
||||
);
|
||||
|
||||
sqlite3_free(zTest);
|
||||
sqlite3_free(zTest2);
|
||||
return rc;
|
||||
}
|
||||
|
||||
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */
|
||||
145
ext/fts2/fts2_tokenizer.h
Normal file
145
ext/fts2/fts2_tokenizer.h
Normal file
|
|
@ -0,0 +1,145 @@
|
|||
/*
|
||||
** 2006 July 10
|
||||
**
|
||||
** The author disclaims copyright to this source code.
|
||||
**
|
||||
*************************************************************************
|
||||
** Defines the interface to tokenizers used by fulltext-search. There
|
||||
** are three basic components:
|
||||
**
|
||||
** sqlite3_tokenizer_module is a singleton defining the tokenizer
|
||||
** interface functions. This is essentially the class structure for
|
||||
** tokenizers.
|
||||
**
|
||||
** sqlite3_tokenizer is used to define a particular tokenizer, perhaps
|
||||
** including customization information defined at creation time.
|
||||
**
|
||||
** sqlite3_tokenizer_cursor is generated by a tokenizer to generate
|
||||
** tokens from a particular input.
|
||||
*/
|
||||
#ifndef _FTS2_TOKENIZER_H_
|
||||
#define _FTS2_TOKENIZER_H_
|
||||
|
||||
/* TODO(shess) Only used for SQLITE_OK and SQLITE_DONE at this time.
|
||||
** If tokenizers are to be allowed to call sqlite3_*() functions, then
|
||||
** we will need a way to register the API consistently.
|
||||
*/
|
||||
#include "sqlite3.h"
|
||||
|
||||
/*
|
||||
** Structures used by the tokenizer interface. When a new tokenizer
|
||||
** implementation is registered, the caller provides a pointer to
|
||||
** an sqlite3_tokenizer_module containing pointers to the callback
|
||||
** functions that make up an implementation.
|
||||
**
|
||||
** When an fts2 table is created, it passes any arguments passed to
|
||||
** the tokenizer clause of the CREATE VIRTUAL TABLE statement to the
|
||||
** sqlite3_tokenizer_module.xCreate() function of the requested tokenizer
|
||||
** implementation. The xCreate() function in turn returns an
|
||||
** sqlite3_tokenizer structure representing the specific tokenizer to
|
||||
** be used for the fts2 table (customized by the tokenizer clause arguments).
|
||||
**
|
||||
** To tokenize an input buffer, the sqlite3_tokenizer_module.xOpen()
|
||||
** method is called. It returns an sqlite3_tokenizer_cursor object
|
||||
** that may be used to tokenize a specific input buffer based on
|
||||
** the tokenization rules supplied by a specific sqlite3_tokenizer
|
||||
** object.
|
||||
*/
|
||||
typedef struct sqlite3_tokenizer_module sqlite3_tokenizer_module;
|
||||
typedef struct sqlite3_tokenizer sqlite3_tokenizer;
|
||||
typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor;
|
||||
|
||||
struct sqlite3_tokenizer_module {
|
||||
|
||||
/*
|
||||
** Structure version. Should always be set to 0.
|
||||
*/
|
||||
int iVersion;
|
||||
|
||||
/*
|
||||
** Create a new tokenizer. The values in the argv[] array are the
|
||||
** arguments passed to the "tokenizer" clause of the CREATE VIRTUAL
|
||||
** TABLE statement that created the fts2 table. For example, if
|
||||
** the following SQL is executed:
|
||||
**
|
||||
** CREATE .. USING fts2( ... , tokenizer <tokenizer-name> arg1 arg2)
|
||||
**
|
||||
** then argc is set to 2, and the argv[] array contains pointers
|
||||
** to the strings "arg1" and "arg2".
|
||||
**
|
||||
** This method should return either SQLITE_OK (0), or an SQLite error
|
||||
** code. If SQLITE_OK is returned, then *ppTokenizer should be set
|
||||
** to point at the newly created tokenizer structure. The generic
|
||||
** sqlite3_tokenizer.pModule variable should not be initialized by
|
||||
** this callback. The caller will do so.
|
||||
*/
|
||||
int (*xCreate)(
|
||||
int argc, /* Size of argv array */
|
||||
const char *const*argv, /* Tokenizer argument strings */
|
||||
sqlite3_tokenizer **ppTokenizer /* OUT: Created tokenizer */
|
||||
);
|
||||
|
||||
/*
|
||||
** Destroy an existing tokenizer. The fts2 module calls this method
|
||||
** exactly once for each successful call to xCreate().
|
||||
*/
|
||||
int (*xDestroy)(sqlite3_tokenizer *pTokenizer);
|
||||
|
||||
/*
|
||||
** Create a tokenizer cursor to tokenize an input buffer. The caller
|
||||
** is responsible for ensuring that the input buffer remains valid
|
||||
** until the cursor is closed (using the xClose() method).
|
||||
*/
|
||||
int (*xOpen)(
|
||||
sqlite3_tokenizer *pTokenizer, /* Tokenizer object */
|
||||
const char *pInput, int nBytes, /* Input buffer */
|
||||
sqlite3_tokenizer_cursor **ppCursor /* OUT: Created tokenizer cursor */
|
||||
);
|
||||
|
||||
/*
|
||||
** Destroy an existing tokenizer cursor. The fts2 module calls this
|
||||
** method exactly once for each successful call to xOpen().
|
||||
*/
|
||||
int (*xClose)(sqlite3_tokenizer_cursor *pCursor);
|
||||
|
||||
/*
|
||||
** Retrieve the next token from the tokenizer cursor pCursor. This
|
||||
** method should either return SQLITE_OK and set the values of the
|
||||
** "OUT" variables identified below, or SQLITE_DONE to indicate that
|
||||
** the end of the buffer has been reached, or an SQLite error code.
|
||||
**
|
||||
** *ppToken should be set to point at a buffer containing the
|
||||
** normalized version of the token (i.e. after any case-folding and/or
|
||||
** stemming has been performed). *pnBytes should be set to the length
|
||||
** of this buffer in bytes. The input text that generated the token is
|
||||
** identified by the byte offsets returned in *piStartOffset and
|
||||
** *piEndOffset.
|
||||
**
|
||||
** The buffer *ppToken is set to point at is managed by the tokenizer
|
||||
** implementation. It is only required to be valid until the next call
|
||||
** to xNext() or xClose().
|
||||
*/
|
||||
/* TODO(shess) current implementation requires pInput to be
|
||||
** nul-terminated. This should either be fixed, or pInput/nBytes
|
||||
** should be converted to zInput.
|
||||
*/
|
||||
int (*xNext)(
|
||||
sqlite3_tokenizer_cursor *pCursor, /* Tokenizer cursor */
|
||||
const char **ppToken, int *pnBytes, /* OUT: Normalized text for token */
|
||||
int *piStartOffset, /* OUT: Byte offset of token in input buffer */
|
||||
int *piEndOffset, /* OUT: Byte offset of end of token in input buffer */
|
||||
int *piPosition /* OUT: Number of tokens returned before this one */
|
||||
);
|
||||
};
|
||||
|
||||
struct sqlite3_tokenizer {
|
||||
const sqlite3_tokenizer_module *pModule; /* The module for this tokenizer */
|
||||
/* Tokenizer implementations will typically add additional fields */
|
||||
};
|
||||
|
||||
struct sqlite3_tokenizer_cursor {
|
||||
sqlite3_tokenizer *pTokenizer; /* Tokenizer for this cursor. */
|
||||
/* Tokenizer implementations will typically add additional fields */
|
||||
};
|
||||
|
||||
#endif /* _FTS2_TOKENIZER_H_ */
|
||||
233
ext/fts2/fts2_tokenizer1.c
Normal file
233
ext/fts2/fts2_tokenizer1.c
Normal file
|
|
@ -0,0 +1,233 @@
|
|||
/*
|
||||
** 2006 Oct 10
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
******************************************************************************
|
||||
**
|
||||
** Implementation of the "simple" full-text-search tokenizer.
|
||||
*/
|
||||
|
||||
/*
|
||||
** The code in this file is only compiled if:
|
||||
**
|
||||
** * The FTS2 module is being built as an extension
|
||||
** (in which case SQLITE_CORE is not defined), or
|
||||
**
|
||||
** * The FTS2 module is being built into the core of
|
||||
** SQLite (in which case SQLITE_ENABLE_FTS2 is defined).
|
||||
*/
|
||||
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2)
|
||||
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "sqlite3.h"
|
||||
#include "sqlite3ext.h"
|
||||
SQLITE_EXTENSION_INIT3
|
||||
#include "fts2_tokenizer.h"
|
||||
|
||||
typedef struct simple_tokenizer {
|
||||
sqlite3_tokenizer base;
|
||||
char delim[128]; /* flag ASCII delimiters */
|
||||
} simple_tokenizer;
|
||||
|
||||
typedef struct simple_tokenizer_cursor {
|
||||
sqlite3_tokenizer_cursor base;
|
||||
const char *pInput; /* input we are tokenizing */
|
||||
int nBytes; /* size of the input */
|
||||
int iOffset; /* current position in pInput */
|
||||
int iToken; /* index of next token to be returned */
|
||||
char *pToken; /* storage for current token */
|
||||
int nTokenAllocated; /* space allocated to zToken buffer */
|
||||
} simple_tokenizer_cursor;
|
||||
|
||||
|
||||
/* Forward declaration */
|
||||
static const sqlite3_tokenizer_module simpleTokenizerModule;
|
||||
|
||||
static int simpleDelim(simple_tokenizer *t, unsigned char c){
|
||||
return c<0x80 && t->delim[c];
|
||||
}
|
||||
|
||||
/*
|
||||
** Create a new tokenizer instance.
|
||||
*/
|
||||
static int simpleCreate(
|
||||
int argc, const char * const *argv,
|
||||
sqlite3_tokenizer **ppTokenizer
|
||||
){
|
||||
simple_tokenizer *t;
|
||||
|
||||
t = (simple_tokenizer *) sqlite3_malloc(sizeof(*t));
|
||||
if( t==NULL ) return SQLITE_NOMEM;
|
||||
memset(t, 0, sizeof(*t));
|
||||
|
||||
/* TODO(shess) Delimiters need to remain the same from run to run,
|
||||
** else we need to reindex. One solution would be a meta-table to
|
||||
** track such information in the database, then we'd only want this
|
||||
** information on the initial create.
|
||||
*/
|
||||
if( argc>1 ){
|
||||
int i, n = strlen(argv[1]);
|
||||
for(i=0; i<n; i++){
|
||||
unsigned char ch = argv[1][i];
|
||||
/* We explicitly don't support UTF-8 delimiters for now. */
|
||||
if( ch>=0x80 ){
|
||||
sqlite3_free(t);
|
||||
return SQLITE_ERROR;
|
||||
}
|
||||
t->delim[ch] = 1;
|
||||
}
|
||||
} else {
|
||||
/* Mark non-alphanumeric ASCII characters as delimiters */
|
||||
int i;
|
||||
for(i=1; i<0x80; i++){
|
||||
t->delim[i] = !((i>='0' && i<='9') || (i>='A' && i<='Z') ||
|
||||
(i>='a' && i<='z'));
|
||||
}
|
||||
}
|
||||
|
||||
*ppTokenizer = &t->base;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Destroy a tokenizer
|
||||
*/
|
||||
static int simpleDestroy(sqlite3_tokenizer *pTokenizer){
|
||||
sqlite3_free(pTokenizer);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Prepare to begin tokenizing a particular string. The input
|
||||
** string to be tokenized is pInput[0..nBytes-1]. A cursor
|
||||
** used to incrementally tokenize this string is returned in
|
||||
** *ppCursor.
|
||||
*/
|
||||
static int simpleOpen(
|
||||
sqlite3_tokenizer *pTokenizer, /* The tokenizer */
|
||||
const char *pInput, int nBytes, /* String to be tokenized */
|
||||
sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */
|
||||
){
|
||||
simple_tokenizer_cursor *c;
|
||||
|
||||
c = (simple_tokenizer_cursor *) sqlite3_malloc(sizeof(*c));
|
||||
if( c==NULL ) return SQLITE_NOMEM;
|
||||
|
||||
c->pInput = pInput;
|
||||
if( pInput==0 ){
|
||||
c->nBytes = 0;
|
||||
}else if( nBytes<0 ){
|
||||
c->nBytes = (int)strlen(pInput);
|
||||
}else{
|
||||
c->nBytes = nBytes;
|
||||
}
|
||||
c->iOffset = 0; /* start tokenizing at the beginning */
|
||||
c->iToken = 0;
|
||||
c->pToken = NULL; /* no space allocated, yet. */
|
||||
c->nTokenAllocated = 0;
|
||||
|
||||
*ppCursor = &c->base;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Close a tokenization cursor previously opened by a call to
|
||||
** simpleOpen() above.
|
||||
*/
|
||||
static int simpleClose(sqlite3_tokenizer_cursor *pCursor){
|
||||
simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor;
|
||||
sqlite3_free(c->pToken);
|
||||
sqlite3_free(c);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Extract the next token from a tokenization cursor. The cursor must
|
||||
** have been opened by a prior call to simpleOpen().
|
||||
*/
|
||||
static int simpleNext(
|
||||
sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by simpleOpen */
|
||||
const char **ppToken, /* OUT: *ppToken is the token text */
|
||||
int *pnBytes, /* OUT: Number of bytes in token */
|
||||
int *piStartOffset, /* OUT: Starting offset of token */
|
||||
int *piEndOffset, /* OUT: Ending offset of token */
|
||||
int *piPosition /* OUT: Position integer of token */
|
||||
){
|
||||
simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor;
|
||||
simple_tokenizer *t = (simple_tokenizer *) pCursor->pTokenizer;
|
||||
unsigned char *p = (unsigned char *)c->pInput;
|
||||
|
||||
while( c->iOffset<c->nBytes ){
|
||||
int iStartOffset;
|
||||
|
||||
/* Scan past delimiter characters */
|
||||
while( c->iOffset<c->nBytes && simpleDelim(t, p[c->iOffset]) ){
|
||||
c->iOffset++;
|
||||
}
|
||||
|
||||
/* Count non-delimiter characters. */
|
||||
iStartOffset = c->iOffset;
|
||||
while( c->iOffset<c->nBytes && !simpleDelim(t, p[c->iOffset]) ){
|
||||
c->iOffset++;
|
||||
}
|
||||
|
||||
if( c->iOffset>iStartOffset ){
|
||||
int i, n = c->iOffset-iStartOffset;
|
||||
if( n>c->nTokenAllocated ){
|
||||
c->nTokenAllocated = n+20;
|
||||
c->pToken = sqlite3_realloc(c->pToken, c->nTokenAllocated);
|
||||
if( c->pToken==NULL ) return SQLITE_NOMEM;
|
||||
}
|
||||
for(i=0; i<n; i++){
|
||||
/* TODO(shess) This needs expansion to handle UTF-8
|
||||
** case-insensitivity.
|
||||
*/
|
||||
unsigned char ch = p[iStartOffset+i];
|
||||
c->pToken[i] = (ch>='A' && ch<='Z') ? (ch - 'A' + 'a') : ch;
|
||||
}
|
||||
*ppToken = c->pToken;
|
||||
*pnBytes = n;
|
||||
*piStartOffset = iStartOffset;
|
||||
*piEndOffset = c->iOffset;
|
||||
*piPosition = c->iToken++;
|
||||
|
||||
return SQLITE_OK;
|
||||
}
|
||||
}
|
||||
return SQLITE_DONE;
|
||||
}
|
||||
|
||||
/*
|
||||
** The set of routines that implement the simple tokenizer
|
||||
*/
|
||||
static const sqlite3_tokenizer_module simpleTokenizerModule = {
|
||||
0,
|
||||
simpleCreate,
|
||||
simpleDestroy,
|
||||
simpleOpen,
|
||||
simpleClose,
|
||||
simpleNext,
|
||||
};
|
||||
|
||||
/*
|
||||
** Allocate a new simple tokenizer. Return a pointer to the new
|
||||
** tokenizer in *ppModule
|
||||
*/
|
||||
void sqlite3Fts2SimpleTokenizerModule(
|
||||
sqlite3_tokenizer_module const**ppModule
|
||||
){
|
||||
*ppModule = &simpleTokenizerModule;
|
||||
}
|
||||
|
||||
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */
|
||||
116
ext/fts2/mkfts2amal.tcl
Normal file
116
ext/fts2/mkfts2amal.tcl
Normal file
|
|
@ -0,0 +1,116 @@
|
|||
#!/usr/bin/tclsh
|
||||
#
|
||||
# This script builds a single C code file holding all of FTS2 code.
|
||||
# The name of the output file is fts2amal.c. To build this file,
|
||||
# first do:
|
||||
#
|
||||
# make target_source
|
||||
#
|
||||
# The make target above moves all of the source code files into
|
||||
# a subdirectory named "tsrc". (This script expects to find the files
|
||||
# there and will not work if they are not found.)
|
||||
#
|
||||
# After the "tsrc" directory has been created and populated, run
|
||||
# this script:
|
||||
#
|
||||
# tclsh mkfts2amal.tcl
|
||||
#
|
||||
# The amalgamated FTS2 code will be written into fts2amal.c
|
||||
#
|
||||
|
||||
# Open the output file and write a header comment at the beginning
|
||||
# of the file.
|
||||
#
|
||||
set out [open fts2amal.c w]
|
||||
set today [clock format [clock seconds] -format "%Y-%m-%d %H:%M:%S UTC" -gmt 1]
|
||||
puts $out [subst \
|
||||
{/******************************************************************************
|
||||
** This file is an amalgamation of separate C source files from the SQLite
|
||||
** Full Text Search extension 2 (fts2). By combining all the individual C
|
||||
** code files into this single large file, the entire code can be compiled
|
||||
** as a one translation unit. This allows many compilers to do optimizations
|
||||
** that would not be possible if the files were compiled separately. It also
|
||||
** makes the code easier to import into other projects.
|
||||
**
|
||||
** This amalgamation was generated on $today.
|
||||
*/}]
|
||||
|
||||
# These are the header files used by FTS2. The first time any of these
|
||||
# files are seen in a #include statement in the C code, include the complete
|
||||
# text of the file in-line. The file only needs to be included once.
|
||||
#
|
||||
foreach hdr {
|
||||
fts2.h
|
||||
fts2_hash.h
|
||||
fts2_tokenizer.h
|
||||
sqlite3.h
|
||||
sqlite3ext.h
|
||||
} {
|
||||
set available_hdr($hdr) 1
|
||||
}
|
||||
|
||||
# 78 stars used for comment formatting.
|
||||
set s78 \
|
||||
{*****************************************************************************}
|
||||
|
||||
# Insert a comment into the code
|
||||
#
|
||||
proc section_comment {text} {
|
||||
global out s78
|
||||
set n [string length $text]
|
||||
set nstar [expr {60 - $n}]
|
||||
set stars [string range $s78 0 $nstar]
|
||||
puts $out "/************** $text $stars/"
|
||||
}
|
||||
|
||||
# Read the source file named $filename and write it into the
|
||||
# sqlite3.c output file. If any #include statements are seen,
|
||||
# process them approprately.
|
||||
#
|
||||
proc copy_file {filename} {
|
||||
global seen_hdr available_hdr out
|
||||
set tail [file tail $filename]
|
||||
section_comment "Begin file $tail"
|
||||
set in [open $filename r]
|
||||
while {![eof $in]} {
|
||||
set line [gets $in]
|
||||
if {[regexp {^#\s*include\s+["<]([^">]+)[">]} $line all hdr]} {
|
||||
if {[info exists available_hdr($hdr)]} {
|
||||
if {$available_hdr($hdr)} {
|
||||
section_comment "Include $hdr in the middle of $tail"
|
||||
copy_file tsrc/$hdr
|
||||
section_comment "Continuing where we left off in $tail"
|
||||
}
|
||||
} elseif {![info exists seen_hdr($hdr)]} {
|
||||
set seen_hdr($hdr) 1
|
||||
puts $out $line
|
||||
}
|
||||
} elseif {[regexp {^#ifdef __cplusplus} $line]} {
|
||||
puts $out "#if 0"
|
||||
} elseif {[regexp {^#line} $line]} {
|
||||
# Skip #line directives.
|
||||
} else {
|
||||
puts $out $line
|
||||
}
|
||||
}
|
||||
close $in
|
||||
section_comment "End of $tail"
|
||||
}
|
||||
|
||||
|
||||
# Process the source files. Process files containing commonly
|
||||
# used subroutines first in order to help the compiler find
|
||||
# inlining opportunities.
|
||||
#
|
||||
foreach file {
|
||||
fts2.c
|
||||
fts2_hash.c
|
||||
fts2_porter.c
|
||||
fts2_tokenizer.c
|
||||
fts2_tokenizer1.c
|
||||
fts2_icu.c
|
||||
} {
|
||||
copy_file tsrc/$file
|
||||
}
|
||||
|
||||
close $out
|
||||
176
ext/fts3/README.content
Normal file
176
ext/fts3/README.content
Normal file
|
|
@ -0,0 +1,176 @@
|
|||
|
||||
FTS4 CONTENT OPTION
|
||||
|
||||
Normally, in order to create a full-text index on a dataset, the FTS4
|
||||
module stores a copy of all indexed documents in a specially created
|
||||
database table.
|
||||
|
||||
As of SQLite version 3.7.9, FTS4 supports a new option - "content" -
|
||||
designed to extend FTS4 to support the creation of full-text indexes where:
|
||||
|
||||
* The indexed documents are not stored within the SQLite database
|
||||
at all (a "contentless" FTS4 table), or
|
||||
|
||||
* The indexed documents are stored in a database table created and
|
||||
managed by the user (an "external content" FTS4 table).
|
||||
|
||||
Because the indexed documents themselves are usually much larger than
|
||||
the full-text index, the content option can sometimes be used to achieve
|
||||
significant space savings.
|
||||
|
||||
CONTENTLESS FTS4 TABLES
|
||||
|
||||
In order to create an FTS4 table that does not store a copy of the indexed
|
||||
documents at all, the content option should be set to an empty string.
|
||||
For example, the following SQL creates such an FTS4 table with three
|
||||
columns - "a", "b", and "c":
|
||||
|
||||
CREATE VIRTUAL TABLE t1 USING fts4(content="", a, b, c);
|
||||
|
||||
Data can be inserted into such an FTS4 table using an INSERT statements.
|
||||
However, unlike ordinary FTS4 tables, the user must supply an explicit
|
||||
integer docid value. For example:
|
||||
|
||||
-- This statement is Ok:
|
||||
INSERT INTO t1(docid, a, b, c) VALUES(1, 'a b c', 'd e f', 'g h i');
|
||||
|
||||
-- This statement causes an error, as no docid value has been provided:
|
||||
INSERT INTO t1(a, b, c) VALUES('j k l', 'm n o', 'p q r');
|
||||
|
||||
It is not possible to UPDATE or DELETE a row stored in a contentless FTS4
|
||||
table. Attempting to do so is an error.
|
||||
|
||||
Contentless FTS4 tables also support SELECT statements. However, it is
|
||||
an error to attempt to retrieve the value of any table column other than
|
||||
the docid column. The auxiliary function matchinfo() may be used, but
|
||||
snippet() and offsets() may not. For example:
|
||||
|
||||
-- The following statements are Ok:
|
||||
SELECT docid FROM t1 WHERE t1 MATCH 'xxx';
|
||||
SELECT docid FROM t1 WHERE a MATCH 'xxx';
|
||||
SELECT matchinfo(t1) FROM t1 WHERE t1 MATCH 'xxx';
|
||||
|
||||
-- The following statements all cause errors, as the value of columns
|
||||
-- other than docid are required to evaluate them.
|
||||
SELECT * FROM t1;
|
||||
SELECT a, b FROM t1 WHERE t1 MATCH 'xxx';
|
||||
SELECT docid FROM t1 WHERE a LIKE 'xxx%';
|
||||
SELECT snippet(t1) FROM t1 WHERE t1 MATCH 'xxx';
|
||||
|
||||
Errors related to attempting to retrieve column values other than docid
|
||||
are runtime errors that occur within sqlite3_step(). In some cases, for
|
||||
example if the MATCH expression in a SELECT query matches zero rows, there
|
||||
may be no error at all even if a statement does refer to column values
|
||||
other than docid.
|
||||
|
||||
EXTERNAL CONTENT FTS4 TABLES
|
||||
|
||||
An "external content" FTS4 table is similar to a contentless table, except
|
||||
that if evaluation of a query requires the value of a column other than
|
||||
docid, FTS4 attempts to retrieve that value from a table (or view, or
|
||||
virtual table) nominated by the user (hereafter referred to as the "content
|
||||
table"). The FTS4 module never writes to the content table, and writing
|
||||
to the content table does not affect the full-text index. It is the
|
||||
responsibility of the user to ensure that the content table and the
|
||||
full-text index are consistent.
|
||||
|
||||
An external content FTS4 table is created by setting the content option
|
||||
to the name of a table (or view, or virtual table) that may be queried by
|
||||
FTS4 to retrieve column values when required. If the nominated table does
|
||||
not exist, then an external content table behaves in the same way as
|
||||
a contentless table. For example:
|
||||
|
||||
CREATE TABLE t2(id INTEGER PRIMARY KEY, a, b, c);
|
||||
CREATE VIRTUAL TABLE t3 USING fts4(content="t2", a, c);
|
||||
|
||||
Assuming the nominated table does exist, then its columns must be the same
|
||||
as or a superset of those defined for the FTS table.
|
||||
|
||||
When a users query on the FTS table requires a column value other than
|
||||
docid, FTS attempts to read this value from the corresponding column of
|
||||
the row in the content table with a rowid value equal to the current FTS
|
||||
docid. Or, if such a row cannot be found in the content table, a NULL
|
||||
value is used instead. For example:
|
||||
|
||||
CREATE TABLE t2(id INTEGER PRIMARY KEY, a, b, c, d);
|
||||
CREATE VIRTUAL TABLE t3 USING fts4(content="t2", b, c);
|
||||
|
||||
INSERT INTO t2 VALUES(2, 'a b', 'c d', 'e f');
|
||||
INSERT INTO t2 VALUES(3, 'g h', 'i j', 'k l');
|
||||
INSERT INTO t3(docid, b, c) SELECT id, b, c FROM t2;
|
||||
|
||||
-- The following query returns a single row with two columns containing
|
||||
-- the text values "i j" and "k l".
|
||||
--
|
||||
-- The query uses the full-text index to discover that the MATCH
|
||||
-- term matches the row with docid=3. It then retrieves the values
|
||||
-- of columns b and c from the row with rowid=3 in the content table
|
||||
-- to return.
|
||||
--
|
||||
SELECT * FROM t3 WHERE t3 MATCH 'k';
|
||||
|
||||
-- Following the UPDATE, the query still returns a single row, this
|
||||
-- time containing the text values "xxx" and "yyy". This is because the
|
||||
-- full-text index still indicates that the row with docid=3 matches
|
||||
-- the FTS4 query 'k', even though the documents stored in the content
|
||||
-- table have been modified.
|
||||
--
|
||||
UPDATE t2 SET b = 'xxx', c = 'yyy' WHERE rowid = 3;
|
||||
SELECT * FROM t3 WHERE t3 MATCH 'k';
|
||||
|
||||
-- Following the DELETE below, the query returns one row containing two
|
||||
-- NULL values. NULL values are returned because FTS is unable to find
|
||||
-- a row with rowid=3 within the content table.
|
||||
--
|
||||
DELETE FROM t2;
|
||||
SELECT * FROM t3 WHERE t3 MATCH 'k';
|
||||
|
||||
When a row is deleted from an external content FTS4 table, FTS4 needs to
|
||||
retrieve the column values of the row being deleted from the content table.
|
||||
This is so that FTS4 can update the full-text index entries for each token
|
||||
that occurs within the deleted row to indicate that that row has been
|
||||
deleted. If the content table row cannot be found, or if it contains values
|
||||
inconsistent with the contents of the FTS index, the results can be difficult
|
||||
to predict. The FTS index may be left containing entries corresponding to the
|
||||
deleted row, which can lead to seemingly nonsensical results being returned
|
||||
by subsequent SELECT queries. The same applies when a row is updated, as
|
||||
internally an UPDATE is the same as a DELETE followed by an INSERT.
|
||||
|
||||
Instead of writing separately to the full-text index and the content table,
|
||||
some users may wish to use database triggers to keep the full-text index
|
||||
up to date with respect to the set of documents stored in the content table.
|
||||
For example, using the tables from earlier examples:
|
||||
|
||||
CREATE TRIGGER t2_bu BEFORE UPDATE ON t2 BEGIN
|
||||
DELETE FROM t3 WHERE docid=old.rowid;
|
||||
END;
|
||||
CREATE TRIGGER t2_bd BEFORE DELETE ON t2 BEGIN
|
||||
DELETE FROM t3 WHERE docid=old.rowid;
|
||||
END;
|
||||
|
||||
CREATE TRIGGER t2_bu AFTER UPDATE ON t2 BEGIN
|
||||
INSERT INTO t3(docid, b, c) VALUES(new.rowid, new.b, new.c);
|
||||
END;
|
||||
CREATE TRIGGER t2_bd AFTER INSERT ON t2 BEGIN
|
||||
INSERT INTO t3(docid, b, c) VALUES(new.rowid, new.b, new.c);
|
||||
END;
|
||||
|
||||
The DELETE trigger must be fired before the actual delete takes place
|
||||
on the content table. This is so that FTS4 can still retrieve the original
|
||||
values in order to update the full-text index. And the INSERT trigger must
|
||||
be fired after the new row is inserted, so as to handle the case where the
|
||||
rowid is assigned automatically within the system. The UPDATE trigger must
|
||||
be split into two parts, one fired before and one after the update of the
|
||||
content table, for the same reasons.
|
||||
|
||||
FTS4 features a special command similar to the 'optimize' command that
|
||||
deletes the entire full-text index and rebuilds it based on the current
|
||||
set of documents in the content table. Assuming again that "t3" is the
|
||||
name of the external content FTS4 table, the command is:
|
||||
|
||||
INSERT INTO t3(t3) VALUES('rebuild');
|
||||
|
||||
This command may also be used with ordinary FTS4 tables, although it may
|
||||
only be useful if the full-text index has somehow become corrupt. It is an
|
||||
error to attempt to rebuild the full-text index maintained by a contentless
|
||||
FTS4 table.
|
||||
209
ext/fts3/README.syntax
Normal file
209
ext/fts3/README.syntax
Normal file
|
|
@ -0,0 +1,209 @@
|
|||
|
||||
1. OVERVIEW
|
||||
|
||||
This README file describes the syntax of the arguments that may be passed to
|
||||
the FTS3 MATCH operator used for full-text queries. For example, if table
|
||||
"t1" is an Fts3 virtual table, the following SQL query:
|
||||
|
||||
SELECT * FROM t1 WHERE <col> MATCH <full-text query>
|
||||
|
||||
may be used to retrieve all rows that match a specified for full-text query.
|
||||
The text "<col>" should be replaced by either the name of the fts3 table
|
||||
(in this case "t1"), or by the name of one of the columns of the fts3
|
||||
table. <full-text-query> should be replaced by an SQL expression that
|
||||
computes to a string containing an Fts3 query.
|
||||
|
||||
If the left-hand-side of the MATCH operator is set to the name of the
|
||||
fts3 table, then by default the query may be matched against any column
|
||||
of the table. If it is set to a column name, then by default the query
|
||||
may only match the specified column. In both cases this may be overriden
|
||||
as part of the query text (see sections 2 and 3 below).
|
||||
|
||||
As of SQLite version 3.6.8, Fts3 supports two slightly different query
|
||||
formats; the standard syntax, which is used by default, and the enhanced
|
||||
query syntax which can be selected by compiling with the pre-processor
|
||||
symbol SQLITE_ENABLE_FTS3_PARENTHESIS defined.
|
||||
|
||||
-DSQLITE_ENABLE_FTS3_PARENTHESIS
|
||||
|
||||
2. STANDARD QUERY SYNTAX
|
||||
|
||||
When using the standard Fts3 query syntax, a query usually consists of a
|
||||
list of terms (words) separated by white-space characters. To match a
|
||||
query, a row (or column) of an Fts3 table must contain each of the specified
|
||||
terms. For example, the following query:
|
||||
|
||||
<col> MATCH 'hello world'
|
||||
|
||||
matches rows (or columns, if <col> is the name of a column name) that
|
||||
contain at least one instance of the token "hello", and at least one
|
||||
instance of the token "world". Tokens may be grouped into phrases using
|
||||
quotation marks. In this case, a matching row or column must contain each
|
||||
of the tokens in the phrase in the order specified, with no intervening
|
||||
tokens. For example, the query:
|
||||
|
||||
<col> MATCH '"hello world" joe"
|
||||
|
||||
matches the first of the following two documents, but not the second or
|
||||
third:
|
||||
|
||||
"'Hello world', said Joe."
|
||||
"One should always greet the world with a cheery hello, thought Joe."
|
||||
"How many hello world programs could their be?"
|
||||
|
||||
As well as grouping tokens together by phrase, the binary NEAR operator
|
||||
may be used to search for rows that contain two or more specified tokens
|
||||
or phrases within a specified proximity of each other. The NEAR operator
|
||||
must always be specified in upper case. The word "near" in lower or mixed
|
||||
case is treated as an ordinary token. For example, the following query:
|
||||
|
||||
<col> MATCH 'engineering NEAR consultancy'
|
||||
|
||||
matches rows that contain both the "engineering" and "consultancy" tokens
|
||||
in the same column with not more than 10 other words between them. It does
|
||||
not matter which of the two terms occurs first in the document, only that
|
||||
they be seperated by only 10 tokens or less. The user may also specify
|
||||
a different required proximity by adding "/N" immediately after the NEAR
|
||||
operator, where N is an integer. For example:
|
||||
|
||||
<col> MATCH 'engineering NEAR/5 consultancy'
|
||||
|
||||
searches for a row containing an instance of each specified token seperated
|
||||
by not more than 5 other tokens. More than one NEAR operator can be used
|
||||
in as sequence. For example this query:
|
||||
|
||||
<col> MATCH 'reliable NEAR/2 engineering NEAR/5 consultancy'
|
||||
|
||||
searches for a row that contains an instance of the token "reliable"
|
||||
seperated by not more than two tokens from an instance of "engineering",
|
||||
which is in turn separated by not more than 5 other tokens from an
|
||||
instance of the term "consultancy". Phrases enclosed in quotes may
|
||||
also be used as arguments to the NEAR operator.
|
||||
|
||||
Similar to the NEAR operator, one or more tokens or phrases may be
|
||||
separated by OR operators. In this case, only one of the specified tokens
|
||||
or phrases must appear in the document. For example, the query:
|
||||
|
||||
<col> MATCH 'hello OR world'
|
||||
|
||||
matches rows that contain either the term "hello", or the term "world",
|
||||
or both. Note that unlike in many programming languages, the OR operator
|
||||
has a higher precedence than the AND operators implied between white-space
|
||||
separated tokens. The following query matches documents that contain the
|
||||
term 'sqlite' and at least one of the terms 'fantastic' or 'impressive',
|
||||
not those that contain both 'sqlite' and 'fantastic' or 'impressive':
|
||||
|
||||
<col> MATCH 'sqlite fantastic OR impressive'
|
||||
|
||||
Any token that is part of an Fts3 query expression, whether or not it is
|
||||
part of a phrase enclosed in quotes, may have a '*' character appended to
|
||||
it. In this case, the token matches all terms that begin with the characters
|
||||
of the token, not just those that exactly match it. For example, the
|
||||
following query:
|
||||
|
||||
<col> MATCH 'sql*'
|
||||
|
||||
matches all rows that contain the term "SQLite", as well as those that
|
||||
contain "SQL".
|
||||
|
||||
A token that is not part of a quoted phrase may be preceded by a '-'
|
||||
character, which indicates that matching rows must not contain the
|
||||
specified term. For example, the following:
|
||||
|
||||
<col> MATCH '"database engine" -sqlite'
|
||||
|
||||
matches rows that contain the phrase "database engine" but do not contain
|
||||
the term "sqlite". If the '-' character occurs inside a quoted phrase,
|
||||
it is ignored. It is possible to use both the '-' prefix and the '*' postfix
|
||||
on a single term. At this time, all Fts3 queries must contain at least
|
||||
one term or phrase that is not preceded by the '-' prefix.
|
||||
|
||||
Regardless of whether or not a table name or column name is used on the
|
||||
left hand side of the MATCH operator, a specific column of the fts3 table
|
||||
may be associated with each token in a query by preceding a token with
|
||||
a column name followed by a ':' character. For example, regardless of what
|
||||
is specified for <col>, the following query requires that column "col1"
|
||||
of the table contains the term "hello", and that column "col2" of the
|
||||
table contains the term "world". If the table does not contain columns
|
||||
named "col1" and "col2", then an error is returned and the query is
|
||||
not run.
|
||||
|
||||
<col> MATCH 'col1:hello col2:world'
|
||||
|
||||
It is not possible to associate a specific table column with a quoted
|
||||
phrase or a term preceded by a '-' operator. A '*' character may be
|
||||
appended to a term associated with a specific column for prefix matching.
|
||||
|
||||
3. ENHANCED QUERY SYNTAX
|
||||
|
||||
The enhanced query syntax is quite similar to the standard query syntax,
|
||||
with the following four differences:
|
||||
|
||||
1) Parenthesis are supported. When using the enhanced query syntax,
|
||||
parenthesis may be used to overcome the built-in precedence of the
|
||||
supplied binary operators. For example, the following query:
|
||||
|
||||
<col> MATCH '(hello world) OR (simple example)'
|
||||
|
||||
matches documents that contain both "hello" and "world", and documents
|
||||
that contain both "simple" and "example". It is not possible to forumlate
|
||||
such a query using the standard syntax.
|
||||
|
||||
2) Instead of separating tokens and phrases by whitespace, an AND operator
|
||||
may be explicitly specified. This does not change query processing at
|
||||
all, but may be used to improve readability. For example, the following
|
||||
query is handled identically to the one above:
|
||||
|
||||
<col> MATCH '(hello AND world) OR (simple AND example)'
|
||||
|
||||
As with the OR and NEAR operators, the AND operator must be specified
|
||||
in upper case. The word "and" specified in lower or mixed case is
|
||||
handled as a regular token.
|
||||
|
||||
3) The '-' token prefix is not supported. Instead, a new binary operator,
|
||||
NOT, is included. The NOT operator requires that the query specified
|
||||
as its left-hand operator matches, but that the query specified as the
|
||||
right-hand operator does not. For example, to query for all rows that
|
||||
contain the term "example" but not the term "simple", the following
|
||||
query could be used:
|
||||
|
||||
<col> MATCH 'example NOT simple'
|
||||
|
||||
As for all other operators, the NOT operator must be specified in
|
||||
upper case. Otherwise it will be treated as a regular token.
|
||||
|
||||
4) Unlike in the standard syntax, where the OR operator has a higher
|
||||
precedence than the implicit AND operator, when using the enhanced
|
||||
syntax implicit and explict AND operators have a higher precedence
|
||||
than OR operators. Using the enhanced syntax, the following two
|
||||
queries are equivalent:
|
||||
|
||||
<col> MATCH 'sqlite fantastic OR impressive'
|
||||
<col> MATCH '(sqlite AND fantastic) OR impressive'
|
||||
|
||||
however, when using the standard syntax, the query:
|
||||
|
||||
<col> MATCH 'sqlite fantastic OR impressive'
|
||||
|
||||
is equivalent to the enhanced syntax query:
|
||||
|
||||
<col> MATCH 'sqlite AND (fantastic OR impressive)'
|
||||
|
||||
The precedence of all enhanced syntax operators, in order from highest
|
||||
to lowest, is:
|
||||
|
||||
NEAR (highest precedence, tightest grouping)
|
||||
NOT
|
||||
AND
|
||||
OR (lowest precedence, loosest grouping)
|
||||
|
||||
Using the advanced syntax, it is possible to specify expressions enclosed
|
||||
in parenthesis as operands to the NOT, AND and OR operators. However both
|
||||
the left and right hand side operands of NEAR operators must be either
|
||||
tokens or phrases. Attempting the following query will return an error:
|
||||
|
||||
<col> MATCH 'sqlite NEAR (fantastic OR impressive)'
|
||||
|
||||
Queries of this form must be re-written as:
|
||||
|
||||
<col> MATCH 'sqlite NEAR fantastic OR sqlite NEAR impressive'
|
||||
135
ext/fts3/README.tokenizers
Normal file
135
ext/fts3/README.tokenizers
Normal file
|
|
@ -0,0 +1,135 @@
|
|||
|
||||
1. FTS3 Tokenizers
|
||||
|
||||
When creating a new full-text table, FTS3 allows the user to select
|
||||
the text tokenizer implementation to be used when indexing text
|
||||
by specifying a "tokenize" clause as part of the CREATE VIRTUAL TABLE
|
||||
statement:
|
||||
|
||||
CREATE VIRTUAL TABLE <table-name> USING fts3(
|
||||
<columns ...> [, tokenize <tokenizer-name> [<tokenizer-args>]]
|
||||
);
|
||||
|
||||
The built-in tokenizers (valid values to pass as <tokenizer name>) are
|
||||
"simple", "porter" and "unicode".
|
||||
|
||||
<tokenizer-args> should consist of zero or more white-space separated
|
||||
arguments to pass to the selected tokenizer implementation. The
|
||||
interpretation of the arguments, if any, depends on the individual
|
||||
tokenizer.
|
||||
|
||||
2. Custom Tokenizers
|
||||
|
||||
FTS3 allows users to provide custom tokenizer implementations. The
|
||||
interface used to create a new tokenizer is defined and described in
|
||||
the fts3_tokenizer.h source file.
|
||||
|
||||
Registering a new FTS3 tokenizer is similar to registering a new
|
||||
virtual table module with SQLite. The user passes a pointer to a
|
||||
structure containing pointers to various callback functions that
|
||||
make up the implementation of the new tokenizer type. For tokenizers,
|
||||
the structure (defined in fts3_tokenizer.h) is called
|
||||
"sqlite3_tokenizer_module".
|
||||
|
||||
FTS3 does not expose a C-function that users call to register new
|
||||
tokenizer types with a database handle. Instead, the pointer must
|
||||
be encoded as an SQL blob value and passed to FTS3 through the SQL
|
||||
engine by evaluating a special scalar function, "fts3_tokenizer()".
|
||||
The fts3_tokenizer() function may be called with one or two arguments,
|
||||
as follows:
|
||||
|
||||
SELECT fts3_tokenizer(<tokenizer-name>);
|
||||
SELECT fts3_tokenizer(<tokenizer-name>, <sqlite3_tokenizer_module ptr>);
|
||||
|
||||
Where <tokenizer-name> is a string identifying the tokenizer and
|
||||
<sqlite3_tokenizer_module ptr> is a pointer to an sqlite3_tokenizer_module
|
||||
structure encoded as an SQL blob. If the second argument is present,
|
||||
it is registered as tokenizer <tokenizer-name> and a copy of it
|
||||
returned. If only one argument is passed, a pointer to the tokenizer
|
||||
implementation currently registered as <tokenizer-name> is returned,
|
||||
encoded as a blob. Or, if no such tokenizer exists, an SQL exception
|
||||
(error) is raised.
|
||||
|
||||
SECURITY: If the fts3 extension is used in an environment where potentially
|
||||
malicious users may execute arbitrary SQL (i.e. gears), they should be
|
||||
prevented from invoking the fts3_tokenizer() function. The
|
||||
fts3_tokenizer() function is disabled by default. It is only enabled
|
||||
by SQLITE_DBCONFIG_ENABLE_FTS3_TOKENIZER. Do not enable it in
|
||||
security sensitive environments.
|
||||
|
||||
See "Sample code" below for an example of calling the fts3_tokenizer()
|
||||
function from C code.
|
||||
|
||||
3. ICU Library Tokenizers
|
||||
|
||||
If this extension is compiled with the SQLITE_ENABLE_ICU pre-processor
|
||||
symbol defined, then there exists a built-in tokenizer named "icu"
|
||||
implemented using the ICU library. The first argument passed to the
|
||||
xCreate() method (see fts3_tokenizer.h) of this tokenizer may be
|
||||
an ICU locale identifier. For example "tr_TR" for Turkish as used
|
||||
in Turkey, or "en_AU" for English as used in Australia. For example:
|
||||
|
||||
"CREATE VIRTUAL TABLE thai_text USING fts3(text, tokenizer icu th_TH)"
|
||||
|
||||
The ICU tokenizer implementation is very simple. It splits the input
|
||||
text according to the ICU rules for finding word boundaries and discards
|
||||
any tokens that consist entirely of white-space. This may be suitable
|
||||
for some applications in some locales, but not all. If more complex
|
||||
processing is required, for example to implement stemming or
|
||||
discard punctuation, this can be done by creating a tokenizer
|
||||
implementation that uses the ICU tokenizer as part of its implementation.
|
||||
|
||||
When using the ICU tokenizer this way, it is safe to overwrite the
|
||||
contents of the strings returned by the xNext() method (see
|
||||
fts3_tokenizer.h).
|
||||
|
||||
4. Sample code.
|
||||
|
||||
The following two code samples illustrate the way C code should invoke
|
||||
the fts3_tokenizer() scalar function:
|
||||
|
||||
int registerTokenizer(
|
||||
sqlite3 *db,
|
||||
char *zName,
|
||||
const sqlite3_tokenizer_module *p
|
||||
){
|
||||
int rc;
|
||||
sqlite3_stmt *pStmt;
|
||||
const char zSql[] = "SELECT fts3_tokenizer(?, ?)";
|
||||
|
||||
rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
|
||||
if( rc!=SQLITE_OK ){
|
||||
return rc;
|
||||
}
|
||||
|
||||
sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
|
||||
sqlite3_bind_blob(pStmt, 2, &p, sizeof(p), SQLITE_STATIC);
|
||||
sqlite3_step(pStmt);
|
||||
|
||||
return sqlite3_finalize(pStmt);
|
||||
}
|
||||
|
||||
int queryTokenizer(
|
||||
sqlite3 *db,
|
||||
char *zName,
|
||||
const sqlite3_tokenizer_module **pp
|
||||
){
|
||||
int rc;
|
||||
sqlite3_stmt *pStmt;
|
||||
const char zSql[] = "SELECT fts3_tokenizer(?)";
|
||||
|
||||
*pp = 0;
|
||||
rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
|
||||
if( rc!=SQLITE_OK ){
|
||||
return rc;
|
||||
}
|
||||
|
||||
sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
|
||||
if( SQLITE_ROW==sqlite3_step(pStmt) ){
|
||||
if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){
|
||||
memcpy(pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp));
|
||||
}
|
||||
}
|
||||
|
||||
return sqlite3_finalize(pStmt);
|
||||
}
|
||||
4
ext/fts3/README.txt
Normal file
4
ext/fts3/README.txt
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
This folder contains source code to the second full-text search
|
||||
extension for SQLite. While the API is the same, this version uses a
|
||||
substantially different storage schema from fts1, so tables will need
|
||||
to be rebuilt.
|
||||
6101
ext/fts3/fts3.c
Normal file
6101
ext/fts3/fts3.c
Normal file
File diff suppressed because it is too large
Load diff
26
ext/fts3/fts3.h
Normal file
26
ext/fts3/fts3.h
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
/*
|
||||
** 2006 Oct 10
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
******************************************************************************
|
||||
**
|
||||
** This header file is used by programs that want to link against the
|
||||
** FTS3 library. All it does is declare the sqlite3Fts3Init() interface.
|
||||
*/
|
||||
#include "sqlite3.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif /* __cplusplus */
|
||||
|
||||
int sqlite3Fts3Init(sqlite3 *db);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif /* __cplusplus */
|
||||
654
ext/fts3/fts3Int.h
Normal file
654
ext/fts3/fts3Int.h
Normal file
|
|
@ -0,0 +1,654 @@
|
|||
/*
|
||||
** 2009 Nov 12
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
******************************************************************************
|
||||
**
|
||||
*/
|
||||
#ifndef _FTSINT_H
|
||||
#define _FTSINT_H
|
||||
|
||||
#if !defined(NDEBUG) && !defined(SQLITE_DEBUG)
|
||||
# define NDEBUG 1
|
||||
#endif
|
||||
|
||||
/* FTS3/FTS4 require virtual tables */
|
||||
#ifdef SQLITE_OMIT_VIRTUALTABLE
|
||||
# undef SQLITE_ENABLE_FTS3
|
||||
# undef SQLITE_ENABLE_FTS4
|
||||
#endif
|
||||
|
||||
/*
|
||||
** FTS4 is really an extension for FTS3. It is enabled using the
|
||||
** SQLITE_ENABLE_FTS3 macro. But to avoid confusion we also all
|
||||
** the SQLITE_ENABLE_FTS4 macro to serve as an alisse for SQLITE_ENABLE_FTS3.
|
||||
*/
|
||||
#if defined(SQLITE_ENABLE_FTS4) && !defined(SQLITE_ENABLE_FTS3)
|
||||
# define SQLITE_ENABLE_FTS3
|
||||
#endif
|
||||
|
||||
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
|
||||
|
||||
/* If not building as part of the core, include sqlite3ext.h. */
|
||||
#ifndef SQLITE_CORE
|
||||
# include "sqlite3ext.h"
|
||||
SQLITE_EXTENSION_INIT3
|
||||
#endif
|
||||
|
||||
#include "sqlite3.h"
|
||||
#include "fts3_tokenizer.h"
|
||||
#include "fts3_hash.h"
|
||||
|
||||
/*
|
||||
** This constant determines the maximum depth of an FTS expression tree
|
||||
** that the library will create and use. FTS uses recursion to perform
|
||||
** various operations on the query tree, so the disadvantage of a large
|
||||
** limit is that it may allow very large queries to use large amounts
|
||||
** of stack space (perhaps causing a stack overflow).
|
||||
*/
|
||||
#ifndef SQLITE_FTS3_MAX_EXPR_DEPTH
|
||||
# define SQLITE_FTS3_MAX_EXPR_DEPTH 12
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
** This constant controls how often segments are merged. Once there are
|
||||
** FTS3_MERGE_COUNT segments of level N, they are merged into a single
|
||||
** segment of level N+1.
|
||||
*/
|
||||
#define FTS3_MERGE_COUNT 16
|
||||
|
||||
/*
|
||||
** This is the maximum amount of data (in bytes) to store in the
|
||||
** Fts3Table.pendingTerms hash table. Normally, the hash table is
|
||||
** populated as documents are inserted/updated/deleted in a transaction
|
||||
** and used to create a new segment when the transaction is committed.
|
||||
** However if this limit is reached midway through a transaction, a new
|
||||
** segment is created and the hash table cleared immediately.
|
||||
*/
|
||||
#define FTS3_MAX_PENDING_DATA (1*1024*1024)
|
||||
|
||||
/*
|
||||
** Macro to return the number of elements in an array. SQLite has a
|
||||
** similar macro called ArraySize(). Use a different name to avoid
|
||||
** a collision when building an amalgamation with built-in FTS3.
|
||||
*/
|
||||
#define SizeofArray(X) ((int)(sizeof(X)/sizeof(X[0])))
|
||||
|
||||
|
||||
#ifndef MIN
|
||||
# define MIN(x,y) ((x)<(y)?(x):(y))
|
||||
#endif
|
||||
#ifndef MAX
|
||||
# define MAX(x,y) ((x)>(y)?(x):(y))
|
||||
#endif
|
||||
|
||||
/*
|
||||
** Maximum length of a varint encoded integer. The varint format is different
|
||||
** from that used by SQLite, so the maximum length is 10, not 9.
|
||||
*/
|
||||
#define FTS3_VARINT_MAX 10
|
||||
|
||||
#define FTS3_BUFFER_PADDING 8
|
||||
|
||||
/*
|
||||
** FTS4 virtual tables may maintain multiple indexes - one index of all terms
|
||||
** in the document set and zero or more prefix indexes. All indexes are stored
|
||||
** as one or more b+-trees in the %_segments and %_segdir tables.
|
||||
**
|
||||
** It is possible to determine which index a b+-tree belongs to based on the
|
||||
** value stored in the "%_segdir.level" column. Given this value L, the index
|
||||
** that the b+-tree belongs to is (L<<10). In other words, all b+-trees with
|
||||
** level values between 0 and 1023 (inclusive) belong to index 0, all levels
|
||||
** between 1024 and 2047 to index 1, and so on.
|
||||
**
|
||||
** It is considered impossible for an index to use more than 1024 levels. In
|
||||
** theory though this may happen, but only after at least
|
||||
** (FTS3_MERGE_COUNT^1024) separate flushes of the pending-terms tables.
|
||||
*/
|
||||
#define FTS3_SEGDIR_MAXLEVEL 1024
|
||||
#define FTS3_SEGDIR_MAXLEVEL_STR "1024"
|
||||
|
||||
/*
|
||||
** The testcase() macro is only used by the amalgamation. If undefined,
|
||||
** make it a no-op.
|
||||
*/
|
||||
#ifndef testcase
|
||||
# define testcase(X)
|
||||
#endif
|
||||
|
||||
/*
|
||||
** Terminator values for position-lists and column-lists.
|
||||
*/
|
||||
#define POS_COLUMN (1) /* Column-list terminator */
|
||||
#define POS_END (0) /* Position-list terminator */
|
||||
|
||||
/*
|
||||
** The assert_fts3_nc() macro is similar to the assert() macro, except that it
|
||||
** is used for assert() conditions that are true only if it can be
|
||||
** guranteed that the database is not corrupt.
|
||||
*/
|
||||
#ifdef SQLITE_DEBUG
|
||||
extern int sqlite3_fts3_may_be_corrupt;
|
||||
# define assert_fts3_nc(x) assert(sqlite3_fts3_may_be_corrupt || (x))
|
||||
#else
|
||||
# define assert_fts3_nc(x) assert(x)
|
||||
#endif
|
||||
|
||||
/*
|
||||
** This section provides definitions to allow the
|
||||
** FTS3 extension to be compiled outside of the
|
||||
** amalgamation.
|
||||
*/
|
||||
#ifndef SQLITE_AMALGAMATION
|
||||
/*
|
||||
** Macros indicating that conditional expressions are always true or
|
||||
** false.
|
||||
*/
|
||||
#if defined(SQLITE_COVERAGE_TEST) || defined(SQLITE_MUTATION_TEST)
|
||||
# define SQLITE_OMIT_AUXILIARY_SAFETY_CHECKS 1
|
||||
#endif
|
||||
#if defined(SQLITE_OMIT_AUXILIARY_SAFETY_CHECKS)
|
||||
# define ALWAYS(X) (1)
|
||||
# define NEVER(X) (0)
|
||||
#elif !defined(NDEBUG)
|
||||
# define ALWAYS(X) ((X)?1:(assert(0),0))
|
||||
# define NEVER(X) ((X)?(assert(0),1):0)
|
||||
#else
|
||||
# define ALWAYS(X) (X)
|
||||
# define NEVER(X) (X)
|
||||
#endif
|
||||
|
||||
/*
|
||||
** Internal types used by SQLite.
|
||||
*/
|
||||
typedef unsigned char u8; /* 1-byte (or larger) unsigned integer */
|
||||
typedef short int i16; /* 2-byte (or larger) signed integer */
|
||||
typedef unsigned int u32; /* 4-byte unsigned integer */
|
||||
typedef sqlite3_uint64 u64; /* 8-byte unsigned integer */
|
||||
typedef sqlite3_int64 i64; /* 8-byte signed integer */
|
||||
|
||||
/*
|
||||
** Macro used to suppress compiler warnings for unused parameters.
|
||||
*/
|
||||
#define UNUSED_PARAMETER(x) (void)(x)
|
||||
|
||||
/*
|
||||
** Activate assert() only if SQLITE_TEST is enabled.
|
||||
*/
|
||||
#if !defined(NDEBUG) && !defined(SQLITE_DEBUG)
|
||||
# define NDEBUG 1
|
||||
#endif
|
||||
|
||||
/*
|
||||
** The TESTONLY macro is used to enclose variable declarations or
|
||||
** other bits of code that are needed to support the arguments
|
||||
** within testcase() and assert() macros.
|
||||
*/
|
||||
#if defined(SQLITE_DEBUG) || defined(SQLITE_COVERAGE_TEST)
|
||||
# define TESTONLY(X) X
|
||||
#else
|
||||
# define TESTONLY(X)
|
||||
#endif
|
||||
|
||||
#define LARGEST_INT64 (0xffffffff|(((i64)0x7fffffff)<<32))
|
||||
#define SMALLEST_INT64 (((i64)-1) - LARGEST_INT64)
|
||||
|
||||
#define deliberate_fall_through
|
||||
|
||||
#endif /* SQLITE_AMALGAMATION */
|
||||
|
||||
#ifdef SQLITE_DEBUG
|
||||
int sqlite3Fts3Corrupt(void);
|
||||
# define FTS_CORRUPT_VTAB sqlite3Fts3Corrupt()
|
||||
#else
|
||||
# define FTS_CORRUPT_VTAB SQLITE_CORRUPT_VTAB
|
||||
#endif
|
||||
|
||||
typedef struct Fts3Table Fts3Table;
|
||||
typedef struct Fts3Cursor Fts3Cursor;
|
||||
typedef struct Fts3Expr Fts3Expr;
|
||||
typedef struct Fts3Phrase Fts3Phrase;
|
||||
typedef struct Fts3PhraseToken Fts3PhraseToken;
|
||||
|
||||
typedef struct Fts3Doclist Fts3Doclist;
|
||||
typedef struct Fts3SegFilter Fts3SegFilter;
|
||||
typedef struct Fts3DeferredToken Fts3DeferredToken;
|
||||
typedef struct Fts3SegReader Fts3SegReader;
|
||||
typedef struct Fts3MultiSegReader Fts3MultiSegReader;
|
||||
|
||||
typedef struct MatchinfoBuffer MatchinfoBuffer;
|
||||
|
||||
/*
|
||||
** A connection to a fulltext index is an instance of the following
|
||||
** structure. The xCreate and xConnect methods create an instance
|
||||
** of this structure and xDestroy and xDisconnect free that instance.
|
||||
** All other methods receive a pointer to the structure as one of their
|
||||
** arguments.
|
||||
*/
|
||||
struct Fts3Table {
|
||||
sqlite3_vtab base; /* Base class used by SQLite core */
|
||||
sqlite3 *db; /* The database connection */
|
||||
const char *zDb; /* logical database name */
|
||||
const char *zName; /* virtual table name */
|
||||
int nColumn; /* number of named columns in virtual table */
|
||||
char **azColumn; /* column names. malloced */
|
||||
u8 *abNotindexed; /* True for 'notindexed' columns */
|
||||
sqlite3_tokenizer *pTokenizer; /* tokenizer for inserts and queries */
|
||||
char *zContentTbl; /* content=xxx option, or NULL */
|
||||
char *zLanguageid; /* languageid=xxx option, or NULL */
|
||||
int nAutoincrmerge; /* Value configured by 'automerge' */
|
||||
u32 nLeafAdd; /* Number of leaf blocks added this trans */
|
||||
int bLock; /* Used to prevent recursive content= tbls */
|
||||
|
||||
/* Precompiled statements used by the implementation. Each of these
|
||||
** statements is run and reset within a single virtual table API call.
|
||||
*/
|
||||
sqlite3_stmt *aStmt[40];
|
||||
sqlite3_stmt *pSeekStmt; /* Cache for fts3CursorSeekStmt() */
|
||||
|
||||
char *zReadExprlist;
|
||||
char *zWriteExprlist;
|
||||
|
||||
int nNodeSize; /* Soft limit for node size */
|
||||
u8 bFts4; /* True for FTS4, false for FTS3 */
|
||||
u8 bHasStat; /* True if %_stat table exists (2==unknown) */
|
||||
u8 bHasDocsize; /* True if %_docsize table exists */
|
||||
u8 bDescIdx; /* True if doclists are in reverse order */
|
||||
u8 bIgnoreSavepoint; /* True to ignore xSavepoint invocations */
|
||||
int nPgsz; /* Page size for host database */
|
||||
char *zSegmentsTbl; /* Name of %_segments table */
|
||||
sqlite3_blob *pSegments; /* Blob handle open on %_segments table */
|
||||
|
||||
/*
|
||||
** The following array of hash tables is used to buffer pending index
|
||||
** updates during transactions. All pending updates buffered at any one
|
||||
** time must share a common language-id (see the FTS4 langid= feature).
|
||||
** The current language id is stored in variable iPrevLangid.
|
||||
**
|
||||
** A single FTS4 table may have multiple full-text indexes. For each index
|
||||
** there is an entry in the aIndex[] array. Index 0 is an index of all the
|
||||
** terms that appear in the document set. Each subsequent index in aIndex[]
|
||||
** is an index of prefixes of a specific length.
|
||||
**
|
||||
** Variable nPendingData contains an estimate the memory consumed by the
|
||||
** pending data structures, including hash table overhead, but not including
|
||||
** malloc overhead. When nPendingData exceeds nMaxPendingData, all hash
|
||||
** tables are flushed to disk. Variable iPrevDocid is the docid of the most
|
||||
** recently inserted record.
|
||||
*/
|
||||
int nIndex; /* Size of aIndex[] */
|
||||
struct Fts3Index {
|
||||
int nPrefix; /* Prefix length (0 for main terms index) */
|
||||
Fts3Hash hPending; /* Pending terms table for this index */
|
||||
} *aIndex;
|
||||
int nMaxPendingData; /* Max pending data before flush to disk */
|
||||
int nPendingData; /* Current bytes of pending data */
|
||||
sqlite_int64 iPrevDocid; /* Docid of most recently inserted document */
|
||||
int iPrevLangid; /* Langid of recently inserted document */
|
||||
int bPrevDelete; /* True if last operation was a delete */
|
||||
|
||||
#if defined(SQLITE_DEBUG) || defined(SQLITE_COVERAGE_TEST)
|
||||
/* State variables used for validating that the transaction control
|
||||
** methods of the virtual table are called at appropriate times. These
|
||||
** values do not contribute to FTS functionality; they are used for
|
||||
** verifying the operation of the SQLite core.
|
||||
*/
|
||||
int inTransaction; /* True after xBegin but before xCommit/xRollback */
|
||||
int mxSavepoint; /* Largest valid xSavepoint integer */
|
||||
#endif
|
||||
|
||||
#if defined(SQLITE_DEBUG) || defined(SQLITE_TEST)
|
||||
/* True to disable the incremental doclist optimization. This is controled
|
||||
** by special insert command 'test-no-incr-doclist'. */
|
||||
int bNoIncrDoclist;
|
||||
|
||||
/* Number of segments in a level */
|
||||
int nMergeCount;
|
||||
#endif
|
||||
};
|
||||
|
||||
/* Macro to find the number of segments to merge */
|
||||
#if defined(SQLITE_DEBUG) || defined(SQLITE_TEST)
|
||||
# define MergeCount(P) ((P)->nMergeCount)
|
||||
#else
|
||||
# define MergeCount(P) FTS3_MERGE_COUNT
|
||||
#endif
|
||||
|
||||
/*
|
||||
** When the core wants to read from the virtual table, it creates a
|
||||
** virtual table cursor (an instance of the following structure) using
|
||||
** the xOpen method. Cursors are destroyed using the xClose method.
|
||||
*/
|
||||
struct Fts3Cursor {
|
||||
sqlite3_vtab_cursor base; /* Base class used by SQLite core */
|
||||
i16 eSearch; /* Search strategy (see below) */
|
||||
u8 isEof; /* True if at End Of Results */
|
||||
u8 isRequireSeek; /* True if must seek pStmt to %_content row */
|
||||
u8 bSeekStmt; /* True if pStmt is a seek */
|
||||
sqlite3_stmt *pStmt; /* Prepared statement in use by the cursor */
|
||||
Fts3Expr *pExpr; /* Parsed MATCH query string */
|
||||
int iLangid; /* Language being queried for */
|
||||
int nPhrase; /* Number of matchable phrases in query */
|
||||
Fts3DeferredToken *pDeferred; /* Deferred search tokens, if any */
|
||||
sqlite3_int64 iPrevId; /* Previous id read from aDoclist */
|
||||
char *pNextId; /* Pointer into the body of aDoclist */
|
||||
char *aDoclist; /* List of docids for full-text queries */
|
||||
int nDoclist; /* Size of buffer at aDoclist */
|
||||
u8 bDesc; /* True to sort in descending order */
|
||||
int eEvalmode; /* An FTS3_EVAL_XX constant */
|
||||
int nRowAvg; /* Average size of database rows, in pages */
|
||||
sqlite3_int64 nDoc; /* Documents in table */
|
||||
i64 iMinDocid; /* Minimum docid to return */
|
||||
i64 iMaxDocid; /* Maximum docid to return */
|
||||
int isMatchinfoNeeded; /* True when aMatchinfo[] needs filling in */
|
||||
MatchinfoBuffer *pMIBuffer; /* Buffer for matchinfo data */
|
||||
};
|
||||
|
||||
#define FTS3_EVAL_FILTER 0
|
||||
#define FTS3_EVAL_NEXT 1
|
||||
#define FTS3_EVAL_MATCHINFO 2
|
||||
|
||||
/*
|
||||
** The Fts3Cursor.eSearch member is always set to one of the following.
|
||||
** Actualy, Fts3Cursor.eSearch can be greater than or equal to
|
||||
** FTS3_FULLTEXT_SEARCH. If so, then Fts3Cursor.eSearch - 2 is the index
|
||||
** of the column to be searched. For example, in
|
||||
**
|
||||
** CREATE VIRTUAL TABLE ex1 USING fts3(a,b,c,d);
|
||||
** SELECT docid FROM ex1 WHERE b MATCH 'one two three';
|
||||
**
|
||||
** Because the LHS of the MATCH operator is 2nd column "b",
|
||||
** Fts3Cursor.eSearch will be set to FTS3_FULLTEXT_SEARCH+1. (+0 for a,
|
||||
** +1 for b, +2 for c, +3 for d.) If the LHS of MATCH were "ex1"
|
||||
** indicating that all columns should be searched,
|
||||
** then eSearch would be set to FTS3_FULLTEXT_SEARCH+4.
|
||||
*/
|
||||
#define FTS3_FULLSCAN_SEARCH 0 /* Linear scan of %_content table */
|
||||
#define FTS3_DOCID_SEARCH 1 /* Lookup by rowid on %_content table */
|
||||
#define FTS3_FULLTEXT_SEARCH 2 /* Full-text index search */
|
||||
|
||||
/*
|
||||
** The lower 16-bits of the sqlite3_index_info.idxNum value set by
|
||||
** the xBestIndex() method contains the Fts3Cursor.eSearch value described
|
||||
** above. The upper 16-bits contain a combination of the following
|
||||
** bits, used to describe extra constraints on full-text searches.
|
||||
*/
|
||||
#define FTS3_HAVE_LANGID 0x00010000 /* languageid=? */
|
||||
#define FTS3_HAVE_DOCID_GE 0x00020000 /* docid>=? */
|
||||
#define FTS3_HAVE_DOCID_LE 0x00040000 /* docid<=? */
|
||||
|
||||
struct Fts3Doclist {
|
||||
char *aAll; /* Array containing doclist (or NULL) */
|
||||
int nAll; /* Size of a[] in bytes */
|
||||
char *pNextDocid; /* Pointer to next docid */
|
||||
|
||||
sqlite3_int64 iDocid; /* Current docid (if pList!=0) */
|
||||
int bFreeList; /* True if pList should be sqlite3_free()d */
|
||||
char *pList; /* Pointer to position list following iDocid */
|
||||
int nList; /* Length of position list */
|
||||
};
|
||||
|
||||
/*
|
||||
** A "phrase" is a sequence of one or more tokens that must match in
|
||||
** sequence. A single token is the base case and the most common case.
|
||||
** For a sequence of tokens contained in double-quotes (i.e. "one two three")
|
||||
** nToken will be the number of tokens in the string.
|
||||
*/
|
||||
struct Fts3PhraseToken {
|
||||
char *z; /* Text of the token */
|
||||
int n; /* Number of bytes in buffer z */
|
||||
int isPrefix; /* True if token ends with a "*" character */
|
||||
int bFirst; /* True if token must appear at position 0 */
|
||||
|
||||
/* Variables above this point are populated when the expression is
|
||||
** parsed (by code in fts3_expr.c). Below this point the variables are
|
||||
** used when evaluating the expression. */
|
||||
Fts3DeferredToken *pDeferred; /* Deferred token object for this token */
|
||||
Fts3MultiSegReader *pSegcsr; /* Segment-reader for this token */
|
||||
};
|
||||
|
||||
struct Fts3Phrase {
|
||||
/* Cache of doclist for this phrase. */
|
||||
Fts3Doclist doclist;
|
||||
int bIncr; /* True if doclist is loaded incrementally */
|
||||
int iDoclistToken;
|
||||
|
||||
/* Used by sqlite3Fts3EvalPhrasePoslist() if this is a descendent of an
|
||||
** OR condition. */
|
||||
char *pOrPoslist;
|
||||
i64 iOrDocid;
|
||||
|
||||
/* Variables below this point are populated by fts3_expr.c when parsing
|
||||
** a MATCH expression. Everything above is part of the evaluation phase.
|
||||
*/
|
||||
int nToken; /* Number of tokens in the phrase */
|
||||
int iColumn; /* Index of column this phrase must match */
|
||||
Fts3PhraseToken aToken[1]; /* One entry for each token in the phrase */
|
||||
};
|
||||
|
||||
/*
|
||||
** A tree of these objects forms the RHS of a MATCH operator.
|
||||
**
|
||||
** If Fts3Expr.eType is FTSQUERY_PHRASE and isLoaded is true, then aDoclist
|
||||
** points to a malloced buffer, size nDoclist bytes, containing the results
|
||||
** of this phrase query in FTS3 doclist format. As usual, the initial
|
||||
** "Length" field found in doclists stored on disk is omitted from this
|
||||
** buffer.
|
||||
**
|
||||
** Variable aMI is used only for FTSQUERY_NEAR nodes to store the global
|
||||
** matchinfo data. If it is not NULL, it points to an array of size nCol*3,
|
||||
** where nCol is the number of columns in the queried FTS table. The array
|
||||
** is populated as follows:
|
||||
**
|
||||
** aMI[iCol*3 + 0] = Undefined
|
||||
** aMI[iCol*3 + 1] = Number of occurrences
|
||||
** aMI[iCol*3 + 2] = Number of rows containing at least one instance
|
||||
**
|
||||
** The aMI array is allocated using sqlite3_malloc(). It should be freed
|
||||
** when the expression node is.
|
||||
*/
|
||||
struct Fts3Expr {
|
||||
int eType; /* One of the FTSQUERY_XXX values defined below */
|
||||
int nNear; /* Valid if eType==FTSQUERY_NEAR */
|
||||
Fts3Expr *pParent; /* pParent->pLeft==this or pParent->pRight==this */
|
||||
Fts3Expr *pLeft; /* Left operand */
|
||||
Fts3Expr *pRight; /* Right operand */
|
||||
Fts3Phrase *pPhrase; /* Valid if eType==FTSQUERY_PHRASE */
|
||||
|
||||
/* The following are used by the fts3_eval.c module. */
|
||||
sqlite3_int64 iDocid; /* Current docid */
|
||||
u8 bEof; /* True this expression is at EOF already */
|
||||
u8 bStart; /* True if iDocid is valid */
|
||||
u8 bDeferred; /* True if this expression is entirely deferred */
|
||||
|
||||
/* The following are used by the fts3_snippet.c module. */
|
||||
int iPhrase; /* Index of this phrase in matchinfo() results */
|
||||
u32 *aMI; /* See above */
|
||||
};
|
||||
|
||||
/*
|
||||
** Candidate values for Fts3Query.eType. Note that the order of the first
|
||||
** four values is in order of precedence when parsing expressions. For
|
||||
** example, the following:
|
||||
**
|
||||
** "a OR b AND c NOT d NEAR e"
|
||||
**
|
||||
** is equivalent to:
|
||||
**
|
||||
** "a OR (b AND (c NOT (d NEAR e)))"
|
||||
*/
|
||||
#define FTSQUERY_NEAR 1
|
||||
#define FTSQUERY_NOT 2
|
||||
#define FTSQUERY_AND 3
|
||||
#define FTSQUERY_OR 4
|
||||
#define FTSQUERY_PHRASE 5
|
||||
|
||||
|
||||
/* fts3_write.c */
|
||||
int sqlite3Fts3UpdateMethod(sqlite3_vtab*,int,sqlite3_value**,sqlite3_int64*);
|
||||
int sqlite3Fts3PendingTermsFlush(Fts3Table *);
|
||||
void sqlite3Fts3PendingTermsClear(Fts3Table *);
|
||||
int sqlite3Fts3Optimize(Fts3Table *);
|
||||
int sqlite3Fts3SegReaderNew(int, int, sqlite3_int64,
|
||||
sqlite3_int64, sqlite3_int64, const char *, int, Fts3SegReader**);
|
||||
int sqlite3Fts3SegReaderPending(
|
||||
Fts3Table*,int,const char*,int,int,Fts3SegReader**);
|
||||
void sqlite3Fts3SegReaderFree(Fts3SegReader *);
|
||||
int sqlite3Fts3AllSegdirs(Fts3Table*, int, int, int, sqlite3_stmt **);
|
||||
int sqlite3Fts3ReadBlock(Fts3Table*, sqlite3_int64, char **, int*, int*);
|
||||
|
||||
int sqlite3Fts3SelectDoctotal(Fts3Table *, sqlite3_stmt **);
|
||||
int sqlite3Fts3SelectDocsize(Fts3Table *, sqlite3_int64, sqlite3_stmt **);
|
||||
|
||||
#ifndef SQLITE_DISABLE_FTS4_DEFERRED
|
||||
void sqlite3Fts3FreeDeferredTokens(Fts3Cursor *);
|
||||
int sqlite3Fts3DeferToken(Fts3Cursor *, Fts3PhraseToken *, int);
|
||||
int sqlite3Fts3CacheDeferredDoclists(Fts3Cursor *);
|
||||
void sqlite3Fts3FreeDeferredDoclists(Fts3Cursor *);
|
||||
int sqlite3Fts3DeferredTokenList(Fts3DeferredToken *, char **, int *);
|
||||
#else
|
||||
# define sqlite3Fts3FreeDeferredTokens(x)
|
||||
# define sqlite3Fts3DeferToken(x,y,z) SQLITE_OK
|
||||
# define sqlite3Fts3CacheDeferredDoclists(x) SQLITE_OK
|
||||
# define sqlite3Fts3FreeDeferredDoclists(x)
|
||||
# define sqlite3Fts3DeferredTokenList(x,y,z) SQLITE_OK
|
||||
#endif
|
||||
|
||||
void sqlite3Fts3SegmentsClose(Fts3Table *);
|
||||
int sqlite3Fts3MaxLevel(Fts3Table *, int *);
|
||||
|
||||
/* Special values interpreted by sqlite3SegReaderCursor() */
|
||||
#define FTS3_SEGCURSOR_PENDING -1
|
||||
#define FTS3_SEGCURSOR_ALL -2
|
||||
|
||||
int sqlite3Fts3SegReaderStart(Fts3Table*, Fts3MultiSegReader*, Fts3SegFilter*);
|
||||
int sqlite3Fts3SegReaderStep(Fts3Table *, Fts3MultiSegReader *);
|
||||
void sqlite3Fts3SegReaderFinish(Fts3MultiSegReader *);
|
||||
|
||||
int sqlite3Fts3SegReaderCursor(Fts3Table *,
|
||||
int, int, int, const char *, int, int, int, Fts3MultiSegReader *);
|
||||
|
||||
/* Flags allowed as part of the 4th argument to SegmentReaderIterate() */
|
||||
#define FTS3_SEGMENT_REQUIRE_POS 0x00000001
|
||||
#define FTS3_SEGMENT_IGNORE_EMPTY 0x00000002
|
||||
#define FTS3_SEGMENT_COLUMN_FILTER 0x00000004
|
||||
#define FTS3_SEGMENT_PREFIX 0x00000008
|
||||
#define FTS3_SEGMENT_SCAN 0x00000010
|
||||
#define FTS3_SEGMENT_FIRST 0x00000020
|
||||
|
||||
/* Type passed as 4th argument to SegmentReaderIterate() */
|
||||
struct Fts3SegFilter {
|
||||
const char *zTerm;
|
||||
int nTerm;
|
||||
int iCol;
|
||||
int flags;
|
||||
};
|
||||
|
||||
struct Fts3MultiSegReader {
|
||||
/* Used internally by sqlite3Fts3SegReaderXXX() calls */
|
||||
Fts3SegReader **apSegment; /* Array of Fts3SegReader objects */
|
||||
int nSegment; /* Size of apSegment array */
|
||||
int nAdvance; /* How many seg-readers to advance */
|
||||
Fts3SegFilter *pFilter; /* Pointer to filter object */
|
||||
char *aBuffer; /* Buffer to merge doclists in */
|
||||
int nBuffer; /* Allocated size of aBuffer[] in bytes */
|
||||
|
||||
int iColFilter; /* If >=0, filter for this column */
|
||||
int bRestart;
|
||||
|
||||
/* Used by fts3.c only. */
|
||||
int nCost; /* Cost of running iterator */
|
||||
int bLookup; /* True if a lookup of a single entry. */
|
||||
|
||||
/* Output values. Valid only after Fts3SegReaderStep() returns SQLITE_ROW. */
|
||||
char *zTerm; /* Pointer to term buffer */
|
||||
int nTerm; /* Size of zTerm in bytes */
|
||||
char *aDoclist; /* Pointer to doclist buffer */
|
||||
int nDoclist; /* Size of aDoclist[] in bytes */
|
||||
};
|
||||
|
||||
int sqlite3Fts3Incrmerge(Fts3Table*,int,int);
|
||||
|
||||
#define fts3GetVarint32(p, piVal) ( \
|
||||
(*(u8*)(p)&0x80) ? sqlite3Fts3GetVarint32(p, piVal) : (*piVal=*(u8*)(p), 1) \
|
||||
)
|
||||
|
||||
/* fts3.c */
|
||||
void sqlite3Fts3ErrMsg(char**,const char*,...);
|
||||
int sqlite3Fts3PutVarint(char *, sqlite3_int64);
|
||||
int sqlite3Fts3GetVarint(const char *, sqlite_int64 *);
|
||||
int sqlite3Fts3GetVarintU(const char *, sqlite_uint64 *);
|
||||
int sqlite3Fts3GetVarintBounded(const char*,const char*,sqlite3_int64*);
|
||||
int sqlite3Fts3GetVarint32(const char *, int *);
|
||||
int sqlite3Fts3VarintLen(sqlite3_uint64);
|
||||
void sqlite3Fts3Dequote(char *);
|
||||
void sqlite3Fts3DoclistPrev(int,char*,int,char**,sqlite3_int64*,int*,u8*);
|
||||
int sqlite3Fts3EvalPhraseStats(Fts3Cursor *, Fts3Expr *, u32 *);
|
||||
int sqlite3Fts3FirstFilter(sqlite3_int64, char *, int, char *);
|
||||
void sqlite3Fts3CreateStatTable(int*, Fts3Table*);
|
||||
int sqlite3Fts3EvalTestDeferred(Fts3Cursor *pCsr, int *pRc);
|
||||
int sqlite3Fts3ReadInt(const char *z, int *pnOut);
|
||||
|
||||
/* fts3_tokenizer.c */
|
||||
const char *sqlite3Fts3NextToken(const char *, int *);
|
||||
int sqlite3Fts3InitHashTable(sqlite3 *, Fts3Hash *, const char *);
|
||||
int sqlite3Fts3InitTokenizer(Fts3Hash *pHash, const char *,
|
||||
sqlite3_tokenizer **, char **
|
||||
);
|
||||
int sqlite3Fts3IsIdChar(char);
|
||||
|
||||
/* fts3_snippet.c */
|
||||
void sqlite3Fts3Offsets(sqlite3_context*, Fts3Cursor*);
|
||||
void sqlite3Fts3Snippet(sqlite3_context *, Fts3Cursor *, const char *,
|
||||
const char *, const char *, int, int
|
||||
);
|
||||
void sqlite3Fts3Matchinfo(sqlite3_context *, Fts3Cursor *, const char *);
|
||||
void sqlite3Fts3MIBufferFree(MatchinfoBuffer *p);
|
||||
|
||||
/* fts3_expr.c */
|
||||
int sqlite3Fts3ExprParse(sqlite3_tokenizer *, int,
|
||||
char **, int, int, int, const char *, int, Fts3Expr **, char **
|
||||
);
|
||||
void sqlite3Fts3ExprFree(Fts3Expr *);
|
||||
#ifdef SQLITE_TEST
|
||||
int sqlite3Fts3ExprInitTestInterface(sqlite3 *db, Fts3Hash*);
|
||||
int sqlite3Fts3InitTerm(sqlite3 *db);
|
||||
#endif
|
||||
void *sqlite3Fts3MallocZero(i64 nByte);
|
||||
|
||||
int sqlite3Fts3OpenTokenizer(sqlite3_tokenizer *, int, const char *, int,
|
||||
sqlite3_tokenizer_cursor **
|
||||
);
|
||||
|
||||
/* fts3_aux.c */
|
||||
int sqlite3Fts3InitAux(sqlite3 *db);
|
||||
|
||||
void sqlite3Fts3EvalPhraseCleanup(Fts3Phrase *);
|
||||
|
||||
int sqlite3Fts3MsrIncrStart(
|
||||
Fts3Table*, Fts3MultiSegReader*, int, const char*, int);
|
||||
int sqlite3Fts3MsrIncrNext(
|
||||
Fts3Table *, Fts3MultiSegReader *, sqlite3_int64 *, char **, int *);
|
||||
int sqlite3Fts3EvalPhrasePoslist(Fts3Cursor *, Fts3Expr *, int iCol, char **);
|
||||
int sqlite3Fts3MsrOvfl(Fts3Cursor *, Fts3MultiSegReader *, int *);
|
||||
int sqlite3Fts3MsrIncrRestart(Fts3MultiSegReader *pCsr);
|
||||
|
||||
/* fts3_tokenize_vtab.c */
|
||||
int sqlite3Fts3InitTok(sqlite3*, Fts3Hash *);
|
||||
|
||||
/* fts3_unicode2.c (functions generated by parsing unicode text files) */
|
||||
#ifndef SQLITE_DISABLE_FTS3_UNICODE
|
||||
int sqlite3FtsUnicodeFold(int, int);
|
||||
int sqlite3FtsUnicodeIsalnum(int);
|
||||
int sqlite3FtsUnicodeIsdiacritic(int);
|
||||
#endif
|
||||
|
||||
#endif /* !SQLITE_CORE || SQLITE_ENABLE_FTS3 */
|
||||
#endif /* _FTSINT_H */
|
||||
556
ext/fts3/fts3_aux.c
Normal file
556
ext/fts3/fts3_aux.c
Normal file
|
|
@ -0,0 +1,556 @@
|
|||
/*
|
||||
** 2011 Jan 27
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
******************************************************************************
|
||||
**
|
||||
*/
|
||||
#include "fts3Int.h"
|
||||
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
|
||||
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
|
||||
typedef struct Fts3auxTable Fts3auxTable;
|
||||
typedef struct Fts3auxCursor Fts3auxCursor;
|
||||
|
||||
struct Fts3auxTable {
|
||||
sqlite3_vtab base; /* Base class used by SQLite core */
|
||||
Fts3Table *pFts3Tab;
|
||||
};
|
||||
|
||||
struct Fts3auxCursor {
|
||||
sqlite3_vtab_cursor base; /* Base class used by SQLite core */
|
||||
Fts3MultiSegReader csr; /* Must be right after "base" */
|
||||
Fts3SegFilter filter;
|
||||
char *zStop;
|
||||
int nStop; /* Byte-length of string zStop */
|
||||
int iLangid; /* Language id to query */
|
||||
int isEof; /* True if cursor is at EOF */
|
||||
sqlite3_int64 iRowid; /* Current rowid */
|
||||
|
||||
int iCol; /* Current value of 'col' column */
|
||||
int nStat; /* Size of aStat[] array */
|
||||
struct Fts3auxColstats {
|
||||
sqlite3_int64 nDoc; /* 'documents' values for current csr row */
|
||||
sqlite3_int64 nOcc; /* 'occurrences' values for current csr row */
|
||||
} *aStat;
|
||||
};
|
||||
|
||||
/*
|
||||
** Schema of the terms table.
|
||||
*/
|
||||
#define FTS3_AUX_SCHEMA \
|
||||
"CREATE TABLE x(term, col, documents, occurrences, languageid HIDDEN)"
|
||||
|
||||
/*
|
||||
** This function does all the work for both the xConnect and xCreate methods.
|
||||
** These tables have no persistent representation of their own, so xConnect
|
||||
** and xCreate are identical operations.
|
||||
*/
|
||||
static int fts3auxConnectMethod(
|
||||
sqlite3 *db, /* Database connection */
|
||||
void *pUnused, /* Unused */
|
||||
int argc, /* Number of elements in argv array */
|
||||
const char * const *argv, /* xCreate/xConnect argument array */
|
||||
sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */
|
||||
char **pzErr /* OUT: sqlite3_malloc'd error message */
|
||||
){
|
||||
char const *zDb; /* Name of database (e.g. "main") */
|
||||
char const *zFts3; /* Name of fts3 table */
|
||||
int nDb; /* Result of strlen(zDb) */
|
||||
int nFts3; /* Result of strlen(zFts3) */
|
||||
sqlite3_int64 nByte; /* Bytes of space to allocate here */
|
||||
int rc; /* value returned by declare_vtab() */
|
||||
Fts3auxTable *p; /* Virtual table object to return */
|
||||
|
||||
UNUSED_PARAMETER(pUnused);
|
||||
|
||||
/* The user should invoke this in one of two forms:
|
||||
**
|
||||
** CREATE VIRTUAL TABLE xxx USING fts4aux(fts4-table);
|
||||
** CREATE VIRTUAL TABLE xxx USING fts4aux(fts4-table-db, fts4-table);
|
||||
*/
|
||||
if( argc!=4 && argc!=5 ) goto bad_args;
|
||||
|
||||
zDb = argv[1];
|
||||
nDb = (int)strlen(zDb);
|
||||
if( argc==5 ){
|
||||
if( nDb==4 && 0==sqlite3_strnicmp("temp", zDb, 4) ){
|
||||
zDb = argv[3];
|
||||
nDb = (int)strlen(zDb);
|
||||
zFts3 = argv[4];
|
||||
}else{
|
||||
goto bad_args;
|
||||
}
|
||||
}else{
|
||||
zFts3 = argv[3];
|
||||
}
|
||||
nFts3 = (int)strlen(zFts3);
|
||||
|
||||
rc = sqlite3_declare_vtab(db, FTS3_AUX_SCHEMA);
|
||||
if( rc!=SQLITE_OK ) return rc;
|
||||
|
||||
nByte = sizeof(Fts3auxTable) + sizeof(Fts3Table) + nDb + nFts3 + 2;
|
||||
p = (Fts3auxTable *)sqlite3_malloc64(nByte);
|
||||
if( !p ) return SQLITE_NOMEM;
|
||||
memset(p, 0, nByte);
|
||||
|
||||
p->pFts3Tab = (Fts3Table *)&p[1];
|
||||
p->pFts3Tab->zDb = (char *)&p->pFts3Tab[1];
|
||||
p->pFts3Tab->zName = &p->pFts3Tab->zDb[nDb+1];
|
||||
p->pFts3Tab->db = db;
|
||||
p->pFts3Tab->nIndex = 1;
|
||||
|
||||
memcpy((char *)p->pFts3Tab->zDb, zDb, nDb);
|
||||
memcpy((char *)p->pFts3Tab->zName, zFts3, nFts3);
|
||||
sqlite3Fts3Dequote((char *)p->pFts3Tab->zName);
|
||||
|
||||
*ppVtab = (sqlite3_vtab *)p;
|
||||
return SQLITE_OK;
|
||||
|
||||
bad_args:
|
||||
sqlite3Fts3ErrMsg(pzErr, "invalid arguments to fts4aux constructor");
|
||||
return SQLITE_ERROR;
|
||||
}
|
||||
|
||||
/*
|
||||
** This function does the work for both the xDisconnect and xDestroy methods.
|
||||
** These tables have no persistent representation of their own, so xDisconnect
|
||||
** and xDestroy are identical operations.
|
||||
*/
|
||||
static int fts3auxDisconnectMethod(sqlite3_vtab *pVtab){
|
||||
Fts3auxTable *p = (Fts3auxTable *)pVtab;
|
||||
Fts3Table *pFts3 = p->pFts3Tab;
|
||||
int i;
|
||||
|
||||
/* Free any prepared statements held */
|
||||
for(i=0; i<SizeofArray(pFts3->aStmt); i++){
|
||||
sqlite3_finalize(pFts3->aStmt[i]);
|
||||
}
|
||||
sqlite3_free(pFts3->zSegmentsTbl);
|
||||
sqlite3_free(p);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
#define FTS4AUX_EQ_CONSTRAINT 1
|
||||
#define FTS4AUX_GE_CONSTRAINT 2
|
||||
#define FTS4AUX_LE_CONSTRAINT 4
|
||||
|
||||
/*
|
||||
** xBestIndex - Analyze a WHERE and ORDER BY clause.
|
||||
*/
|
||||
static int fts3auxBestIndexMethod(
|
||||
sqlite3_vtab *pVTab,
|
||||
sqlite3_index_info *pInfo
|
||||
){
|
||||
int i;
|
||||
int iEq = -1;
|
||||
int iGe = -1;
|
||||
int iLe = -1;
|
||||
int iLangid = -1;
|
||||
int iNext = 1; /* Next free argvIndex value */
|
||||
|
||||
UNUSED_PARAMETER(pVTab);
|
||||
|
||||
/* This vtab delivers always results in "ORDER BY term ASC" order. */
|
||||
if( pInfo->nOrderBy==1
|
||||
&& pInfo->aOrderBy[0].iColumn==0
|
||||
&& pInfo->aOrderBy[0].desc==0
|
||||
){
|
||||
pInfo->orderByConsumed = 1;
|
||||
}
|
||||
|
||||
/* Search for equality and range constraints on the "term" column.
|
||||
** And equality constraints on the hidden "languageid" column. */
|
||||
for(i=0; i<pInfo->nConstraint; i++){
|
||||
if( pInfo->aConstraint[i].usable ){
|
||||
int op = pInfo->aConstraint[i].op;
|
||||
int iCol = pInfo->aConstraint[i].iColumn;
|
||||
|
||||
if( iCol==0 ){
|
||||
if( op==SQLITE_INDEX_CONSTRAINT_EQ ) iEq = i;
|
||||
if( op==SQLITE_INDEX_CONSTRAINT_LT ) iLe = i;
|
||||
if( op==SQLITE_INDEX_CONSTRAINT_LE ) iLe = i;
|
||||
if( op==SQLITE_INDEX_CONSTRAINT_GT ) iGe = i;
|
||||
if( op==SQLITE_INDEX_CONSTRAINT_GE ) iGe = i;
|
||||
}
|
||||
if( iCol==4 ){
|
||||
if( op==SQLITE_INDEX_CONSTRAINT_EQ ) iLangid = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if( iEq>=0 ){
|
||||
pInfo->idxNum = FTS4AUX_EQ_CONSTRAINT;
|
||||
pInfo->aConstraintUsage[iEq].argvIndex = iNext++;
|
||||
pInfo->estimatedCost = 5;
|
||||
}else{
|
||||
pInfo->idxNum = 0;
|
||||
pInfo->estimatedCost = 20000;
|
||||
if( iGe>=0 ){
|
||||
pInfo->idxNum += FTS4AUX_GE_CONSTRAINT;
|
||||
pInfo->aConstraintUsage[iGe].argvIndex = iNext++;
|
||||
pInfo->estimatedCost /= 2;
|
||||
}
|
||||
if( iLe>=0 ){
|
||||
pInfo->idxNum += FTS4AUX_LE_CONSTRAINT;
|
||||
pInfo->aConstraintUsage[iLe].argvIndex = iNext++;
|
||||
pInfo->estimatedCost /= 2;
|
||||
}
|
||||
}
|
||||
if( iLangid>=0 ){
|
||||
pInfo->aConstraintUsage[iLangid].argvIndex = iNext++;
|
||||
pInfo->estimatedCost--;
|
||||
}
|
||||
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** xOpen - Open a cursor.
|
||||
*/
|
||||
static int fts3auxOpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){
|
||||
Fts3auxCursor *pCsr; /* Pointer to cursor object to return */
|
||||
|
||||
UNUSED_PARAMETER(pVTab);
|
||||
|
||||
pCsr = (Fts3auxCursor *)sqlite3_malloc(sizeof(Fts3auxCursor));
|
||||
if( !pCsr ) return SQLITE_NOMEM;
|
||||
memset(pCsr, 0, sizeof(Fts3auxCursor));
|
||||
|
||||
*ppCsr = (sqlite3_vtab_cursor *)pCsr;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** xClose - Close a cursor.
|
||||
*/
|
||||
static int fts3auxCloseMethod(sqlite3_vtab_cursor *pCursor){
|
||||
Fts3Table *pFts3 = ((Fts3auxTable *)pCursor->pVtab)->pFts3Tab;
|
||||
Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor;
|
||||
|
||||
sqlite3Fts3SegmentsClose(pFts3);
|
||||
sqlite3Fts3SegReaderFinish(&pCsr->csr);
|
||||
sqlite3_free((void *)pCsr->filter.zTerm);
|
||||
sqlite3_free(pCsr->zStop);
|
||||
sqlite3_free(pCsr->aStat);
|
||||
sqlite3_free(pCsr);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
static int fts3auxGrowStatArray(Fts3auxCursor *pCsr, int nSize){
|
||||
if( nSize>pCsr->nStat ){
|
||||
struct Fts3auxColstats *aNew;
|
||||
aNew = (struct Fts3auxColstats *)sqlite3_realloc64(pCsr->aStat,
|
||||
sizeof(struct Fts3auxColstats) * nSize
|
||||
);
|
||||
if( aNew==0 ) return SQLITE_NOMEM;
|
||||
memset(&aNew[pCsr->nStat], 0,
|
||||
sizeof(struct Fts3auxColstats) * (nSize - pCsr->nStat)
|
||||
);
|
||||
pCsr->aStat = aNew;
|
||||
pCsr->nStat = nSize;
|
||||
}
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** xNext - Advance the cursor to the next row, if any.
|
||||
*/
|
||||
static int fts3auxNextMethod(sqlite3_vtab_cursor *pCursor){
|
||||
Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor;
|
||||
Fts3Table *pFts3 = ((Fts3auxTable *)pCursor->pVtab)->pFts3Tab;
|
||||
int rc;
|
||||
|
||||
/* Increment our pretend rowid value. */
|
||||
pCsr->iRowid++;
|
||||
|
||||
for(pCsr->iCol++; pCsr->iCol<pCsr->nStat; pCsr->iCol++){
|
||||
if( pCsr->aStat[pCsr->iCol].nDoc>0 ) return SQLITE_OK;
|
||||
}
|
||||
|
||||
rc = sqlite3Fts3SegReaderStep(pFts3, &pCsr->csr);
|
||||
if( rc==SQLITE_ROW ){
|
||||
int i = 0;
|
||||
int nDoclist = pCsr->csr.nDoclist;
|
||||
char *aDoclist = pCsr->csr.aDoclist;
|
||||
int iCol;
|
||||
|
||||
int eState = 0;
|
||||
|
||||
if( pCsr->zStop ){
|
||||
int n = (pCsr->nStop<pCsr->csr.nTerm) ? pCsr->nStop : pCsr->csr.nTerm;
|
||||
int mc = memcmp(pCsr->zStop, pCsr->csr.zTerm, n);
|
||||
if( mc<0 || (mc==0 && pCsr->csr.nTerm>pCsr->nStop) ){
|
||||
pCsr->isEof = 1;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
}
|
||||
|
||||
if( fts3auxGrowStatArray(pCsr, 2) ) return SQLITE_NOMEM;
|
||||
memset(pCsr->aStat, 0, sizeof(struct Fts3auxColstats) * pCsr->nStat);
|
||||
iCol = 0;
|
||||
rc = SQLITE_OK;
|
||||
|
||||
while( i<nDoclist ){
|
||||
sqlite3_int64 v = 0;
|
||||
|
||||
i += sqlite3Fts3GetVarint(&aDoclist[i], &v);
|
||||
switch( eState ){
|
||||
/* State 0. In this state the integer just read was a docid. */
|
||||
case 0:
|
||||
pCsr->aStat[0].nDoc++;
|
||||
eState = 1;
|
||||
iCol = 0;
|
||||
break;
|
||||
|
||||
/* State 1. In this state we are expecting either a 1, indicating
|
||||
** that the following integer will be a column number, or the
|
||||
** start of a position list for column 0.
|
||||
**
|
||||
** The only difference between state 1 and state 2 is that if the
|
||||
** integer encountered in state 1 is not 0 or 1, then we need to
|
||||
** increment the column 0 "nDoc" count for this term.
|
||||
*/
|
||||
case 1:
|
||||
assert( iCol==0 );
|
||||
if( v>1 ){
|
||||
pCsr->aStat[1].nDoc++;
|
||||
}
|
||||
eState = 2;
|
||||
/* fall through */
|
||||
|
||||
case 2:
|
||||
if( v==0 ){ /* 0x00. Next integer will be a docid. */
|
||||
eState = 0;
|
||||
}else if( v==1 ){ /* 0x01. Next integer will be a column number. */
|
||||
eState = 3;
|
||||
}else{ /* 2 or greater. A position. */
|
||||
pCsr->aStat[iCol+1].nOcc++;
|
||||
pCsr->aStat[0].nOcc++;
|
||||
}
|
||||
break;
|
||||
|
||||
/* State 3. The integer just read is a column number. */
|
||||
default: assert( eState==3 );
|
||||
iCol = (int)v;
|
||||
if( iCol<1 ){
|
||||
rc = SQLITE_CORRUPT_VTAB;
|
||||
break;
|
||||
}
|
||||
if( fts3auxGrowStatArray(pCsr, iCol+2) ) return SQLITE_NOMEM;
|
||||
pCsr->aStat[iCol+1].nDoc++;
|
||||
eState = 2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
pCsr->iCol = 0;
|
||||
}else{
|
||||
pCsr->isEof = 1;
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
** xFilter - Initialize a cursor to point at the start of its data.
|
||||
*/
|
||||
static int fts3auxFilterMethod(
|
||||
sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */
|
||||
int idxNum, /* Strategy index */
|
||||
const char *idxStr, /* Unused */
|
||||
int nVal, /* Number of elements in apVal */
|
||||
sqlite3_value **apVal /* Arguments for the indexing scheme */
|
||||
){
|
||||
Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor;
|
||||
Fts3Table *pFts3 = ((Fts3auxTable *)pCursor->pVtab)->pFts3Tab;
|
||||
int rc;
|
||||
int isScan = 0;
|
||||
int iLangVal = 0; /* Language id to query */
|
||||
|
||||
int iEq = -1; /* Index of term=? value in apVal */
|
||||
int iGe = -1; /* Index of term>=? value in apVal */
|
||||
int iLe = -1; /* Index of term<=? value in apVal */
|
||||
int iLangid = -1; /* Index of languageid=? value in apVal */
|
||||
int iNext = 0;
|
||||
|
||||
UNUSED_PARAMETER(nVal);
|
||||
UNUSED_PARAMETER(idxStr);
|
||||
|
||||
assert( idxStr==0 );
|
||||
assert( idxNum==FTS4AUX_EQ_CONSTRAINT || idxNum==0
|
||||
|| idxNum==FTS4AUX_LE_CONSTRAINT || idxNum==FTS4AUX_GE_CONSTRAINT
|
||||
|| idxNum==(FTS4AUX_LE_CONSTRAINT|FTS4AUX_GE_CONSTRAINT)
|
||||
);
|
||||
|
||||
if( idxNum==FTS4AUX_EQ_CONSTRAINT ){
|
||||
iEq = iNext++;
|
||||
}else{
|
||||
isScan = 1;
|
||||
if( idxNum & FTS4AUX_GE_CONSTRAINT ){
|
||||
iGe = iNext++;
|
||||
}
|
||||
if( idxNum & FTS4AUX_LE_CONSTRAINT ){
|
||||
iLe = iNext++;
|
||||
}
|
||||
}
|
||||
if( iNext<nVal ){
|
||||
iLangid = iNext++;
|
||||
}
|
||||
|
||||
/* In case this cursor is being reused, close and zero it. */
|
||||
testcase(pCsr->filter.zTerm);
|
||||
sqlite3Fts3SegReaderFinish(&pCsr->csr);
|
||||
sqlite3_free((void *)pCsr->filter.zTerm);
|
||||
sqlite3_free(pCsr->aStat);
|
||||
sqlite3_free(pCsr->zStop);
|
||||
memset(&pCsr->csr, 0, ((u8*)&pCsr[1]) - (u8*)&pCsr->csr);
|
||||
|
||||
pCsr->filter.flags = FTS3_SEGMENT_REQUIRE_POS|FTS3_SEGMENT_IGNORE_EMPTY;
|
||||
if( isScan ) pCsr->filter.flags |= FTS3_SEGMENT_SCAN;
|
||||
|
||||
if( iEq>=0 || iGe>=0 ){
|
||||
const unsigned char *zStr = sqlite3_value_text(apVal[0]);
|
||||
assert( (iEq==0 && iGe==-1) || (iEq==-1 && iGe==0) );
|
||||
if( zStr ){
|
||||
pCsr->filter.zTerm = sqlite3_mprintf("%s", zStr);
|
||||
if( pCsr->filter.zTerm==0 ) return SQLITE_NOMEM;
|
||||
pCsr->filter.nTerm = (int)strlen(pCsr->filter.zTerm);
|
||||
}
|
||||
}
|
||||
|
||||
if( iLe>=0 ){
|
||||
pCsr->zStop = sqlite3_mprintf("%s", sqlite3_value_text(apVal[iLe]));
|
||||
if( pCsr->zStop==0 ) return SQLITE_NOMEM;
|
||||
pCsr->nStop = (int)strlen(pCsr->zStop);
|
||||
}
|
||||
|
||||
if( iLangid>=0 ){
|
||||
iLangVal = sqlite3_value_int(apVal[iLangid]);
|
||||
|
||||
/* If the user specified a negative value for the languageid, use zero
|
||||
** instead. This works, as the "languageid=?" constraint will also
|
||||
** be tested by the VDBE layer. The test will always be false (since
|
||||
** this module will not return a row with a negative languageid), and
|
||||
** so the overall query will return zero rows. */
|
||||
if( iLangVal<0 ) iLangVal = 0;
|
||||
}
|
||||
pCsr->iLangid = iLangVal;
|
||||
|
||||
rc = sqlite3Fts3SegReaderCursor(pFts3, iLangVal, 0, FTS3_SEGCURSOR_ALL,
|
||||
pCsr->filter.zTerm, pCsr->filter.nTerm, 0, isScan, &pCsr->csr
|
||||
);
|
||||
if( rc==SQLITE_OK ){
|
||||
rc = sqlite3Fts3SegReaderStart(pFts3, &pCsr->csr, &pCsr->filter);
|
||||
}
|
||||
|
||||
if( rc==SQLITE_OK ) rc = fts3auxNextMethod(pCursor);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
** xEof - Return true if the cursor is at EOF, or false otherwise.
|
||||
*/
|
||||
static int fts3auxEofMethod(sqlite3_vtab_cursor *pCursor){
|
||||
Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor;
|
||||
return pCsr->isEof;
|
||||
}
|
||||
|
||||
/*
|
||||
** xColumn - Return a column value.
|
||||
*/
|
||||
static int fts3auxColumnMethod(
|
||||
sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */
|
||||
sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */
|
||||
int iCol /* Index of column to read value from */
|
||||
){
|
||||
Fts3auxCursor *p = (Fts3auxCursor *)pCursor;
|
||||
|
||||
assert( p->isEof==0 );
|
||||
switch( iCol ){
|
||||
case 0: /* term */
|
||||
sqlite3_result_text(pCtx, p->csr.zTerm, p->csr.nTerm, SQLITE_TRANSIENT);
|
||||
break;
|
||||
|
||||
case 1: /* col */
|
||||
if( p->iCol ){
|
||||
sqlite3_result_int(pCtx, p->iCol-1);
|
||||
}else{
|
||||
sqlite3_result_text(pCtx, "*", -1, SQLITE_STATIC);
|
||||
}
|
||||
break;
|
||||
|
||||
case 2: /* documents */
|
||||
sqlite3_result_int64(pCtx, p->aStat[p->iCol].nDoc);
|
||||
break;
|
||||
|
||||
case 3: /* occurrences */
|
||||
sqlite3_result_int64(pCtx, p->aStat[p->iCol].nOcc);
|
||||
break;
|
||||
|
||||
default: /* languageid */
|
||||
assert( iCol==4 );
|
||||
sqlite3_result_int(pCtx, p->iLangid);
|
||||
break;
|
||||
}
|
||||
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** xRowid - Return the current rowid for the cursor.
|
||||
*/
|
||||
static int fts3auxRowidMethod(
|
||||
sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */
|
||||
sqlite_int64 *pRowid /* OUT: Rowid value */
|
||||
){
|
||||
Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor;
|
||||
*pRowid = pCsr->iRowid;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Register the fts3aux module with database connection db. Return SQLITE_OK
|
||||
** if successful or an error code if sqlite3_create_module() fails.
|
||||
*/
|
||||
int sqlite3Fts3InitAux(sqlite3 *db){
|
||||
static const sqlite3_module fts3aux_module = {
|
||||
0, /* iVersion */
|
||||
fts3auxConnectMethod, /* xCreate */
|
||||
fts3auxConnectMethod, /* xConnect */
|
||||
fts3auxBestIndexMethod, /* xBestIndex */
|
||||
fts3auxDisconnectMethod, /* xDisconnect */
|
||||
fts3auxDisconnectMethod, /* xDestroy */
|
||||
fts3auxOpenMethod, /* xOpen */
|
||||
fts3auxCloseMethod, /* xClose */
|
||||
fts3auxFilterMethod, /* xFilter */
|
||||
fts3auxNextMethod, /* xNext */
|
||||
fts3auxEofMethod, /* xEof */
|
||||
fts3auxColumnMethod, /* xColumn */
|
||||
fts3auxRowidMethod, /* xRowid */
|
||||
0, /* xUpdate */
|
||||
0, /* xBegin */
|
||||
0, /* xSync */
|
||||
0, /* xCommit */
|
||||
0, /* xRollback */
|
||||
0, /* xFindFunction */
|
||||
0, /* xRename */
|
||||
0, /* xSavepoint */
|
||||
0, /* xRelease */
|
||||
0, /* xRollbackTo */
|
||||
0 /* xShadowName */
|
||||
};
|
||||
int rc; /* Return code */
|
||||
|
||||
rc = sqlite3_create_module(db, "fts4aux", &fts3aux_module, 0);
|
||||
return rc;
|
||||
}
|
||||
|
||||
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
|
||||
1293
ext/fts3/fts3_expr.c
Normal file
1293
ext/fts3/fts3_expr.c
Normal file
File diff suppressed because it is too large
Load diff
383
ext/fts3/fts3_hash.c
Normal file
383
ext/fts3/fts3_hash.c
Normal file
|
|
@ -0,0 +1,383 @@
|
|||
/*
|
||||
** 2001 September 22
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
*************************************************************************
|
||||
** This is the implementation of generic hash-tables used in SQLite.
|
||||
** We've modified it slightly to serve as a standalone hash table
|
||||
** implementation for the full-text indexing module.
|
||||
*/
|
||||
|
||||
/*
|
||||
** The code in this file is only compiled if:
|
||||
**
|
||||
** * The FTS3 module is being built as an extension
|
||||
** (in which case SQLITE_CORE is not defined), or
|
||||
**
|
||||
** * The FTS3 module is being built into the core of
|
||||
** SQLite (in which case SQLITE_ENABLE_FTS3 is defined).
|
||||
*/
|
||||
#include "fts3Int.h"
|
||||
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "fts3_hash.h"
|
||||
|
||||
/*
|
||||
** Malloc and Free functions
|
||||
*/
|
||||
static void *fts3HashMalloc(sqlite3_int64 n){
|
||||
void *p = sqlite3_malloc64(n);
|
||||
if( p ){
|
||||
memset(p, 0, n);
|
||||
}
|
||||
return p;
|
||||
}
|
||||
static void fts3HashFree(void *p){
|
||||
sqlite3_free(p);
|
||||
}
|
||||
|
||||
/* Turn bulk memory into a hash table object by initializing the
|
||||
** fields of the Hash structure.
|
||||
**
|
||||
** "pNew" is a pointer to the hash table that is to be initialized.
|
||||
** keyClass is one of the constants
|
||||
** FTS3_HASH_BINARY or FTS3_HASH_STRING. The value of keyClass
|
||||
** determines what kind of key the hash table will use. "copyKey" is
|
||||
** true if the hash table should make its own private copy of keys and
|
||||
** false if it should just use the supplied pointer.
|
||||
*/
|
||||
void sqlite3Fts3HashInit(Fts3Hash *pNew, char keyClass, char copyKey){
|
||||
assert( pNew!=0 );
|
||||
assert( keyClass>=FTS3_HASH_STRING && keyClass<=FTS3_HASH_BINARY );
|
||||
pNew->keyClass = keyClass;
|
||||
pNew->copyKey = copyKey;
|
||||
pNew->first = 0;
|
||||
pNew->count = 0;
|
||||
pNew->htsize = 0;
|
||||
pNew->ht = 0;
|
||||
}
|
||||
|
||||
/* Remove all entries from a hash table. Reclaim all memory.
|
||||
** Call this routine to delete a hash table or to reset a hash table
|
||||
** to the empty state.
|
||||
*/
|
||||
void sqlite3Fts3HashClear(Fts3Hash *pH){
|
||||
Fts3HashElem *elem; /* For looping over all elements of the table */
|
||||
|
||||
assert( pH!=0 );
|
||||
elem = pH->first;
|
||||
pH->first = 0;
|
||||
fts3HashFree(pH->ht);
|
||||
pH->ht = 0;
|
||||
pH->htsize = 0;
|
||||
while( elem ){
|
||||
Fts3HashElem *next_elem = elem->next;
|
||||
if( pH->copyKey && elem->pKey ){
|
||||
fts3HashFree(elem->pKey);
|
||||
}
|
||||
fts3HashFree(elem);
|
||||
elem = next_elem;
|
||||
}
|
||||
pH->count = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
** Hash and comparison functions when the mode is FTS3_HASH_STRING
|
||||
*/
|
||||
static int fts3StrHash(const void *pKey, int nKey){
|
||||
const char *z = (const char *)pKey;
|
||||
unsigned h = 0;
|
||||
if( nKey<=0 ) nKey = (int) strlen(z);
|
||||
while( nKey > 0 ){
|
||||
h = (h<<3) ^ h ^ *z++;
|
||||
nKey--;
|
||||
}
|
||||
return (int)(h & 0x7fffffff);
|
||||
}
|
||||
static int fts3StrCompare(const void *pKey1, int n1, const void *pKey2, int n2){
|
||||
if( n1!=n2 ) return 1;
|
||||
return strncmp((const char*)pKey1,(const char*)pKey2,n1);
|
||||
}
|
||||
|
||||
/*
|
||||
** Hash and comparison functions when the mode is FTS3_HASH_BINARY
|
||||
*/
|
||||
static int fts3BinHash(const void *pKey, int nKey){
|
||||
int h = 0;
|
||||
const char *z = (const char *)pKey;
|
||||
while( nKey-- > 0 ){
|
||||
h = (h<<3) ^ h ^ *(z++);
|
||||
}
|
||||
return h & 0x7fffffff;
|
||||
}
|
||||
static int fts3BinCompare(const void *pKey1, int n1, const void *pKey2, int n2){
|
||||
if( n1!=n2 ) return 1;
|
||||
return memcmp(pKey1,pKey2,n1);
|
||||
}
|
||||
|
||||
/*
|
||||
** Return a pointer to the appropriate hash function given the key class.
|
||||
**
|
||||
** The C syntax in this function definition may be unfamilar to some
|
||||
** programmers, so we provide the following additional explanation:
|
||||
**
|
||||
** The name of the function is "ftsHashFunction". The function takes a
|
||||
** single parameter "keyClass". The return value of ftsHashFunction()
|
||||
** is a pointer to another function. Specifically, the return value
|
||||
** of ftsHashFunction() is a pointer to a function that takes two parameters
|
||||
** with types "const void*" and "int" and returns an "int".
|
||||
*/
|
||||
static int (*ftsHashFunction(int keyClass))(const void*,int){
|
||||
if( keyClass==FTS3_HASH_STRING ){
|
||||
return &fts3StrHash;
|
||||
}else{
|
||||
assert( keyClass==FTS3_HASH_BINARY );
|
||||
return &fts3BinHash;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
** Return a pointer to the appropriate hash function given the key class.
|
||||
**
|
||||
** For help in interpreted the obscure C code in the function definition,
|
||||
** see the header comment on the previous function.
|
||||
*/
|
||||
static int (*ftsCompareFunction(int keyClass))(const void*,int,const void*,int){
|
||||
if( keyClass==FTS3_HASH_STRING ){
|
||||
return &fts3StrCompare;
|
||||
}else{
|
||||
assert( keyClass==FTS3_HASH_BINARY );
|
||||
return &fts3BinCompare;
|
||||
}
|
||||
}
|
||||
|
||||
/* Link an element into the hash table
|
||||
*/
|
||||
static void fts3HashInsertElement(
|
||||
Fts3Hash *pH, /* The complete hash table */
|
||||
struct _fts3ht *pEntry, /* The entry into which pNew is inserted */
|
||||
Fts3HashElem *pNew /* The element to be inserted */
|
||||
){
|
||||
Fts3HashElem *pHead; /* First element already in pEntry */
|
||||
pHead = pEntry->chain;
|
||||
if( pHead ){
|
||||
pNew->next = pHead;
|
||||
pNew->prev = pHead->prev;
|
||||
if( pHead->prev ){ pHead->prev->next = pNew; }
|
||||
else { pH->first = pNew; }
|
||||
pHead->prev = pNew;
|
||||
}else{
|
||||
pNew->next = pH->first;
|
||||
if( pH->first ){ pH->first->prev = pNew; }
|
||||
pNew->prev = 0;
|
||||
pH->first = pNew;
|
||||
}
|
||||
pEntry->count++;
|
||||
pEntry->chain = pNew;
|
||||
}
|
||||
|
||||
|
||||
/* Resize the hash table so that it cantains "new_size" buckets.
|
||||
** "new_size" must be a power of 2. The hash table might fail
|
||||
** to resize if sqliteMalloc() fails.
|
||||
**
|
||||
** Return non-zero if a memory allocation error occurs.
|
||||
*/
|
||||
static int fts3Rehash(Fts3Hash *pH, int new_size){
|
||||
struct _fts3ht *new_ht; /* The new hash table */
|
||||
Fts3HashElem *elem, *next_elem; /* For looping over existing elements */
|
||||
int (*xHash)(const void*,int); /* The hash function */
|
||||
|
||||
assert( (new_size & (new_size-1))==0 );
|
||||
new_ht = (struct _fts3ht *)fts3HashMalloc( new_size*sizeof(struct _fts3ht) );
|
||||
if( new_ht==0 ) return 1;
|
||||
fts3HashFree(pH->ht);
|
||||
pH->ht = new_ht;
|
||||
pH->htsize = new_size;
|
||||
xHash = ftsHashFunction(pH->keyClass);
|
||||
for(elem=pH->first, pH->first=0; elem; elem = next_elem){
|
||||
int h = (*xHash)(elem->pKey, elem->nKey) & (new_size-1);
|
||||
next_elem = elem->next;
|
||||
fts3HashInsertElement(pH, &new_ht[h], elem);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* This function (for internal use only) locates an element in an
|
||||
** hash table that matches the given key. The hash for this key has
|
||||
** already been computed and is passed as the 4th parameter.
|
||||
*/
|
||||
static Fts3HashElem *fts3FindElementByHash(
|
||||
const Fts3Hash *pH, /* The pH to be searched */
|
||||
const void *pKey, /* The key we are searching for */
|
||||
int nKey,
|
||||
int h /* The hash for this key. */
|
||||
){
|
||||
Fts3HashElem *elem; /* Used to loop thru the element list */
|
||||
int count; /* Number of elements left to test */
|
||||
int (*xCompare)(const void*,int,const void*,int); /* comparison function */
|
||||
|
||||
if( pH->ht ){
|
||||
struct _fts3ht *pEntry = &pH->ht[h];
|
||||
elem = pEntry->chain;
|
||||
count = pEntry->count;
|
||||
xCompare = ftsCompareFunction(pH->keyClass);
|
||||
while( count-- && elem ){
|
||||
if( (*xCompare)(elem->pKey,elem->nKey,pKey,nKey)==0 ){
|
||||
return elem;
|
||||
}
|
||||
elem = elem->next;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Remove a single entry from the hash table given a pointer to that
|
||||
** element and a hash on the element's key.
|
||||
*/
|
||||
static void fts3RemoveElementByHash(
|
||||
Fts3Hash *pH, /* The pH containing "elem" */
|
||||
Fts3HashElem* elem, /* The element to be removed from the pH */
|
||||
int h /* Hash value for the element */
|
||||
){
|
||||
struct _fts3ht *pEntry;
|
||||
if( elem->prev ){
|
||||
elem->prev->next = elem->next;
|
||||
}else{
|
||||
pH->first = elem->next;
|
||||
}
|
||||
if( elem->next ){
|
||||
elem->next->prev = elem->prev;
|
||||
}
|
||||
pEntry = &pH->ht[h];
|
||||
if( pEntry->chain==elem ){
|
||||
pEntry->chain = elem->next;
|
||||
}
|
||||
pEntry->count--;
|
||||
if( pEntry->count<=0 ){
|
||||
pEntry->chain = 0;
|
||||
}
|
||||
if( pH->copyKey && elem->pKey ){
|
||||
fts3HashFree(elem->pKey);
|
||||
}
|
||||
fts3HashFree( elem );
|
||||
pH->count--;
|
||||
if( pH->count<=0 ){
|
||||
assert( pH->first==0 );
|
||||
assert( pH->count==0 );
|
||||
fts3HashClear(pH);
|
||||
}
|
||||
}
|
||||
|
||||
Fts3HashElem *sqlite3Fts3HashFindElem(
|
||||
const Fts3Hash *pH,
|
||||
const void *pKey,
|
||||
int nKey
|
||||
){
|
||||
int h; /* A hash on key */
|
||||
int (*xHash)(const void*,int); /* The hash function */
|
||||
|
||||
if( pH==0 || pH->ht==0 ) return 0;
|
||||
xHash = ftsHashFunction(pH->keyClass);
|
||||
assert( xHash!=0 );
|
||||
h = (*xHash)(pKey,nKey);
|
||||
assert( (pH->htsize & (pH->htsize-1))==0 );
|
||||
return fts3FindElementByHash(pH,pKey,nKey, h & (pH->htsize-1));
|
||||
}
|
||||
|
||||
/*
|
||||
** Attempt to locate an element of the hash table pH with a key
|
||||
** that matches pKey,nKey. Return the data for this element if it is
|
||||
** found, or NULL if there is no match.
|
||||
*/
|
||||
void *sqlite3Fts3HashFind(const Fts3Hash *pH, const void *pKey, int nKey){
|
||||
Fts3HashElem *pElem; /* The element that matches key (if any) */
|
||||
|
||||
pElem = sqlite3Fts3HashFindElem(pH, pKey, nKey);
|
||||
return pElem ? pElem->data : 0;
|
||||
}
|
||||
|
||||
/* Insert an element into the hash table pH. The key is pKey,nKey
|
||||
** and the data is "data".
|
||||
**
|
||||
** If no element exists with a matching key, then a new
|
||||
** element is created. A copy of the key is made if the copyKey
|
||||
** flag is set. NULL is returned.
|
||||
**
|
||||
** If another element already exists with the same key, then the
|
||||
** new data replaces the old data and the old data is returned.
|
||||
** The key is not copied in this instance. If a malloc fails, then
|
||||
** the new data is returned and the hash table is unchanged.
|
||||
**
|
||||
** If the "data" parameter to this function is NULL, then the
|
||||
** element corresponding to "key" is removed from the hash table.
|
||||
*/
|
||||
void *sqlite3Fts3HashInsert(
|
||||
Fts3Hash *pH, /* The hash table to insert into */
|
||||
const void *pKey, /* The key */
|
||||
int nKey, /* Number of bytes in the key */
|
||||
void *data /* The data */
|
||||
){
|
||||
int hraw; /* Raw hash value of the key */
|
||||
int h; /* the hash of the key modulo hash table size */
|
||||
Fts3HashElem *elem; /* Used to loop thru the element list */
|
||||
Fts3HashElem *new_elem; /* New element added to the pH */
|
||||
int (*xHash)(const void*,int); /* The hash function */
|
||||
|
||||
assert( pH!=0 );
|
||||
xHash = ftsHashFunction(pH->keyClass);
|
||||
assert( xHash!=0 );
|
||||
hraw = (*xHash)(pKey, nKey);
|
||||
assert( (pH->htsize & (pH->htsize-1))==0 );
|
||||
h = hraw & (pH->htsize-1);
|
||||
elem = fts3FindElementByHash(pH,pKey,nKey,h);
|
||||
if( elem ){
|
||||
void *old_data = elem->data;
|
||||
if( data==0 ){
|
||||
fts3RemoveElementByHash(pH,elem,h);
|
||||
}else{
|
||||
elem->data = data;
|
||||
}
|
||||
return old_data;
|
||||
}
|
||||
if( data==0 ) return 0;
|
||||
if( (pH->htsize==0 && fts3Rehash(pH,8))
|
||||
|| (pH->count>=pH->htsize && fts3Rehash(pH, pH->htsize*2))
|
||||
){
|
||||
pH->count = 0;
|
||||
return data;
|
||||
}
|
||||
assert( pH->htsize>0 );
|
||||
new_elem = (Fts3HashElem*)fts3HashMalloc( sizeof(Fts3HashElem) );
|
||||
if( new_elem==0 ) return data;
|
||||
if( pH->copyKey && pKey!=0 ){
|
||||
new_elem->pKey = fts3HashMalloc( nKey );
|
||||
if( new_elem->pKey==0 ){
|
||||
fts3HashFree(new_elem);
|
||||
return data;
|
||||
}
|
||||
memcpy((void*)new_elem->pKey, pKey, nKey);
|
||||
}else{
|
||||
new_elem->pKey = (void*)pKey;
|
||||
}
|
||||
new_elem->nKey = nKey;
|
||||
pH->count++;
|
||||
assert( pH->htsize>0 );
|
||||
assert( (pH->htsize & (pH->htsize-1))==0 );
|
||||
h = hraw & (pH->htsize-1);
|
||||
fts3HashInsertElement(pH, &pH->ht[h], new_elem);
|
||||
new_elem->data = data;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
|
||||
112
ext/fts3/fts3_hash.h
Normal file
112
ext/fts3/fts3_hash.h
Normal file
|
|
@ -0,0 +1,112 @@
|
|||
/*
|
||||
** 2001 September 22
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
*************************************************************************
|
||||
** This is the header file for the generic hash-table implementation
|
||||
** used in SQLite. We've modified it slightly to serve as a standalone
|
||||
** hash table implementation for the full-text indexing module.
|
||||
**
|
||||
*/
|
||||
#ifndef _FTS3_HASH_H_
|
||||
#define _FTS3_HASH_H_
|
||||
|
||||
/* Forward declarations of structures. */
|
||||
typedef struct Fts3Hash Fts3Hash;
|
||||
typedef struct Fts3HashElem Fts3HashElem;
|
||||
|
||||
/* A complete hash table is an instance of the following structure.
|
||||
** The internals of this structure are intended to be opaque -- client
|
||||
** code should not attempt to access or modify the fields of this structure
|
||||
** directly. Change this structure only by using the routines below.
|
||||
** However, many of the "procedures" and "functions" for modifying and
|
||||
** accessing this structure are really macros, so we can't really make
|
||||
** this structure opaque.
|
||||
*/
|
||||
struct Fts3Hash {
|
||||
char keyClass; /* HASH_INT, _POINTER, _STRING, _BINARY */
|
||||
char copyKey; /* True if copy of key made on insert */
|
||||
int count; /* Number of entries in this table */
|
||||
Fts3HashElem *first; /* The first element of the array */
|
||||
int htsize; /* Number of buckets in the hash table */
|
||||
struct _fts3ht { /* the hash table */
|
||||
int count; /* Number of entries with this hash */
|
||||
Fts3HashElem *chain; /* Pointer to first entry with this hash */
|
||||
} *ht;
|
||||
};
|
||||
|
||||
/* Each element in the hash table is an instance of the following
|
||||
** structure. All elements are stored on a single doubly-linked list.
|
||||
**
|
||||
** Again, this structure is intended to be opaque, but it can't really
|
||||
** be opaque because it is used by macros.
|
||||
*/
|
||||
struct Fts3HashElem {
|
||||
Fts3HashElem *next, *prev; /* Next and previous elements in the table */
|
||||
void *data; /* Data associated with this element */
|
||||
void *pKey; int nKey; /* Key associated with this element */
|
||||
};
|
||||
|
||||
/*
|
||||
** There are 2 different modes of operation for a hash table:
|
||||
**
|
||||
** FTS3_HASH_STRING pKey points to a string that is nKey bytes long
|
||||
** (including the null-terminator, if any). Case
|
||||
** is respected in comparisons.
|
||||
**
|
||||
** FTS3_HASH_BINARY pKey points to binary data nKey bytes long.
|
||||
** memcmp() is used to compare keys.
|
||||
**
|
||||
** A copy of the key is made if the copyKey parameter to fts3HashInit is 1.
|
||||
*/
|
||||
#define FTS3_HASH_STRING 1
|
||||
#define FTS3_HASH_BINARY 2
|
||||
|
||||
/*
|
||||
** Access routines. To delete, insert a NULL pointer.
|
||||
*/
|
||||
void sqlite3Fts3HashInit(Fts3Hash *pNew, char keyClass, char copyKey);
|
||||
void *sqlite3Fts3HashInsert(Fts3Hash*, const void *pKey, int nKey, void *pData);
|
||||
void *sqlite3Fts3HashFind(const Fts3Hash*, const void *pKey, int nKey);
|
||||
void sqlite3Fts3HashClear(Fts3Hash*);
|
||||
Fts3HashElem *sqlite3Fts3HashFindElem(const Fts3Hash *, const void *, int);
|
||||
|
||||
/*
|
||||
** Shorthand for the functions above
|
||||
*/
|
||||
#define fts3HashInit sqlite3Fts3HashInit
|
||||
#define fts3HashInsert sqlite3Fts3HashInsert
|
||||
#define fts3HashFind sqlite3Fts3HashFind
|
||||
#define fts3HashClear sqlite3Fts3HashClear
|
||||
#define fts3HashFindElem sqlite3Fts3HashFindElem
|
||||
|
||||
/*
|
||||
** Macros for looping over all elements of a hash table. The idiom is
|
||||
** like this:
|
||||
**
|
||||
** Fts3Hash h;
|
||||
** Fts3HashElem *p;
|
||||
** ...
|
||||
** for(p=fts3HashFirst(&h); p; p=fts3HashNext(p)){
|
||||
** SomeStructure *pData = fts3HashData(p);
|
||||
** // do something with pData
|
||||
** }
|
||||
*/
|
||||
#define fts3HashFirst(H) ((H)->first)
|
||||
#define fts3HashNext(E) ((E)->next)
|
||||
#define fts3HashData(E) ((E)->data)
|
||||
#define fts3HashKey(E) ((E)->pKey)
|
||||
#define fts3HashKeysize(E) ((E)->nKey)
|
||||
|
||||
/*
|
||||
** Number of entries in a hash table
|
||||
*/
|
||||
#define fts3HashCount(H) ((H)->count)
|
||||
|
||||
#endif /* _FTS3_HASH_H_ */
|
||||
262
ext/fts3/fts3_icu.c
Normal file
262
ext/fts3/fts3_icu.c
Normal file
|
|
@ -0,0 +1,262 @@
|
|||
/*
|
||||
** 2007 June 22
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
*************************************************************************
|
||||
** This file implements a tokenizer for fts3 based on the ICU library.
|
||||
*/
|
||||
#include "fts3Int.h"
|
||||
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
|
||||
#ifdef SQLITE_ENABLE_ICU
|
||||
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
#include "fts3_tokenizer.h"
|
||||
|
||||
#include <unicode/ubrk.h>
|
||||
#include <unicode/ucol.h>
|
||||
#include <unicode/ustring.h>
|
||||
#include <unicode/utf16.h>
|
||||
|
||||
typedef struct IcuTokenizer IcuTokenizer;
|
||||
typedef struct IcuCursor IcuCursor;
|
||||
|
||||
struct IcuTokenizer {
|
||||
sqlite3_tokenizer base;
|
||||
char *zLocale;
|
||||
};
|
||||
|
||||
struct IcuCursor {
|
||||
sqlite3_tokenizer_cursor base;
|
||||
|
||||
UBreakIterator *pIter; /* ICU break-iterator object */
|
||||
int nChar; /* Number of UChar elements in pInput */
|
||||
UChar *aChar; /* Copy of input using utf-16 encoding */
|
||||
int *aOffset; /* Offsets of each character in utf-8 input */
|
||||
|
||||
int nBuffer;
|
||||
char *zBuffer;
|
||||
|
||||
int iToken;
|
||||
};
|
||||
|
||||
/*
|
||||
** Create a new tokenizer instance.
|
||||
*/
|
||||
static int icuCreate(
|
||||
int argc, /* Number of entries in argv[] */
|
||||
const char * const *argv, /* Tokenizer creation arguments */
|
||||
sqlite3_tokenizer **ppTokenizer /* OUT: Created tokenizer */
|
||||
){
|
||||
IcuTokenizer *p;
|
||||
int n = 0;
|
||||
|
||||
if( argc>0 ){
|
||||
n = strlen(argv[0])+1;
|
||||
}
|
||||
p = (IcuTokenizer *)sqlite3_malloc64(sizeof(IcuTokenizer)+n);
|
||||
if( !p ){
|
||||
return SQLITE_NOMEM;
|
||||
}
|
||||
memset(p, 0, sizeof(IcuTokenizer));
|
||||
|
||||
if( n ){
|
||||
p->zLocale = (char *)&p[1];
|
||||
memcpy(p->zLocale, argv[0], n);
|
||||
}
|
||||
|
||||
*ppTokenizer = (sqlite3_tokenizer *)p;
|
||||
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Destroy a tokenizer
|
||||
*/
|
||||
static int icuDestroy(sqlite3_tokenizer *pTokenizer){
|
||||
IcuTokenizer *p = (IcuTokenizer *)pTokenizer;
|
||||
sqlite3_free(p);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Prepare to begin tokenizing a particular string. The input
|
||||
** string to be tokenized is pInput[0..nBytes-1]. A cursor
|
||||
** used to incrementally tokenize this string is returned in
|
||||
** *ppCursor.
|
||||
*/
|
||||
static int icuOpen(
|
||||
sqlite3_tokenizer *pTokenizer, /* The tokenizer */
|
||||
const char *zInput, /* Input string */
|
||||
int nInput, /* Length of zInput in bytes */
|
||||
sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */
|
||||
){
|
||||
IcuTokenizer *p = (IcuTokenizer *)pTokenizer;
|
||||
IcuCursor *pCsr;
|
||||
|
||||
const int32_t opt = U_FOLD_CASE_DEFAULT;
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
int nChar;
|
||||
|
||||
UChar32 c;
|
||||
int iInput = 0;
|
||||
int iOut = 0;
|
||||
|
||||
*ppCursor = 0;
|
||||
|
||||
if( zInput==0 ){
|
||||
nInput = 0;
|
||||
zInput = "";
|
||||
}else if( nInput<0 ){
|
||||
nInput = strlen(zInput);
|
||||
}
|
||||
nChar = nInput+1;
|
||||
pCsr = (IcuCursor *)sqlite3_malloc64(
|
||||
sizeof(IcuCursor) + /* IcuCursor */
|
||||
((nChar+3)&~3) * sizeof(UChar) + /* IcuCursor.aChar[] */
|
||||
(nChar+1) * sizeof(int) /* IcuCursor.aOffset[] */
|
||||
);
|
||||
if( !pCsr ){
|
||||
return SQLITE_NOMEM;
|
||||
}
|
||||
memset(pCsr, 0, sizeof(IcuCursor));
|
||||
pCsr->aChar = (UChar *)&pCsr[1];
|
||||
pCsr->aOffset = (int *)&pCsr->aChar[(nChar+3)&~3];
|
||||
|
||||
pCsr->aOffset[iOut] = iInput;
|
||||
U8_NEXT(zInput, iInput, nInput, c);
|
||||
while( c>0 ){
|
||||
int isError = 0;
|
||||
c = u_foldCase(c, opt);
|
||||
U16_APPEND(pCsr->aChar, iOut, nChar, c, isError);
|
||||
if( isError ){
|
||||
sqlite3_free(pCsr);
|
||||
return SQLITE_ERROR;
|
||||
}
|
||||
pCsr->aOffset[iOut] = iInput;
|
||||
|
||||
if( iInput<nInput ){
|
||||
U8_NEXT(zInput, iInput, nInput, c);
|
||||
}else{
|
||||
c = 0;
|
||||
}
|
||||
}
|
||||
|
||||
pCsr->pIter = ubrk_open(UBRK_WORD, p->zLocale, pCsr->aChar, iOut, &status);
|
||||
if( !U_SUCCESS(status) ){
|
||||
sqlite3_free(pCsr);
|
||||
return SQLITE_ERROR;
|
||||
}
|
||||
pCsr->nChar = iOut;
|
||||
|
||||
ubrk_first(pCsr->pIter);
|
||||
*ppCursor = (sqlite3_tokenizer_cursor *)pCsr;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Close a tokenization cursor previously opened by a call to icuOpen().
|
||||
*/
|
||||
static int icuClose(sqlite3_tokenizer_cursor *pCursor){
|
||||
IcuCursor *pCsr = (IcuCursor *)pCursor;
|
||||
ubrk_close(pCsr->pIter);
|
||||
sqlite3_free(pCsr->zBuffer);
|
||||
sqlite3_free(pCsr);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Extract the next token from a tokenization cursor.
|
||||
*/
|
||||
static int icuNext(
|
||||
sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by simpleOpen */
|
||||
const char **ppToken, /* OUT: *ppToken is the token text */
|
||||
int *pnBytes, /* OUT: Number of bytes in token */
|
||||
int *piStartOffset, /* OUT: Starting offset of token */
|
||||
int *piEndOffset, /* OUT: Ending offset of token */
|
||||
int *piPosition /* OUT: Position integer of token */
|
||||
){
|
||||
IcuCursor *pCsr = (IcuCursor *)pCursor;
|
||||
|
||||
int iStart = 0;
|
||||
int iEnd = 0;
|
||||
int nByte = 0;
|
||||
|
||||
while( iStart==iEnd ){
|
||||
UChar32 c;
|
||||
|
||||
iStart = ubrk_current(pCsr->pIter);
|
||||
iEnd = ubrk_next(pCsr->pIter);
|
||||
if( iEnd==UBRK_DONE ){
|
||||
return SQLITE_DONE;
|
||||
}
|
||||
|
||||
while( iStart<iEnd ){
|
||||
int iWhite = iStart;
|
||||
U16_NEXT(pCsr->aChar, iWhite, pCsr->nChar, c);
|
||||
if( u_isspace(c) ){
|
||||
iStart = iWhite;
|
||||
}else{
|
||||
break;
|
||||
}
|
||||
}
|
||||
assert(iStart<=iEnd);
|
||||
}
|
||||
|
||||
do {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
if( nByte ){
|
||||
char *zNew = sqlite3_realloc(pCsr->zBuffer, nByte);
|
||||
if( !zNew ){
|
||||
return SQLITE_NOMEM;
|
||||
}
|
||||
pCsr->zBuffer = zNew;
|
||||
pCsr->nBuffer = nByte;
|
||||
}
|
||||
|
||||
u_strToUTF8(
|
||||
pCsr->zBuffer, pCsr->nBuffer, &nByte, /* Output vars */
|
||||
&pCsr->aChar[iStart], iEnd-iStart, /* Input vars */
|
||||
&status /* Output success/failure */
|
||||
);
|
||||
} while( nByte>pCsr->nBuffer );
|
||||
|
||||
*ppToken = pCsr->zBuffer;
|
||||
*pnBytes = nByte;
|
||||
*piStartOffset = pCsr->aOffset[iStart];
|
||||
*piEndOffset = pCsr->aOffset[iEnd];
|
||||
*piPosition = pCsr->iToken++;
|
||||
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** The set of routines that implement the simple tokenizer
|
||||
*/
|
||||
static const sqlite3_tokenizer_module icuTokenizerModule = {
|
||||
0, /* iVersion */
|
||||
icuCreate, /* xCreate */
|
||||
icuDestroy, /* xCreate */
|
||||
icuOpen, /* xOpen */
|
||||
icuClose, /* xClose */
|
||||
icuNext, /* xNext */
|
||||
0, /* xLanguageid */
|
||||
};
|
||||
|
||||
/*
|
||||
** Set *ppModule to point at the implementation of the ICU tokenizer.
|
||||
*/
|
||||
void sqlite3Fts3IcuTokenizerModule(
|
||||
sqlite3_tokenizer_module const**ppModule
|
||||
){
|
||||
*ppModule = &icuTokenizerModule;
|
||||
}
|
||||
|
||||
#endif /* defined(SQLITE_ENABLE_ICU) */
|
||||
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
|
||||
662
ext/fts3/fts3_porter.c
Normal file
662
ext/fts3/fts3_porter.c
Normal file
|
|
@ -0,0 +1,662 @@
|
|||
/*
|
||||
** 2006 September 30
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
*************************************************************************
|
||||
** Implementation of the full-text-search tokenizer that implements
|
||||
** a Porter stemmer.
|
||||
*/
|
||||
|
||||
/*
|
||||
** The code in this file is only compiled if:
|
||||
**
|
||||
** * The FTS3 module is being built as an extension
|
||||
** (in which case SQLITE_CORE is not defined), or
|
||||
**
|
||||
** * The FTS3 module is being built into the core of
|
||||
** SQLite (in which case SQLITE_ENABLE_FTS3 is defined).
|
||||
*/
|
||||
#include "fts3Int.h"
|
||||
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "fts3_tokenizer.h"
|
||||
|
||||
/*
|
||||
** Class derived from sqlite3_tokenizer
|
||||
*/
|
||||
typedef struct porter_tokenizer {
|
||||
sqlite3_tokenizer base; /* Base class */
|
||||
} porter_tokenizer;
|
||||
|
||||
/*
|
||||
** Class derived from sqlite3_tokenizer_cursor
|
||||
*/
|
||||
typedef struct porter_tokenizer_cursor {
|
||||
sqlite3_tokenizer_cursor base;
|
||||
const char *zInput; /* input we are tokenizing */
|
||||
int nInput; /* size of the input */
|
||||
int iOffset; /* current position in zInput */
|
||||
int iToken; /* index of next token to be returned */
|
||||
char *zToken; /* storage for current token */
|
||||
int nAllocated; /* space allocated to zToken buffer */
|
||||
} porter_tokenizer_cursor;
|
||||
|
||||
|
||||
/*
|
||||
** Create a new tokenizer instance.
|
||||
*/
|
||||
static int porterCreate(
|
||||
int argc, const char * const *argv,
|
||||
sqlite3_tokenizer **ppTokenizer
|
||||
){
|
||||
porter_tokenizer *t;
|
||||
|
||||
UNUSED_PARAMETER(argc);
|
||||
UNUSED_PARAMETER(argv);
|
||||
|
||||
t = (porter_tokenizer *) sqlite3_malloc(sizeof(*t));
|
||||
if( t==NULL ) return SQLITE_NOMEM;
|
||||
memset(t, 0, sizeof(*t));
|
||||
*ppTokenizer = &t->base;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Destroy a tokenizer
|
||||
*/
|
||||
static int porterDestroy(sqlite3_tokenizer *pTokenizer){
|
||||
sqlite3_free(pTokenizer);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Prepare to begin tokenizing a particular string. The input
|
||||
** string to be tokenized is zInput[0..nInput-1]. A cursor
|
||||
** used to incrementally tokenize this string is returned in
|
||||
** *ppCursor.
|
||||
*/
|
||||
static int porterOpen(
|
||||
sqlite3_tokenizer *pTokenizer, /* The tokenizer */
|
||||
const char *zInput, int nInput, /* String to be tokenized */
|
||||
sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */
|
||||
){
|
||||
porter_tokenizer_cursor *c;
|
||||
|
||||
UNUSED_PARAMETER(pTokenizer);
|
||||
|
||||
c = (porter_tokenizer_cursor *) sqlite3_malloc(sizeof(*c));
|
||||
if( c==NULL ) return SQLITE_NOMEM;
|
||||
|
||||
c->zInput = zInput;
|
||||
if( zInput==0 ){
|
||||
c->nInput = 0;
|
||||
}else if( nInput<0 ){
|
||||
c->nInput = (int)strlen(zInput);
|
||||
}else{
|
||||
c->nInput = nInput;
|
||||
}
|
||||
c->iOffset = 0; /* start tokenizing at the beginning */
|
||||
c->iToken = 0;
|
||||
c->zToken = NULL; /* no space allocated, yet. */
|
||||
c->nAllocated = 0;
|
||||
|
||||
*ppCursor = &c->base;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Close a tokenization cursor previously opened by a call to
|
||||
** porterOpen() above.
|
||||
*/
|
||||
static int porterClose(sqlite3_tokenizer_cursor *pCursor){
|
||||
porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor;
|
||||
sqlite3_free(c->zToken);
|
||||
sqlite3_free(c);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
/*
|
||||
** Vowel or consonant
|
||||
*/
|
||||
static const char cType[] = {
|
||||
0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0,
|
||||
1, 1, 1, 2, 1
|
||||
};
|
||||
|
||||
/*
|
||||
** isConsonant() and isVowel() determine if their first character in
|
||||
** the string they point to is a consonant or a vowel, according
|
||||
** to Porter ruls.
|
||||
**
|
||||
** A consonate is any letter other than 'a', 'e', 'i', 'o', or 'u'.
|
||||
** 'Y' is a consonant unless it follows another consonant,
|
||||
** in which case it is a vowel.
|
||||
**
|
||||
** In these routine, the letters are in reverse order. So the 'y' rule
|
||||
** is that 'y' is a consonant unless it is followed by another
|
||||
** consonent.
|
||||
*/
|
||||
static int isVowel(const char*);
|
||||
static int isConsonant(const char *z){
|
||||
int j;
|
||||
char x = *z;
|
||||
if( x==0 ) return 0;
|
||||
assert( x>='a' && x<='z' );
|
||||
j = cType[x-'a'];
|
||||
if( j<2 ) return j;
|
||||
return z[1]==0 || isVowel(z + 1);
|
||||
}
|
||||
static int isVowel(const char *z){
|
||||
int j;
|
||||
char x = *z;
|
||||
if( x==0 ) return 0;
|
||||
assert( x>='a' && x<='z' );
|
||||
j = cType[x-'a'];
|
||||
if( j<2 ) return 1-j;
|
||||
return isConsonant(z + 1);
|
||||
}
|
||||
|
||||
/*
|
||||
** Let any sequence of one or more vowels be represented by V and let
|
||||
** C be sequence of one or more consonants. Then every word can be
|
||||
** represented as:
|
||||
**
|
||||
** [C] (VC){m} [V]
|
||||
**
|
||||
** In prose: A word is an optional consonant followed by zero or
|
||||
** vowel-consonant pairs followed by an optional vowel. "m" is the
|
||||
** number of vowel consonant pairs. This routine computes the value
|
||||
** of m for the first i bytes of a word.
|
||||
**
|
||||
** Return true if the m-value for z is 1 or more. In other words,
|
||||
** return true if z contains at least one vowel that is followed
|
||||
** by a consonant.
|
||||
**
|
||||
** In this routine z[] is in reverse order. So we are really looking
|
||||
** for an instance of a consonant followed by a vowel.
|
||||
*/
|
||||
static int m_gt_0(const char *z){
|
||||
while( isVowel(z) ){ z++; }
|
||||
if( *z==0 ) return 0;
|
||||
while( isConsonant(z) ){ z++; }
|
||||
return *z!=0;
|
||||
}
|
||||
|
||||
/* Like mgt0 above except we are looking for a value of m which is
|
||||
** exactly 1
|
||||
*/
|
||||
static int m_eq_1(const char *z){
|
||||
while( isVowel(z) ){ z++; }
|
||||
if( *z==0 ) return 0;
|
||||
while( isConsonant(z) ){ z++; }
|
||||
if( *z==0 ) return 0;
|
||||
while( isVowel(z) ){ z++; }
|
||||
if( *z==0 ) return 1;
|
||||
while( isConsonant(z) ){ z++; }
|
||||
return *z==0;
|
||||
}
|
||||
|
||||
/* Like mgt0 above except we are looking for a value of m>1 instead
|
||||
** or m>0
|
||||
*/
|
||||
static int m_gt_1(const char *z){
|
||||
while( isVowel(z) ){ z++; }
|
||||
if( *z==0 ) return 0;
|
||||
while( isConsonant(z) ){ z++; }
|
||||
if( *z==0 ) return 0;
|
||||
while( isVowel(z) ){ z++; }
|
||||
if( *z==0 ) return 0;
|
||||
while( isConsonant(z) ){ z++; }
|
||||
return *z!=0;
|
||||
}
|
||||
|
||||
/*
|
||||
** Return TRUE if there is a vowel anywhere within z[0..n-1]
|
||||
*/
|
||||
static int hasVowel(const char *z){
|
||||
while( isConsonant(z) ){ z++; }
|
||||
return *z!=0;
|
||||
}
|
||||
|
||||
/*
|
||||
** Return TRUE if the word ends in a double consonant.
|
||||
**
|
||||
** The text is reversed here. So we are really looking at
|
||||
** the first two characters of z[].
|
||||
*/
|
||||
static int doubleConsonant(const char *z){
|
||||
return isConsonant(z) && z[0]==z[1];
|
||||
}
|
||||
|
||||
/*
|
||||
** Return TRUE if the word ends with three letters which
|
||||
** are consonant-vowel-consonent and where the final consonant
|
||||
** is not 'w', 'x', or 'y'.
|
||||
**
|
||||
** The word is reversed here. So we are really checking the
|
||||
** first three letters and the first one cannot be in [wxy].
|
||||
*/
|
||||
static int star_oh(const char *z){
|
||||
return
|
||||
isConsonant(z) &&
|
||||
z[0]!='w' && z[0]!='x' && z[0]!='y' &&
|
||||
isVowel(z+1) &&
|
||||
isConsonant(z+2);
|
||||
}
|
||||
|
||||
/*
|
||||
** If the word ends with zFrom and xCond() is true for the stem
|
||||
** of the word that preceeds the zFrom ending, then change the
|
||||
** ending to zTo.
|
||||
**
|
||||
** The input word *pz and zFrom are both in reverse order. zTo
|
||||
** is in normal order.
|
||||
**
|
||||
** Return TRUE if zFrom matches. Return FALSE if zFrom does not
|
||||
** match. Not that TRUE is returned even if xCond() fails and
|
||||
** no substitution occurs.
|
||||
*/
|
||||
static int stem(
|
||||
char **pz, /* The word being stemmed (Reversed) */
|
||||
const char *zFrom, /* If the ending matches this... (Reversed) */
|
||||
const char *zTo, /* ... change the ending to this (not reversed) */
|
||||
int (*xCond)(const char*) /* Condition that must be true */
|
||||
){
|
||||
char *z = *pz;
|
||||
while( *zFrom && *zFrom==*z ){ z++; zFrom++; }
|
||||
if( *zFrom!=0 ) return 0;
|
||||
if( xCond && !xCond(z) ) return 1;
|
||||
while( *zTo ){
|
||||
*(--z) = *(zTo++);
|
||||
}
|
||||
*pz = z;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
** This is the fallback stemmer used when the porter stemmer is
|
||||
** inappropriate. The input word is copied into the output with
|
||||
** US-ASCII case folding. If the input word is too long (more
|
||||
** than 20 bytes if it contains no digits or more than 6 bytes if
|
||||
** it contains digits) then word is truncated to 20 or 6 bytes
|
||||
** by taking 10 or 3 bytes from the beginning and end.
|
||||
*/
|
||||
static void copy_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){
|
||||
int i, mx, j;
|
||||
int hasDigit = 0;
|
||||
for(i=0; i<nIn; i++){
|
||||
char c = zIn[i];
|
||||
if( c>='A' && c<='Z' ){
|
||||
zOut[i] = c - 'A' + 'a';
|
||||
}else{
|
||||
if( c>='0' && c<='9' ) hasDigit = 1;
|
||||
zOut[i] = c;
|
||||
}
|
||||
}
|
||||
mx = hasDigit ? 3 : 10;
|
||||
if( nIn>mx*2 ){
|
||||
for(j=mx, i=nIn-mx; i<nIn; i++, j++){
|
||||
zOut[j] = zOut[i];
|
||||
}
|
||||
i = j;
|
||||
}
|
||||
zOut[i] = 0;
|
||||
*pnOut = i;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Stem the input word zIn[0..nIn-1]. Store the output in zOut.
|
||||
** zOut is at least big enough to hold nIn bytes. Write the actual
|
||||
** size of the output word (exclusive of the '\0' terminator) into *pnOut.
|
||||
**
|
||||
** Any upper-case characters in the US-ASCII character set ([A-Z])
|
||||
** are converted to lower case. Upper-case UTF characters are
|
||||
** unchanged.
|
||||
**
|
||||
** Words that are longer than about 20 bytes are stemmed by retaining
|
||||
** a few bytes from the beginning and the end of the word. If the
|
||||
** word contains digits, 3 bytes are taken from the beginning and
|
||||
** 3 bytes from the end. For long words without digits, 10 bytes
|
||||
** are taken from each end. US-ASCII case folding still applies.
|
||||
**
|
||||
** If the input word contains not digits but does characters not
|
||||
** in [a-zA-Z] then no stemming is attempted and this routine just
|
||||
** copies the input into the input into the output with US-ASCII
|
||||
** case folding.
|
||||
**
|
||||
** Stemming never increases the length of the word. So there is
|
||||
** no chance of overflowing the zOut buffer.
|
||||
*/
|
||||
static void porter_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){
|
||||
int i, j;
|
||||
char zReverse[28];
|
||||
char *z, *z2;
|
||||
if( nIn<3 || nIn>=(int)sizeof(zReverse)-7 ){
|
||||
/* The word is too big or too small for the porter stemmer.
|
||||
** Fallback to the copy stemmer */
|
||||
copy_stemmer(zIn, nIn, zOut, pnOut);
|
||||
return;
|
||||
}
|
||||
for(i=0, j=sizeof(zReverse)-6; i<nIn; i++, j--){
|
||||
char c = zIn[i];
|
||||
if( c>='A' && c<='Z' ){
|
||||
zReverse[j] = c + 'a' - 'A';
|
||||
}else if( c>='a' && c<='z' ){
|
||||
zReverse[j] = c;
|
||||
}else{
|
||||
/* The use of a character not in [a-zA-Z] means that we fallback
|
||||
** to the copy stemmer */
|
||||
copy_stemmer(zIn, nIn, zOut, pnOut);
|
||||
return;
|
||||
}
|
||||
}
|
||||
memset(&zReverse[sizeof(zReverse)-5], 0, 5);
|
||||
z = &zReverse[j+1];
|
||||
|
||||
|
||||
/* Step 1a */
|
||||
if( z[0]=='s' ){
|
||||
if(
|
||||
!stem(&z, "sess", "ss", 0) &&
|
||||
!stem(&z, "sei", "i", 0) &&
|
||||
!stem(&z, "ss", "ss", 0)
|
||||
){
|
||||
z++;
|
||||
}
|
||||
}
|
||||
|
||||
/* Step 1b */
|
||||
z2 = z;
|
||||
if( stem(&z, "dee", "ee", m_gt_0) ){
|
||||
/* Do nothing. The work was all in the test */
|
||||
}else if(
|
||||
(stem(&z, "gni", "", hasVowel) || stem(&z, "de", "", hasVowel))
|
||||
&& z!=z2
|
||||
){
|
||||
if( stem(&z, "ta", "ate", 0) ||
|
||||
stem(&z, "lb", "ble", 0) ||
|
||||
stem(&z, "zi", "ize", 0) ){
|
||||
/* Do nothing. The work was all in the test */
|
||||
}else if( doubleConsonant(z) && (*z!='l' && *z!='s' && *z!='z') ){
|
||||
z++;
|
||||
}else if( m_eq_1(z) && star_oh(z) ){
|
||||
*(--z) = 'e';
|
||||
}
|
||||
}
|
||||
|
||||
/* Step 1c */
|
||||
if( z[0]=='y' && hasVowel(z+1) ){
|
||||
z[0] = 'i';
|
||||
}
|
||||
|
||||
/* Step 2 */
|
||||
switch( z[1] ){
|
||||
case 'a':
|
||||
if( !stem(&z, "lanoita", "ate", m_gt_0) ){
|
||||
stem(&z, "lanoit", "tion", m_gt_0);
|
||||
}
|
||||
break;
|
||||
case 'c':
|
||||
if( !stem(&z, "icne", "ence", m_gt_0) ){
|
||||
stem(&z, "icna", "ance", m_gt_0);
|
||||
}
|
||||
break;
|
||||
case 'e':
|
||||
stem(&z, "rezi", "ize", m_gt_0);
|
||||
break;
|
||||
case 'g':
|
||||
stem(&z, "igol", "log", m_gt_0);
|
||||
break;
|
||||
case 'l':
|
||||
if( !stem(&z, "ilb", "ble", m_gt_0)
|
||||
&& !stem(&z, "illa", "al", m_gt_0)
|
||||
&& !stem(&z, "iltne", "ent", m_gt_0)
|
||||
&& !stem(&z, "ile", "e", m_gt_0)
|
||||
){
|
||||
stem(&z, "ilsuo", "ous", m_gt_0);
|
||||
}
|
||||
break;
|
||||
case 'o':
|
||||
if( !stem(&z, "noitazi", "ize", m_gt_0)
|
||||
&& !stem(&z, "noita", "ate", m_gt_0)
|
||||
){
|
||||
stem(&z, "rota", "ate", m_gt_0);
|
||||
}
|
||||
break;
|
||||
case 's':
|
||||
if( !stem(&z, "msila", "al", m_gt_0)
|
||||
&& !stem(&z, "ssenevi", "ive", m_gt_0)
|
||||
&& !stem(&z, "ssenluf", "ful", m_gt_0)
|
||||
){
|
||||
stem(&z, "ssensuo", "ous", m_gt_0);
|
||||
}
|
||||
break;
|
||||
case 't':
|
||||
if( !stem(&z, "itila", "al", m_gt_0)
|
||||
&& !stem(&z, "itivi", "ive", m_gt_0)
|
||||
){
|
||||
stem(&z, "itilib", "ble", m_gt_0);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
/* Step 3 */
|
||||
switch( z[0] ){
|
||||
case 'e':
|
||||
if( !stem(&z, "etaci", "ic", m_gt_0)
|
||||
&& !stem(&z, "evita", "", m_gt_0)
|
||||
){
|
||||
stem(&z, "ezila", "al", m_gt_0);
|
||||
}
|
||||
break;
|
||||
case 'i':
|
||||
stem(&z, "itici", "ic", m_gt_0);
|
||||
break;
|
||||
case 'l':
|
||||
if( !stem(&z, "laci", "ic", m_gt_0) ){
|
||||
stem(&z, "luf", "", m_gt_0);
|
||||
}
|
||||
break;
|
||||
case 's':
|
||||
stem(&z, "ssen", "", m_gt_0);
|
||||
break;
|
||||
}
|
||||
|
||||
/* Step 4 */
|
||||
switch( z[1] ){
|
||||
case 'a':
|
||||
if( z[0]=='l' && m_gt_1(z+2) ){
|
||||
z += 2;
|
||||
}
|
||||
break;
|
||||
case 'c':
|
||||
if( z[0]=='e' && z[2]=='n' && (z[3]=='a' || z[3]=='e') && m_gt_1(z+4) ){
|
||||
z += 4;
|
||||
}
|
||||
break;
|
||||
case 'e':
|
||||
if( z[0]=='r' && m_gt_1(z+2) ){
|
||||
z += 2;
|
||||
}
|
||||
break;
|
||||
case 'i':
|
||||
if( z[0]=='c' && m_gt_1(z+2) ){
|
||||
z += 2;
|
||||
}
|
||||
break;
|
||||
case 'l':
|
||||
if( z[0]=='e' && z[2]=='b' && (z[3]=='a' || z[3]=='i') && m_gt_1(z+4) ){
|
||||
z += 4;
|
||||
}
|
||||
break;
|
||||
case 'n':
|
||||
if( z[0]=='t' ){
|
||||
if( z[2]=='a' ){
|
||||
if( m_gt_1(z+3) ){
|
||||
z += 3;
|
||||
}
|
||||
}else if( z[2]=='e' ){
|
||||
if( !stem(&z, "tneme", "", m_gt_1)
|
||||
&& !stem(&z, "tnem", "", m_gt_1)
|
||||
){
|
||||
stem(&z, "tne", "", m_gt_1);
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
case 'o':
|
||||
if( z[0]=='u' ){
|
||||
if( m_gt_1(z+2) ){
|
||||
z += 2;
|
||||
}
|
||||
}else if( z[3]=='s' || z[3]=='t' ){
|
||||
stem(&z, "noi", "", m_gt_1);
|
||||
}
|
||||
break;
|
||||
case 's':
|
||||
if( z[0]=='m' && z[2]=='i' && m_gt_1(z+3) ){
|
||||
z += 3;
|
||||
}
|
||||
break;
|
||||
case 't':
|
||||
if( !stem(&z, "eta", "", m_gt_1) ){
|
||||
stem(&z, "iti", "", m_gt_1);
|
||||
}
|
||||
break;
|
||||
case 'u':
|
||||
if( z[0]=='s' && z[2]=='o' && m_gt_1(z+3) ){
|
||||
z += 3;
|
||||
}
|
||||
break;
|
||||
case 'v':
|
||||
case 'z':
|
||||
if( z[0]=='e' && z[2]=='i' && m_gt_1(z+3) ){
|
||||
z += 3;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
/* Step 5a */
|
||||
if( z[0]=='e' ){
|
||||
if( m_gt_1(z+1) ){
|
||||
z++;
|
||||
}else if( m_eq_1(z+1) && !star_oh(z+1) ){
|
||||
z++;
|
||||
}
|
||||
}
|
||||
|
||||
/* Step 5b */
|
||||
if( m_gt_1(z) && z[0]=='l' && z[1]=='l' ){
|
||||
z++;
|
||||
}
|
||||
|
||||
/* z[] is now the stemmed word in reverse order. Flip it back
|
||||
** around into forward order and return.
|
||||
*/
|
||||
*pnOut = i = (int)strlen(z);
|
||||
zOut[i] = 0;
|
||||
while( *z ){
|
||||
zOut[--i] = *(z++);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
** Characters that can be part of a token. We assume any character
|
||||
** whose value is greater than 0x80 (any UTF character) can be
|
||||
** part of a token. In other words, delimiters all must have
|
||||
** values of 0x7f or lower.
|
||||
*/
|
||||
static const char porterIdChar[] = {
|
||||
/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */
|
||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */
|
||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */
|
||||
};
|
||||
#define isDelim(C) (((ch=C)&0x80)==0 && (ch<0x30 || !porterIdChar[ch-0x30]))
|
||||
|
||||
/*
|
||||
** Extract the next token from a tokenization cursor. The cursor must
|
||||
** have been opened by a prior call to porterOpen().
|
||||
*/
|
||||
static int porterNext(
|
||||
sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by porterOpen */
|
||||
const char **pzToken, /* OUT: *pzToken is the token text */
|
||||
int *pnBytes, /* OUT: Number of bytes in token */
|
||||
int *piStartOffset, /* OUT: Starting offset of token */
|
||||
int *piEndOffset, /* OUT: Ending offset of token */
|
||||
int *piPosition /* OUT: Position integer of token */
|
||||
){
|
||||
porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor;
|
||||
const char *z = c->zInput;
|
||||
|
||||
while( c->iOffset<c->nInput ){
|
||||
int iStartOffset, ch;
|
||||
|
||||
/* Scan past delimiter characters */
|
||||
while( c->iOffset<c->nInput && isDelim(z[c->iOffset]) ){
|
||||
c->iOffset++;
|
||||
}
|
||||
|
||||
/* Count non-delimiter characters. */
|
||||
iStartOffset = c->iOffset;
|
||||
while( c->iOffset<c->nInput && !isDelim(z[c->iOffset]) ){
|
||||
c->iOffset++;
|
||||
}
|
||||
|
||||
if( c->iOffset>iStartOffset ){
|
||||
int n = c->iOffset-iStartOffset;
|
||||
if( n>c->nAllocated ){
|
||||
char *pNew;
|
||||
c->nAllocated = n+20;
|
||||
pNew = sqlite3_realloc(c->zToken, c->nAllocated);
|
||||
if( !pNew ) return SQLITE_NOMEM;
|
||||
c->zToken = pNew;
|
||||
}
|
||||
porter_stemmer(&z[iStartOffset], n, c->zToken, pnBytes);
|
||||
*pzToken = c->zToken;
|
||||
*piStartOffset = iStartOffset;
|
||||
*piEndOffset = c->iOffset;
|
||||
*piPosition = c->iToken++;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
}
|
||||
return SQLITE_DONE;
|
||||
}
|
||||
|
||||
/*
|
||||
** The set of routines that implement the porter-stemmer tokenizer
|
||||
*/
|
||||
static const sqlite3_tokenizer_module porterTokenizerModule = {
|
||||
0,
|
||||
porterCreate,
|
||||
porterDestroy,
|
||||
porterOpen,
|
||||
porterClose,
|
||||
porterNext,
|
||||
0
|
||||
};
|
||||
|
||||
/*
|
||||
** Allocate a new porter tokenizer. Return a pointer to the new
|
||||
** tokenizer in *ppModule
|
||||
*/
|
||||
void sqlite3Fts3PorterTokenizerModule(
|
||||
sqlite3_tokenizer_module const**ppModule
|
||||
){
|
||||
*ppModule = &porterTokenizerModule;
|
||||
}
|
||||
|
||||
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
|
||||
1751
ext/fts3/fts3_snippet.c
Normal file
1751
ext/fts3/fts3_snippet.c
Normal file
File diff suppressed because it is too large
Load diff
374
ext/fts3/fts3_term.c
Normal file
374
ext/fts3/fts3_term.c
Normal file
|
|
@ -0,0 +1,374 @@
|
|||
/*
|
||||
** 2011 Jan 27
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
******************************************************************************
|
||||
**
|
||||
** This file is not part of the production FTS code. It is only used for
|
||||
** testing. It contains a virtual table implementation that provides direct
|
||||
** access to the full-text index of an FTS table.
|
||||
*/
|
||||
|
||||
#include "fts3Int.h"
|
||||
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
|
||||
#ifdef SQLITE_TEST
|
||||
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
typedef struct Fts3termTable Fts3termTable;
|
||||
typedef struct Fts3termCursor Fts3termCursor;
|
||||
|
||||
struct Fts3termTable {
|
||||
sqlite3_vtab base; /* Base class used by SQLite core */
|
||||
int iIndex; /* Index for Fts3Table.aIndex[] */
|
||||
Fts3Table *pFts3Tab;
|
||||
};
|
||||
|
||||
struct Fts3termCursor {
|
||||
sqlite3_vtab_cursor base; /* Base class used by SQLite core */
|
||||
Fts3MultiSegReader csr; /* Must be right after "base" */
|
||||
Fts3SegFilter filter;
|
||||
|
||||
int isEof; /* True if cursor is at EOF */
|
||||
char *pNext;
|
||||
|
||||
sqlite3_int64 iRowid; /* Current 'rowid' value */
|
||||
sqlite3_int64 iDocid; /* Current 'docid' value */
|
||||
int iCol; /* Current 'col' value */
|
||||
int iPos; /* Current 'pos' value */
|
||||
};
|
||||
|
||||
/*
|
||||
** Schema of the terms table.
|
||||
*/
|
||||
#define FTS3_TERMS_SCHEMA "CREATE TABLE x(term, docid, col, pos)"
|
||||
|
||||
/*
|
||||
** This function does all the work for both the xConnect and xCreate methods.
|
||||
** These tables have no persistent representation of their own, so xConnect
|
||||
** and xCreate are identical operations.
|
||||
*/
|
||||
static int fts3termConnectMethod(
|
||||
sqlite3 *db, /* Database connection */
|
||||
void *pCtx, /* Non-zero for an fts4prefix table */
|
||||
int argc, /* Number of elements in argv array */
|
||||
const char * const *argv, /* xCreate/xConnect argument array */
|
||||
sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */
|
||||
char **pzErr /* OUT: sqlite3_malloc'd error message */
|
||||
){
|
||||
char const *zDb; /* Name of database (e.g. "main") */
|
||||
char const *zFts3; /* Name of fts3 table */
|
||||
int nDb; /* Result of strlen(zDb) */
|
||||
int nFts3; /* Result of strlen(zFts3) */
|
||||
sqlite3_int64 nByte; /* Bytes of space to allocate here */
|
||||
int rc; /* value returned by declare_vtab() */
|
||||
Fts3termTable *p; /* Virtual table object to return */
|
||||
int iIndex = 0;
|
||||
|
||||
UNUSED_PARAMETER(pCtx);
|
||||
if( argc==5 ){
|
||||
iIndex = atoi(argv[4]);
|
||||
argc--;
|
||||
}
|
||||
|
||||
/* The user should specify a single argument - the name of an fts3 table. */
|
||||
if( argc!=4 ){
|
||||
sqlite3Fts3ErrMsg(pzErr,
|
||||
"wrong number of arguments to fts4term constructor"
|
||||
);
|
||||
return SQLITE_ERROR;
|
||||
}
|
||||
|
||||
zDb = argv[1];
|
||||
nDb = (int)strlen(zDb);
|
||||
zFts3 = argv[3];
|
||||
nFts3 = (int)strlen(zFts3);
|
||||
|
||||
rc = sqlite3_declare_vtab(db, FTS3_TERMS_SCHEMA);
|
||||
if( rc!=SQLITE_OK ) return rc;
|
||||
|
||||
nByte = sizeof(Fts3termTable) + sizeof(Fts3Table) + nDb + nFts3 + 2;
|
||||
p = (Fts3termTable *)sqlite3_malloc64(nByte);
|
||||
if( !p ) return SQLITE_NOMEM;
|
||||
memset(p, 0, (size_t)nByte);
|
||||
|
||||
p->pFts3Tab = (Fts3Table *)&p[1];
|
||||
p->pFts3Tab->zDb = (char *)&p->pFts3Tab[1];
|
||||
p->pFts3Tab->zName = &p->pFts3Tab->zDb[nDb+1];
|
||||
p->pFts3Tab->db = db;
|
||||
p->pFts3Tab->nIndex = iIndex+1;
|
||||
p->iIndex = iIndex;
|
||||
|
||||
memcpy((char *)p->pFts3Tab->zDb, zDb, nDb);
|
||||
memcpy((char *)p->pFts3Tab->zName, zFts3, nFts3);
|
||||
sqlite3Fts3Dequote((char *)p->pFts3Tab->zName);
|
||||
|
||||
*ppVtab = (sqlite3_vtab *)p;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** This function does the work for both the xDisconnect and xDestroy methods.
|
||||
** These tables have no persistent representation of their own, so xDisconnect
|
||||
** and xDestroy are identical operations.
|
||||
*/
|
||||
static int fts3termDisconnectMethod(sqlite3_vtab *pVtab){
|
||||
Fts3termTable *p = (Fts3termTable *)pVtab;
|
||||
Fts3Table *pFts3 = p->pFts3Tab;
|
||||
int i;
|
||||
|
||||
/* Free any prepared statements held */
|
||||
for(i=0; i<SizeofArray(pFts3->aStmt); i++){
|
||||
sqlite3_finalize(pFts3->aStmt[i]);
|
||||
}
|
||||
sqlite3_free(pFts3->zSegmentsTbl);
|
||||
sqlite3_free(p);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
#define FTS4AUX_EQ_CONSTRAINT 1
|
||||
#define FTS4AUX_GE_CONSTRAINT 2
|
||||
#define FTS4AUX_LE_CONSTRAINT 4
|
||||
|
||||
/*
|
||||
** xBestIndex - Analyze a WHERE and ORDER BY clause.
|
||||
*/
|
||||
static int fts3termBestIndexMethod(
|
||||
sqlite3_vtab *pVTab,
|
||||
sqlite3_index_info *pInfo
|
||||
){
|
||||
UNUSED_PARAMETER(pVTab);
|
||||
|
||||
/* This vtab naturally does "ORDER BY term, docid, col, pos". */
|
||||
if( pInfo->nOrderBy ){
|
||||
int i;
|
||||
for(i=0; i<pInfo->nOrderBy; i++){
|
||||
if( pInfo->aOrderBy[i].iColumn!=i || pInfo->aOrderBy[i].desc ) break;
|
||||
}
|
||||
if( i==pInfo->nOrderBy ){
|
||||
pInfo->orderByConsumed = 1;
|
||||
}
|
||||
}
|
||||
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** xOpen - Open a cursor.
|
||||
*/
|
||||
static int fts3termOpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){
|
||||
Fts3termCursor *pCsr; /* Pointer to cursor object to return */
|
||||
|
||||
UNUSED_PARAMETER(pVTab);
|
||||
|
||||
pCsr = (Fts3termCursor *)sqlite3_malloc(sizeof(Fts3termCursor));
|
||||
if( !pCsr ) return SQLITE_NOMEM;
|
||||
memset(pCsr, 0, sizeof(Fts3termCursor));
|
||||
|
||||
*ppCsr = (sqlite3_vtab_cursor *)pCsr;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** xClose - Close a cursor.
|
||||
*/
|
||||
static int fts3termCloseMethod(sqlite3_vtab_cursor *pCursor){
|
||||
Fts3Table *pFts3 = ((Fts3termTable *)pCursor->pVtab)->pFts3Tab;
|
||||
Fts3termCursor *pCsr = (Fts3termCursor *)pCursor;
|
||||
|
||||
sqlite3Fts3SegmentsClose(pFts3);
|
||||
sqlite3Fts3SegReaderFinish(&pCsr->csr);
|
||||
sqlite3_free(pCsr);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** xNext - Advance the cursor to the next row, if any.
|
||||
*/
|
||||
static int fts3termNextMethod(sqlite3_vtab_cursor *pCursor){
|
||||
Fts3termCursor *pCsr = (Fts3termCursor *)pCursor;
|
||||
Fts3Table *pFts3 = ((Fts3termTable *)pCursor->pVtab)->pFts3Tab;
|
||||
int rc;
|
||||
sqlite3_int64 v;
|
||||
|
||||
/* Increment our pretend rowid value. */
|
||||
pCsr->iRowid++;
|
||||
|
||||
/* Advance to the next term in the full-text index. */
|
||||
if( pCsr->csr.aDoclist==0
|
||||
|| pCsr->pNext>=&pCsr->csr.aDoclist[pCsr->csr.nDoclist-1]
|
||||
){
|
||||
rc = sqlite3Fts3SegReaderStep(pFts3, &pCsr->csr);
|
||||
if( rc!=SQLITE_ROW ){
|
||||
pCsr->isEof = 1;
|
||||
return rc;
|
||||
}
|
||||
|
||||
pCsr->iCol = 0;
|
||||
pCsr->iPos = 0;
|
||||
pCsr->iDocid = 0;
|
||||
pCsr->pNext = pCsr->csr.aDoclist;
|
||||
|
||||
/* Read docid */
|
||||
pCsr->pNext += sqlite3Fts3GetVarint(pCsr->pNext, &pCsr->iDocid);
|
||||
}
|
||||
|
||||
pCsr->pNext += sqlite3Fts3GetVarint(pCsr->pNext, &v);
|
||||
if( v==0 ){
|
||||
pCsr->pNext += sqlite3Fts3GetVarint(pCsr->pNext, &v);
|
||||
pCsr->iDocid += v;
|
||||
pCsr->pNext += sqlite3Fts3GetVarint(pCsr->pNext, &v);
|
||||
pCsr->iCol = 0;
|
||||
pCsr->iPos = 0;
|
||||
}
|
||||
|
||||
if( v==1 ){
|
||||
pCsr->pNext += sqlite3Fts3GetVarint(pCsr->pNext, &v);
|
||||
pCsr->iCol += (int)v;
|
||||
pCsr->iPos = 0;
|
||||
pCsr->pNext += sqlite3Fts3GetVarint(pCsr->pNext, &v);
|
||||
}
|
||||
|
||||
pCsr->iPos += (int)(v - 2);
|
||||
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** xFilter - Initialize a cursor to point at the start of its data.
|
||||
*/
|
||||
static int fts3termFilterMethod(
|
||||
sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */
|
||||
int idxNum, /* Strategy index */
|
||||
const char *idxStr, /* Unused */
|
||||
int nVal, /* Number of elements in apVal */
|
||||
sqlite3_value **apVal /* Arguments for the indexing scheme */
|
||||
){
|
||||
Fts3termCursor *pCsr = (Fts3termCursor *)pCursor;
|
||||
Fts3termTable *p = (Fts3termTable *)pCursor->pVtab;
|
||||
Fts3Table *pFts3 = p->pFts3Tab;
|
||||
int rc;
|
||||
|
||||
UNUSED_PARAMETER(nVal);
|
||||
UNUSED_PARAMETER(idxNum);
|
||||
UNUSED_PARAMETER(idxStr);
|
||||
UNUSED_PARAMETER(apVal);
|
||||
|
||||
assert( idxStr==0 && idxNum==0 );
|
||||
|
||||
/* In case this cursor is being reused, close and zero it. */
|
||||
testcase(pCsr->filter.zTerm);
|
||||
sqlite3Fts3SegReaderFinish(&pCsr->csr);
|
||||
memset(&pCsr->csr, 0, ((u8*)&pCsr[1]) - (u8*)&pCsr->csr);
|
||||
|
||||
pCsr->filter.flags = FTS3_SEGMENT_REQUIRE_POS|FTS3_SEGMENT_IGNORE_EMPTY;
|
||||
pCsr->filter.flags |= FTS3_SEGMENT_SCAN;
|
||||
|
||||
rc = sqlite3Fts3SegReaderCursor(pFts3, 0, p->iIndex, FTS3_SEGCURSOR_ALL,
|
||||
pCsr->filter.zTerm, pCsr->filter.nTerm, 0, 1, &pCsr->csr
|
||||
);
|
||||
if( rc==SQLITE_OK ){
|
||||
rc = sqlite3Fts3SegReaderStart(pFts3, &pCsr->csr, &pCsr->filter);
|
||||
}
|
||||
if( rc==SQLITE_OK ){
|
||||
rc = fts3termNextMethod(pCursor);
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
** xEof - Return true if the cursor is at EOF, or false otherwise.
|
||||
*/
|
||||
static int fts3termEofMethod(sqlite3_vtab_cursor *pCursor){
|
||||
Fts3termCursor *pCsr = (Fts3termCursor *)pCursor;
|
||||
return pCsr->isEof;
|
||||
}
|
||||
|
||||
/*
|
||||
** xColumn - Return a column value.
|
||||
*/
|
||||
static int fts3termColumnMethod(
|
||||
sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */
|
||||
sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */
|
||||
int iCol /* Index of column to read value from */
|
||||
){
|
||||
Fts3termCursor *p = (Fts3termCursor *)pCursor;
|
||||
|
||||
assert( iCol>=0 && iCol<=3 );
|
||||
switch( iCol ){
|
||||
case 0:
|
||||
sqlite3_result_text(pCtx, p->csr.zTerm, p->csr.nTerm, SQLITE_TRANSIENT);
|
||||
break;
|
||||
case 1:
|
||||
sqlite3_result_int64(pCtx, p->iDocid);
|
||||
break;
|
||||
case 2:
|
||||
sqlite3_result_int64(pCtx, p->iCol);
|
||||
break;
|
||||
default:
|
||||
sqlite3_result_int64(pCtx, p->iPos);
|
||||
break;
|
||||
}
|
||||
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** xRowid - Return the current rowid for the cursor.
|
||||
*/
|
||||
static int fts3termRowidMethod(
|
||||
sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */
|
||||
sqlite_int64 *pRowid /* OUT: Rowid value */
|
||||
){
|
||||
Fts3termCursor *pCsr = (Fts3termCursor *)pCursor;
|
||||
*pRowid = pCsr->iRowid;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Register the fts3term module with database connection db. Return SQLITE_OK
|
||||
** if successful or an error code if sqlite3_create_module() fails.
|
||||
*/
|
||||
int sqlite3Fts3InitTerm(sqlite3 *db){
|
||||
static const sqlite3_module fts3term_module = {
|
||||
0, /* iVersion */
|
||||
fts3termConnectMethod, /* xCreate */
|
||||
fts3termConnectMethod, /* xConnect */
|
||||
fts3termBestIndexMethod, /* xBestIndex */
|
||||
fts3termDisconnectMethod, /* xDisconnect */
|
||||
fts3termDisconnectMethod, /* xDestroy */
|
||||
fts3termOpenMethod, /* xOpen */
|
||||
fts3termCloseMethod, /* xClose */
|
||||
fts3termFilterMethod, /* xFilter */
|
||||
fts3termNextMethod, /* xNext */
|
||||
fts3termEofMethod, /* xEof */
|
||||
fts3termColumnMethod, /* xColumn */
|
||||
fts3termRowidMethod, /* xRowid */
|
||||
0, /* xUpdate */
|
||||
0, /* xBegin */
|
||||
0, /* xSync */
|
||||
0, /* xCommit */
|
||||
0, /* xRollback */
|
||||
0, /* xFindFunction */
|
||||
0, /* xRename */
|
||||
0, /* xSavepoint */
|
||||
0, /* xRelease */
|
||||
0, /* xRollbackTo */
|
||||
0 /* xShadowName */
|
||||
};
|
||||
int rc; /* Return code */
|
||||
|
||||
rc = sqlite3_create_module(db, "fts4term", &fts3term_module, 0);
|
||||
return rc;
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
|
||||
623
ext/fts3/fts3_test.c
Normal file
623
ext/fts3/fts3_test.c
Normal file
|
|
@ -0,0 +1,623 @@
|
|||
/*
|
||||
** 2011 Jun 13
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
******************************************************************************
|
||||
**
|
||||
** This file is not part of the production FTS code. It is only used for
|
||||
** testing. It contains a Tcl command that can be used to test if a document
|
||||
** matches an FTS NEAR expression.
|
||||
**
|
||||
** As of March 2012, it also contains a version 1 tokenizer used for testing
|
||||
** that the sqlite3_tokenizer_module.xLanguage() method is invoked correctly.
|
||||
*/
|
||||
|
||||
#if defined(INCLUDE_SQLITE_TCL_H)
|
||||
# include "sqlite_tcl.h"
|
||||
#else
|
||||
# include "tcl.h"
|
||||
# ifndef SQLITE_TCLAPI
|
||||
# define SQLITE_TCLAPI
|
||||
# endif
|
||||
#endif
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
|
||||
#if defined(SQLITE_TEST)
|
||||
#if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4)
|
||||
|
||||
/* Required so that the "ifdef SQLITE_ENABLE_FTS3" below works */
|
||||
#include "fts3Int.h"
|
||||
|
||||
#define NM_MAX_TOKEN 12
|
||||
|
||||
typedef struct NearPhrase NearPhrase;
|
||||
typedef struct NearDocument NearDocument;
|
||||
typedef struct NearToken NearToken;
|
||||
|
||||
struct NearDocument {
|
||||
int nToken; /* Length of token in bytes */
|
||||
NearToken *aToken; /* Token array */
|
||||
};
|
||||
|
||||
struct NearToken {
|
||||
int n; /* Length of token in bytes */
|
||||
const char *z; /* Pointer to token string */
|
||||
};
|
||||
|
||||
struct NearPhrase {
|
||||
int nNear; /* Preceding NEAR value */
|
||||
int nToken; /* Number of tokens in this phrase */
|
||||
NearToken aToken[NM_MAX_TOKEN]; /* Array of tokens in this phrase */
|
||||
};
|
||||
|
||||
static int nm_phrase_match(
|
||||
NearPhrase *p,
|
||||
NearToken *aToken
|
||||
){
|
||||
int ii;
|
||||
|
||||
for(ii=0; ii<p->nToken; ii++){
|
||||
NearToken *pToken = &p->aToken[ii];
|
||||
if( pToken->n>0 && pToken->z[pToken->n-1]=='*' ){
|
||||
if( aToken[ii].n<(pToken->n-1) ) return 0;
|
||||
if( memcmp(aToken[ii].z, pToken->z, pToken->n-1) ) return 0;
|
||||
}else{
|
||||
if( aToken[ii].n!=pToken->n ) return 0;
|
||||
if( memcmp(aToken[ii].z, pToken->z, pToken->n) ) return 0;
|
||||
}
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int nm_near_chain(
|
||||
int iDir, /* Direction to iterate through aPhrase[] */
|
||||
NearDocument *pDoc, /* Document to match against */
|
||||
int iPos, /* Position at which iPhrase was found */
|
||||
int nPhrase, /* Size of phrase array */
|
||||
NearPhrase *aPhrase, /* Phrase array */
|
||||
int iPhrase /* Index of phrase found */
|
||||
){
|
||||
int iStart;
|
||||
int iStop;
|
||||
int ii;
|
||||
int nNear;
|
||||
int iPhrase2;
|
||||
NearPhrase *p;
|
||||
NearPhrase *pPrev;
|
||||
|
||||
assert( iDir==1 || iDir==-1 );
|
||||
|
||||
if( iDir==1 ){
|
||||
if( (iPhrase+1)==nPhrase ) return 1;
|
||||
nNear = aPhrase[iPhrase+1].nNear;
|
||||
}else{
|
||||
if( iPhrase==0 ) return 1;
|
||||
nNear = aPhrase[iPhrase].nNear;
|
||||
}
|
||||
pPrev = &aPhrase[iPhrase];
|
||||
iPhrase2 = iPhrase+iDir;
|
||||
p = &aPhrase[iPhrase2];
|
||||
|
||||
iStart = iPos - nNear - p->nToken;
|
||||
iStop = iPos + nNear + pPrev->nToken;
|
||||
|
||||
if( iStart<0 ) iStart = 0;
|
||||
if( iStop > pDoc->nToken - p->nToken ) iStop = pDoc->nToken - p->nToken;
|
||||
|
||||
for(ii=iStart; ii<=iStop; ii++){
|
||||
if( nm_phrase_match(p, &pDoc->aToken[ii]) ){
|
||||
if( nm_near_chain(iDir, pDoc, ii, nPhrase, aPhrase, iPhrase2) ) return 1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int nm_match_count(
|
||||
NearDocument *pDoc, /* Document to match against */
|
||||
int nPhrase, /* Size of phrase array */
|
||||
NearPhrase *aPhrase, /* Phrase array */
|
||||
int iPhrase /* Index of phrase to count matches for */
|
||||
){
|
||||
int nOcc = 0;
|
||||
int ii;
|
||||
NearPhrase *p = &aPhrase[iPhrase];
|
||||
|
||||
for(ii=0; ii<(pDoc->nToken + 1 - p->nToken); ii++){
|
||||
if( nm_phrase_match(p, &pDoc->aToken[ii]) ){
|
||||
/* Test forward NEAR chain (i>iPhrase) */
|
||||
if( 0==nm_near_chain(1, pDoc, ii, nPhrase, aPhrase, iPhrase) ) continue;
|
||||
|
||||
/* Test reverse NEAR chain (i<iPhrase) */
|
||||
if( 0==nm_near_chain(-1, pDoc, ii, nPhrase, aPhrase, iPhrase) ) continue;
|
||||
|
||||
/* This is a real match. Increment the counter. */
|
||||
nOcc++;
|
||||
}
|
||||
}
|
||||
|
||||
return nOcc;
|
||||
}
|
||||
|
||||
/*
|
||||
** Tclcmd: fts3_near_match DOCUMENT EXPR ?OPTIONS?
|
||||
*/
|
||||
static int SQLITE_TCLAPI fts3_near_match_cmd(
|
||||
ClientData clientData,
|
||||
Tcl_Interp *interp,
|
||||
int objc,
|
||||
Tcl_Obj *CONST objv[]
|
||||
){
|
||||
int nTotal = 0;
|
||||
int rc;
|
||||
int ii;
|
||||
int nPhrase;
|
||||
NearPhrase *aPhrase = 0;
|
||||
NearDocument doc = {0, 0};
|
||||
Tcl_Obj **apDocToken;
|
||||
Tcl_Obj *pRet;
|
||||
Tcl_Obj *pPhrasecount = 0;
|
||||
|
||||
Tcl_Obj **apExprToken;
|
||||
int nExprToken;
|
||||
|
||||
UNUSED_PARAMETER(clientData);
|
||||
|
||||
/* Must have 3 or more arguments. */
|
||||
if( objc<3 || (objc%2)==0 ){
|
||||
Tcl_WrongNumArgs(interp, 1, objv, "DOCUMENT EXPR ?OPTION VALUE?...");
|
||||
rc = TCL_ERROR;
|
||||
goto near_match_out;
|
||||
}
|
||||
|
||||
for(ii=3; ii<objc; ii+=2){
|
||||
enum NM_enum { NM_PHRASECOUNTS };
|
||||
struct TestnmSubcmd {
|
||||
char *zName;
|
||||
enum NM_enum eOpt;
|
||||
} aOpt[] = {
|
||||
{ "-phrasecountvar", NM_PHRASECOUNTS },
|
||||
{ 0, 0 }
|
||||
};
|
||||
int iOpt;
|
||||
if( Tcl_GetIndexFromObjStruct(
|
||||
interp, objv[ii], aOpt, sizeof(aOpt[0]), "option", 0, &iOpt)
|
||||
){
|
||||
return TCL_ERROR;
|
||||
}
|
||||
|
||||
switch( aOpt[iOpt].eOpt ){
|
||||
case NM_PHRASECOUNTS:
|
||||
pPhrasecount = objv[ii+1];
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
rc = Tcl_ListObjGetElements(interp, objv[1], &doc.nToken, &apDocToken);
|
||||
if( rc!=TCL_OK ) goto near_match_out;
|
||||
doc.aToken = (NearToken *)ckalloc(doc.nToken*sizeof(NearToken));
|
||||
for(ii=0; ii<doc.nToken; ii++){
|
||||
doc.aToken[ii].z = Tcl_GetStringFromObj(apDocToken[ii], &doc.aToken[ii].n);
|
||||
}
|
||||
|
||||
rc = Tcl_ListObjGetElements(interp, objv[2], &nExprToken, &apExprToken);
|
||||
if( rc!=TCL_OK ) goto near_match_out;
|
||||
|
||||
nPhrase = (nExprToken + 1) / 2;
|
||||
aPhrase = (NearPhrase *)ckalloc(nPhrase * sizeof(NearPhrase));
|
||||
memset(aPhrase, 0, nPhrase * sizeof(NearPhrase));
|
||||
for(ii=0; ii<nPhrase; ii++){
|
||||
Tcl_Obj *pPhrase = apExprToken[ii*2];
|
||||
Tcl_Obj **apToken;
|
||||
int nToken;
|
||||
int jj;
|
||||
|
||||
rc = Tcl_ListObjGetElements(interp, pPhrase, &nToken, &apToken);
|
||||
if( rc!=TCL_OK ) goto near_match_out;
|
||||
if( nToken>NM_MAX_TOKEN ){
|
||||
Tcl_AppendResult(interp, "Too many tokens in phrase", 0);
|
||||
rc = TCL_ERROR;
|
||||
goto near_match_out;
|
||||
}
|
||||
for(jj=0; jj<nToken; jj++){
|
||||
NearToken *pT = &aPhrase[ii].aToken[jj];
|
||||
pT->z = Tcl_GetStringFromObj(apToken[jj], &pT->n);
|
||||
}
|
||||
aPhrase[ii].nToken = nToken;
|
||||
}
|
||||
for(ii=1; ii<nPhrase; ii++){
|
||||
Tcl_Obj *pNear = apExprToken[2*ii-1];
|
||||
int nNear;
|
||||
rc = Tcl_GetIntFromObj(interp, pNear, &nNear);
|
||||
if( rc!=TCL_OK ) goto near_match_out;
|
||||
aPhrase[ii].nNear = nNear;
|
||||
}
|
||||
|
||||
pRet = Tcl_NewObj();
|
||||
Tcl_IncrRefCount(pRet);
|
||||
for(ii=0; ii<nPhrase; ii++){
|
||||
int nOcc = nm_match_count(&doc, nPhrase, aPhrase, ii);
|
||||
Tcl_ListObjAppendElement(interp, pRet, Tcl_NewIntObj(nOcc));
|
||||
nTotal += nOcc;
|
||||
}
|
||||
if( pPhrasecount ){
|
||||
Tcl_ObjSetVar2(interp, pPhrasecount, 0, pRet, 0);
|
||||
}
|
||||
Tcl_DecrRefCount(pRet);
|
||||
Tcl_SetObjResult(interp, Tcl_NewBooleanObj(nTotal>0));
|
||||
|
||||
near_match_out:
|
||||
ckfree((char *)aPhrase);
|
||||
ckfree((char *)doc.aToken);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
** Tclcmd: fts3_configure_incr_load ?CHUNKSIZE THRESHOLD?
|
||||
**
|
||||
** Normally, FTS uses hard-coded values to determine the minimum doclist
|
||||
** size eligible for incremental loading, and the size of the chunks loaded
|
||||
** when a doclist is incrementally loaded. This command allows the built-in
|
||||
** values to be overridden for testing purposes.
|
||||
**
|
||||
** If present, the first argument is the chunksize in bytes to load doclists
|
||||
** in. The second argument is the minimum doclist size in bytes to use
|
||||
** incremental loading with.
|
||||
**
|
||||
** Whether or not the arguments are present, this command returns a list of
|
||||
** two integers - the initial chunksize and threshold when the command is
|
||||
** invoked. This can be used to restore the default behavior after running
|
||||
** tests. For example:
|
||||
**
|
||||
** # Override incr-load settings for testing:
|
||||
** set cfg [fts3_configure_incr_load $new_chunksize $new_threshold]
|
||||
**
|
||||
** .... run tests ....
|
||||
**
|
||||
** # Restore initial incr-load settings:
|
||||
** eval fts3_configure_incr_load $cfg
|
||||
*/
|
||||
static int SQLITE_TCLAPI fts3_configure_incr_load_cmd(
|
||||
ClientData clientData,
|
||||
Tcl_Interp *interp,
|
||||
int objc,
|
||||
Tcl_Obj *CONST objv[]
|
||||
){
|
||||
#ifdef SQLITE_ENABLE_FTS3
|
||||
extern int test_fts3_node_chunksize;
|
||||
extern int test_fts3_node_chunk_threshold;
|
||||
Tcl_Obj *pRet;
|
||||
|
||||
if( objc!=1 && objc!=3 ){
|
||||
Tcl_WrongNumArgs(interp, 1, objv, "?CHUNKSIZE THRESHOLD?");
|
||||
return TCL_ERROR;
|
||||
}
|
||||
|
||||
pRet = Tcl_NewObj();
|
||||
Tcl_IncrRefCount(pRet);
|
||||
Tcl_ListObjAppendElement(
|
||||
interp, pRet, Tcl_NewIntObj(test_fts3_node_chunksize));
|
||||
Tcl_ListObjAppendElement(
|
||||
interp, pRet, Tcl_NewIntObj(test_fts3_node_chunk_threshold));
|
||||
|
||||
if( objc==3 ){
|
||||
int iArg1;
|
||||
int iArg2;
|
||||
if( Tcl_GetIntFromObj(interp, objv[1], &iArg1)
|
||||
|| Tcl_GetIntFromObj(interp, objv[2], &iArg2)
|
||||
){
|
||||
Tcl_DecrRefCount(pRet);
|
||||
return TCL_ERROR;
|
||||
}
|
||||
test_fts3_node_chunksize = iArg1;
|
||||
test_fts3_node_chunk_threshold = iArg2;
|
||||
}
|
||||
|
||||
Tcl_SetObjResult(interp, pRet);
|
||||
Tcl_DecrRefCount(pRet);
|
||||
#endif
|
||||
UNUSED_PARAMETER(clientData);
|
||||
return TCL_OK;
|
||||
}
|
||||
|
||||
#ifdef SQLITE_ENABLE_FTS3
|
||||
/**************************************************************************
|
||||
** Beginning of test tokenizer code.
|
||||
**
|
||||
** For language 0, this tokenizer is similar to the default 'simple'
|
||||
** tokenizer. For other languages L, the following:
|
||||
**
|
||||
** * Odd numbered languages are case-sensitive. Even numbered
|
||||
** languages are not.
|
||||
**
|
||||
** * Language ids 100 or greater are considered an error.
|
||||
**
|
||||
** The implementation assumes that the input contains only ASCII characters
|
||||
** (i.e. those that may be encoded in UTF-8 using a single byte).
|
||||
*/
|
||||
typedef struct test_tokenizer {
|
||||
sqlite3_tokenizer base;
|
||||
} test_tokenizer;
|
||||
|
||||
typedef struct test_tokenizer_cursor {
|
||||
sqlite3_tokenizer_cursor base;
|
||||
const char *aInput; /* Input being tokenized */
|
||||
int nInput; /* Size of the input in bytes */
|
||||
int iInput; /* Current offset in aInput */
|
||||
int iToken; /* Index of next token to be returned */
|
||||
char *aBuffer; /* Buffer containing current token */
|
||||
int nBuffer; /* Number of bytes allocated at pToken */
|
||||
int iLangid; /* Configured language id */
|
||||
} test_tokenizer_cursor;
|
||||
|
||||
static int testTokenizerCreate(
|
||||
int argc, const char * const *argv,
|
||||
sqlite3_tokenizer **ppTokenizer
|
||||
){
|
||||
test_tokenizer *pNew;
|
||||
UNUSED_PARAMETER(argc);
|
||||
UNUSED_PARAMETER(argv);
|
||||
|
||||
pNew = sqlite3_malloc(sizeof(test_tokenizer));
|
||||
if( !pNew ) return SQLITE_NOMEM;
|
||||
memset(pNew, 0, sizeof(test_tokenizer));
|
||||
|
||||
*ppTokenizer = (sqlite3_tokenizer *)pNew;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
static int testTokenizerDestroy(sqlite3_tokenizer *pTokenizer){
|
||||
test_tokenizer *p = (test_tokenizer *)pTokenizer;
|
||||
sqlite3_free(p);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
static int testTokenizerOpen(
|
||||
sqlite3_tokenizer *pTokenizer, /* The tokenizer */
|
||||
const char *pInput, int nBytes, /* String to be tokenized */
|
||||
sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */
|
||||
){
|
||||
int rc = SQLITE_OK; /* Return code */
|
||||
test_tokenizer_cursor *pCsr; /* New cursor object */
|
||||
|
||||
UNUSED_PARAMETER(pTokenizer);
|
||||
|
||||
pCsr = (test_tokenizer_cursor *)sqlite3_malloc(sizeof(test_tokenizer_cursor));
|
||||
if( pCsr==0 ){
|
||||
rc = SQLITE_NOMEM;
|
||||
}else{
|
||||
memset(pCsr, 0, sizeof(test_tokenizer_cursor));
|
||||
pCsr->aInput = pInput;
|
||||
if( nBytes<0 ){
|
||||
pCsr->nInput = (int)strlen(pInput);
|
||||
}else{
|
||||
pCsr->nInput = nBytes;
|
||||
}
|
||||
}
|
||||
|
||||
*ppCursor = (sqlite3_tokenizer_cursor *)pCsr;
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int testTokenizerClose(sqlite3_tokenizer_cursor *pCursor){
|
||||
test_tokenizer_cursor *pCsr = (test_tokenizer_cursor *)pCursor;
|
||||
sqlite3_free(pCsr->aBuffer);
|
||||
sqlite3_free(pCsr);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
static int testIsTokenChar(char c){
|
||||
return (c>='a' && c<='z') || (c>='A' && c<='Z');
|
||||
}
|
||||
static int testTolower(char c){
|
||||
char ret = c;
|
||||
if( ret>='A' && ret<='Z') ret = ret - ('A'-'a');
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int testTokenizerNext(
|
||||
sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by testTokenizerOpen */
|
||||
const char **ppToken, /* OUT: *ppToken is the token text */
|
||||
int *pnBytes, /* OUT: Number of bytes in token */
|
||||
int *piStartOffset, /* OUT: Starting offset of token */
|
||||
int *piEndOffset, /* OUT: Ending offset of token */
|
||||
int *piPosition /* OUT: Position integer of token */
|
||||
){
|
||||
test_tokenizer_cursor *pCsr = (test_tokenizer_cursor *)pCursor;
|
||||
int rc = SQLITE_OK;
|
||||
const char *p;
|
||||
const char *pEnd;
|
||||
|
||||
p = &pCsr->aInput[pCsr->iInput];
|
||||
pEnd = &pCsr->aInput[pCsr->nInput];
|
||||
|
||||
/* Skip past any white-space */
|
||||
assert( p<=pEnd );
|
||||
while( p<pEnd && testIsTokenChar(*p)==0 ) p++;
|
||||
|
||||
if( p==pEnd ){
|
||||
rc = SQLITE_DONE;
|
||||
}else{
|
||||
/* Advance to the end of the token */
|
||||
const char *pToken = p;
|
||||
sqlite3_int64 nToken;
|
||||
while( p<pEnd && testIsTokenChar(*p) ) p++;
|
||||
nToken = (sqlite3_int64)(p-pToken);
|
||||
|
||||
/* Copy the token into the buffer */
|
||||
if( nToken>pCsr->nBuffer ){
|
||||
sqlite3_free(pCsr->aBuffer);
|
||||
pCsr->aBuffer = sqlite3_malloc64(nToken);
|
||||
}
|
||||
if( pCsr->aBuffer==0 ){
|
||||
rc = SQLITE_NOMEM;
|
||||
}else{
|
||||
int i;
|
||||
|
||||
if( pCsr->iLangid & 0x00000001 ){
|
||||
for(i=0; i<nToken; i++) pCsr->aBuffer[i] = pToken[i];
|
||||
}else{
|
||||
for(i=0; i<nToken; i++) pCsr->aBuffer[i] = (char)testTolower(pToken[i]);
|
||||
}
|
||||
pCsr->iToken++;
|
||||
pCsr->iInput = (int)(p - pCsr->aInput);
|
||||
|
||||
*ppToken = pCsr->aBuffer;
|
||||
*pnBytes = (int)nToken;
|
||||
*piStartOffset = (int)(pToken - pCsr->aInput);
|
||||
*piEndOffset = (int)(p - pCsr->aInput);
|
||||
*piPosition = pCsr->iToken;
|
||||
}
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int testTokenizerLanguage(
|
||||
sqlite3_tokenizer_cursor *pCursor,
|
||||
int iLangid
|
||||
){
|
||||
int rc = SQLITE_OK;
|
||||
test_tokenizer_cursor *pCsr = (test_tokenizer_cursor *)pCursor;
|
||||
pCsr->iLangid = iLangid;
|
||||
if( pCsr->iLangid>=100 ){
|
||||
rc = SQLITE_ERROR;
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
#endif
|
||||
|
||||
static int SQLITE_TCLAPI fts3_test_tokenizer_cmd(
|
||||
ClientData clientData,
|
||||
Tcl_Interp *interp,
|
||||
int objc,
|
||||
Tcl_Obj *CONST objv[]
|
||||
){
|
||||
#ifdef SQLITE_ENABLE_FTS3
|
||||
static const sqlite3_tokenizer_module testTokenizerModule = {
|
||||
1,
|
||||
testTokenizerCreate,
|
||||
testTokenizerDestroy,
|
||||
testTokenizerOpen,
|
||||
testTokenizerClose,
|
||||
testTokenizerNext,
|
||||
testTokenizerLanguage
|
||||
};
|
||||
const sqlite3_tokenizer_module *pPtr = &testTokenizerModule;
|
||||
if( objc!=1 ){
|
||||
Tcl_WrongNumArgs(interp, 1, objv, "");
|
||||
return TCL_ERROR;
|
||||
}
|
||||
Tcl_SetObjResult(interp, Tcl_NewByteArrayObj(
|
||||
(const unsigned char *)&pPtr, sizeof(sqlite3_tokenizer_module *)
|
||||
));
|
||||
#endif
|
||||
UNUSED_PARAMETER(clientData);
|
||||
return TCL_OK;
|
||||
}
|
||||
|
||||
static int SQLITE_TCLAPI fts3_test_varint_cmd(
|
||||
ClientData clientData,
|
||||
Tcl_Interp *interp,
|
||||
int objc,
|
||||
Tcl_Obj *CONST objv[]
|
||||
){
|
||||
#ifdef SQLITE_ENABLE_FTS3
|
||||
char aBuf[24];
|
||||
int rc;
|
||||
Tcl_WideInt w;
|
||||
sqlite3_int64 w2;
|
||||
int nByte, nByte2;
|
||||
|
||||
if( objc!=2 ){
|
||||
Tcl_WrongNumArgs(interp, 1, objv, "INTEGER");
|
||||
return TCL_ERROR;
|
||||
}
|
||||
|
||||
rc = Tcl_GetWideIntFromObj(interp, objv[1], &w);
|
||||
if( rc!=TCL_OK ) return rc;
|
||||
|
||||
nByte = sqlite3Fts3PutVarint(aBuf, w);
|
||||
nByte2 = sqlite3Fts3GetVarint(aBuf, &w2);
|
||||
if( w!=w2 || nByte!=nByte2 ){
|
||||
char *zErr = sqlite3_mprintf("error testing %lld", w);
|
||||
Tcl_ResetResult(interp);
|
||||
Tcl_AppendResult(interp, zErr, 0);
|
||||
return TCL_ERROR;
|
||||
}
|
||||
|
||||
if( w<=2147483647 && w>=0 ){
|
||||
int i;
|
||||
nByte2 = fts3GetVarint32(aBuf, &i);
|
||||
if( (int)w!=i || nByte!=nByte2 ){
|
||||
char *zErr = sqlite3_mprintf("error testing %lld (32-bit)", w);
|
||||
Tcl_ResetResult(interp);
|
||||
Tcl_AppendResult(interp, zErr, 0);
|
||||
return TCL_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
UNUSED_PARAMETER(clientData);
|
||||
return TCL_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** End of tokenizer code.
|
||||
**************************************************************************/
|
||||
|
||||
/*
|
||||
** sqlite3_fts3_may_be_corrupt BOOLEAN
|
||||
**
|
||||
** Set or clear the global "may-be-corrupt" flag. Return the old value.
|
||||
*/
|
||||
static int SQLITE_TCLAPI fts3_may_be_corrupt(
|
||||
void * clientData,
|
||||
Tcl_Interp *interp,
|
||||
int objc,
|
||||
Tcl_Obj *CONST objv[]
|
||||
){
|
||||
#ifdef SQLITE_DEBUG
|
||||
int bOld = sqlite3_fts3_may_be_corrupt;
|
||||
|
||||
if( objc!=2 && objc!=1 ){
|
||||
Tcl_WrongNumArgs(interp, 1, objv, "?BOOLEAN?");
|
||||
return TCL_ERROR;
|
||||
}
|
||||
if( objc==2 ){
|
||||
int bNew;
|
||||
if( Tcl_GetBooleanFromObj(interp, objv[1], &bNew) ) return TCL_ERROR;
|
||||
sqlite3_fts3_may_be_corrupt = bNew;
|
||||
}
|
||||
|
||||
Tcl_SetObjResult(interp, Tcl_NewIntObj(bOld));
|
||||
#endif
|
||||
return TCL_OK;
|
||||
}
|
||||
|
||||
int Sqlitetestfts3_Init(Tcl_Interp *interp){
|
||||
Tcl_CreateObjCommand(interp, "fts3_near_match", fts3_near_match_cmd, 0, 0);
|
||||
Tcl_CreateObjCommand(interp,
|
||||
"fts3_configure_incr_load", fts3_configure_incr_load_cmd, 0, 0
|
||||
);
|
||||
Tcl_CreateObjCommand(
|
||||
interp, "fts3_test_tokenizer", fts3_test_tokenizer_cmd, 0, 0
|
||||
);
|
||||
Tcl_CreateObjCommand(
|
||||
interp, "fts3_test_varint", fts3_test_varint_cmd, 0, 0
|
||||
);
|
||||
Tcl_CreateObjCommand(
|
||||
interp, "sqlite3_fts3_may_be_corrupt", fts3_may_be_corrupt, 0, 0
|
||||
);
|
||||
return TCL_OK;
|
||||
}
|
||||
#endif /* SQLITE_ENABLE_FTS3 || SQLITE_ENABLE_FTS4 */
|
||||
#endif /* ifdef SQLITE_TEST */
|
||||
456
ext/fts3/fts3_tokenize_vtab.c
Normal file
456
ext/fts3/fts3_tokenize_vtab.c
Normal file
|
|
@ -0,0 +1,456 @@
|
|||
/*
|
||||
** 2013 Apr 22
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
******************************************************************************
|
||||
**
|
||||
** This file contains code for the "fts3tokenize" virtual table module.
|
||||
** An fts3tokenize virtual table is created as follows:
|
||||
**
|
||||
** CREATE VIRTUAL TABLE <tbl> USING fts3tokenize(
|
||||
** <tokenizer-name>, <arg-1>, ...
|
||||
** );
|
||||
**
|
||||
** The table created has the following schema:
|
||||
**
|
||||
** CREATE TABLE <tbl>(input, token, start, end, position)
|
||||
**
|
||||
** When queried, the query must include a WHERE clause of type:
|
||||
**
|
||||
** input = <string>
|
||||
**
|
||||
** The virtual table module tokenizes this <string>, using the FTS3
|
||||
** tokenizer specified by the arguments to the CREATE VIRTUAL TABLE
|
||||
** statement and returns one row for each token in the result. With
|
||||
** fields set as follows:
|
||||
**
|
||||
** input: Always set to a copy of <string>
|
||||
** token: A token from the input.
|
||||
** start: Byte offset of the token within the input <string>.
|
||||
** end: Byte offset of the byte immediately following the end of the
|
||||
** token within the input string.
|
||||
** pos: Token offset of token within input.
|
||||
**
|
||||
*/
|
||||
#include "fts3Int.h"
|
||||
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
|
||||
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
|
||||
typedef struct Fts3tokTable Fts3tokTable;
|
||||
typedef struct Fts3tokCursor Fts3tokCursor;
|
||||
|
||||
/*
|
||||
** Virtual table structure.
|
||||
*/
|
||||
struct Fts3tokTable {
|
||||
sqlite3_vtab base; /* Base class used by SQLite core */
|
||||
const sqlite3_tokenizer_module *pMod;
|
||||
sqlite3_tokenizer *pTok;
|
||||
};
|
||||
|
||||
/*
|
||||
** Virtual table cursor structure.
|
||||
*/
|
||||
struct Fts3tokCursor {
|
||||
sqlite3_vtab_cursor base; /* Base class used by SQLite core */
|
||||
char *zInput; /* Input string */
|
||||
sqlite3_tokenizer_cursor *pCsr; /* Cursor to iterate through zInput */
|
||||
int iRowid; /* Current 'rowid' value */
|
||||
const char *zToken; /* Current 'token' value */
|
||||
int nToken; /* Size of zToken in bytes */
|
||||
int iStart; /* Current 'start' value */
|
||||
int iEnd; /* Current 'end' value */
|
||||
int iPos; /* Current 'pos' value */
|
||||
};
|
||||
|
||||
/*
|
||||
** Query FTS for the tokenizer implementation named zName.
|
||||
*/
|
||||
static int fts3tokQueryTokenizer(
|
||||
Fts3Hash *pHash,
|
||||
const char *zName,
|
||||
const sqlite3_tokenizer_module **pp,
|
||||
char **pzErr
|
||||
){
|
||||
sqlite3_tokenizer_module *p;
|
||||
int nName = (int)strlen(zName);
|
||||
|
||||
p = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash, zName, nName+1);
|
||||
if( !p ){
|
||||
sqlite3Fts3ErrMsg(pzErr, "unknown tokenizer: %s", zName);
|
||||
return SQLITE_ERROR;
|
||||
}
|
||||
|
||||
*pp = p;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** The second argument, argv[], is an array of pointers to nul-terminated
|
||||
** strings. This function makes a copy of the array and strings into a
|
||||
** single block of memory. It then dequotes any of the strings that appear
|
||||
** to be quoted.
|
||||
**
|
||||
** If successful, output parameter *pazDequote is set to point at the
|
||||
** array of dequoted strings and SQLITE_OK is returned. The caller is
|
||||
** responsible for eventually calling sqlite3_free() to free the array
|
||||
** in this case. Or, if an error occurs, an SQLite error code is returned.
|
||||
** The final value of *pazDequote is undefined in this case.
|
||||
*/
|
||||
static int fts3tokDequoteArray(
|
||||
int argc, /* Number of elements in argv[] */
|
||||
const char * const *argv, /* Input array */
|
||||
char ***pazDequote /* Output array */
|
||||
){
|
||||
int rc = SQLITE_OK; /* Return code */
|
||||
if( argc==0 ){
|
||||
*pazDequote = 0;
|
||||
}else{
|
||||
int i;
|
||||
int nByte = 0;
|
||||
char **azDequote;
|
||||
|
||||
for(i=0; i<argc; i++){
|
||||
nByte += (int)(strlen(argv[i]) + 1);
|
||||
}
|
||||
|
||||
*pazDequote = azDequote = sqlite3_malloc64(sizeof(char *)*argc + nByte);
|
||||
if( azDequote==0 ){
|
||||
rc = SQLITE_NOMEM;
|
||||
}else{
|
||||
char *pSpace = (char *)&azDequote[argc];
|
||||
for(i=0; i<argc; i++){
|
||||
int n = (int)strlen(argv[i]);
|
||||
azDequote[i] = pSpace;
|
||||
memcpy(pSpace, argv[i], n+1);
|
||||
sqlite3Fts3Dequote(pSpace);
|
||||
pSpace += (n+1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
** Schema of the tokenizer table.
|
||||
*/
|
||||
#define FTS3_TOK_SCHEMA "CREATE TABLE x(input, token, start, end, position)"
|
||||
|
||||
/*
|
||||
** This function does all the work for both the xConnect and xCreate methods.
|
||||
** These tables have no persistent representation of their own, so xConnect
|
||||
** and xCreate are identical operations.
|
||||
**
|
||||
** argv[0]: module name
|
||||
** argv[1]: database name
|
||||
** argv[2]: table name
|
||||
** argv[3]: first argument (tokenizer name)
|
||||
*/
|
||||
static int fts3tokConnectMethod(
|
||||
sqlite3 *db, /* Database connection */
|
||||
void *pHash, /* Hash table of tokenizers */
|
||||
int argc, /* Number of elements in argv array */
|
||||
const char * const *argv, /* xCreate/xConnect argument array */
|
||||
sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */
|
||||
char **pzErr /* OUT: sqlite3_malloc'd error message */
|
||||
){
|
||||
Fts3tokTable *pTab = 0;
|
||||
const sqlite3_tokenizer_module *pMod = 0;
|
||||
sqlite3_tokenizer *pTok = 0;
|
||||
int rc;
|
||||
char **azDequote = 0;
|
||||
int nDequote;
|
||||
|
||||
rc = sqlite3_declare_vtab(db, FTS3_TOK_SCHEMA);
|
||||
if( rc!=SQLITE_OK ) return rc;
|
||||
|
||||
nDequote = argc-3;
|
||||
rc = fts3tokDequoteArray(nDequote, &argv[3], &azDequote);
|
||||
|
||||
if( rc==SQLITE_OK ){
|
||||
const char *zModule;
|
||||
if( nDequote<1 ){
|
||||
zModule = "simple";
|
||||
}else{
|
||||
zModule = azDequote[0];
|
||||
}
|
||||
rc = fts3tokQueryTokenizer((Fts3Hash*)pHash, zModule, &pMod, pzErr);
|
||||
}
|
||||
|
||||
assert( (rc==SQLITE_OK)==(pMod!=0) );
|
||||
if( rc==SQLITE_OK ){
|
||||
const char * const *azArg = 0;
|
||||
if( nDequote>1 ) azArg = (const char * const *)&azDequote[1];
|
||||
rc = pMod->xCreate((nDequote>1 ? nDequote-1 : 0), azArg, &pTok);
|
||||
}
|
||||
|
||||
if( rc==SQLITE_OK ){
|
||||
pTab = (Fts3tokTable *)sqlite3_malloc(sizeof(Fts3tokTable));
|
||||
if( pTab==0 ){
|
||||
rc = SQLITE_NOMEM;
|
||||
}
|
||||
}
|
||||
|
||||
if( rc==SQLITE_OK ){
|
||||
memset(pTab, 0, sizeof(Fts3tokTable));
|
||||
pTab->pMod = pMod;
|
||||
pTab->pTok = pTok;
|
||||
*ppVtab = &pTab->base;
|
||||
}else{
|
||||
if( pTok ){
|
||||
pMod->xDestroy(pTok);
|
||||
}
|
||||
}
|
||||
|
||||
sqlite3_free(azDequote);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
** This function does the work for both the xDisconnect and xDestroy methods.
|
||||
** These tables have no persistent representation of their own, so xDisconnect
|
||||
** and xDestroy are identical operations.
|
||||
*/
|
||||
static int fts3tokDisconnectMethod(sqlite3_vtab *pVtab){
|
||||
Fts3tokTable *pTab = (Fts3tokTable *)pVtab;
|
||||
|
||||
pTab->pMod->xDestroy(pTab->pTok);
|
||||
sqlite3_free(pTab);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** xBestIndex - Analyze a WHERE and ORDER BY clause.
|
||||
*/
|
||||
static int fts3tokBestIndexMethod(
|
||||
sqlite3_vtab *pVTab,
|
||||
sqlite3_index_info *pInfo
|
||||
){
|
||||
int i;
|
||||
UNUSED_PARAMETER(pVTab);
|
||||
|
||||
for(i=0; i<pInfo->nConstraint; i++){
|
||||
if( pInfo->aConstraint[i].usable
|
||||
&& pInfo->aConstraint[i].iColumn==0
|
||||
&& pInfo->aConstraint[i].op==SQLITE_INDEX_CONSTRAINT_EQ
|
||||
){
|
||||
pInfo->idxNum = 1;
|
||||
pInfo->aConstraintUsage[i].argvIndex = 1;
|
||||
pInfo->aConstraintUsage[i].omit = 1;
|
||||
pInfo->estimatedCost = 1;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
}
|
||||
|
||||
pInfo->idxNum = 0;
|
||||
assert( pInfo->estimatedCost>1000000.0 );
|
||||
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** xOpen - Open a cursor.
|
||||
*/
|
||||
static int fts3tokOpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){
|
||||
Fts3tokCursor *pCsr;
|
||||
UNUSED_PARAMETER(pVTab);
|
||||
|
||||
pCsr = (Fts3tokCursor *)sqlite3_malloc(sizeof(Fts3tokCursor));
|
||||
if( pCsr==0 ){
|
||||
return SQLITE_NOMEM;
|
||||
}
|
||||
memset(pCsr, 0, sizeof(Fts3tokCursor));
|
||||
|
||||
*ppCsr = (sqlite3_vtab_cursor *)pCsr;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Reset the tokenizer cursor passed as the only argument. As if it had
|
||||
** just been returned by fts3tokOpenMethod().
|
||||
*/
|
||||
static void fts3tokResetCursor(Fts3tokCursor *pCsr){
|
||||
if( pCsr->pCsr ){
|
||||
Fts3tokTable *pTab = (Fts3tokTable *)(pCsr->base.pVtab);
|
||||
pTab->pMod->xClose(pCsr->pCsr);
|
||||
pCsr->pCsr = 0;
|
||||
}
|
||||
sqlite3_free(pCsr->zInput);
|
||||
pCsr->zInput = 0;
|
||||
pCsr->zToken = 0;
|
||||
pCsr->nToken = 0;
|
||||
pCsr->iStart = 0;
|
||||
pCsr->iEnd = 0;
|
||||
pCsr->iPos = 0;
|
||||
pCsr->iRowid = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
** xClose - Close a cursor.
|
||||
*/
|
||||
static int fts3tokCloseMethod(sqlite3_vtab_cursor *pCursor){
|
||||
Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor;
|
||||
|
||||
fts3tokResetCursor(pCsr);
|
||||
sqlite3_free(pCsr);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** xNext - Advance the cursor to the next row, if any.
|
||||
*/
|
||||
static int fts3tokNextMethod(sqlite3_vtab_cursor *pCursor){
|
||||
Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor;
|
||||
Fts3tokTable *pTab = (Fts3tokTable *)(pCursor->pVtab);
|
||||
int rc; /* Return code */
|
||||
|
||||
pCsr->iRowid++;
|
||||
rc = pTab->pMod->xNext(pCsr->pCsr,
|
||||
&pCsr->zToken, &pCsr->nToken,
|
||||
&pCsr->iStart, &pCsr->iEnd, &pCsr->iPos
|
||||
);
|
||||
|
||||
if( rc!=SQLITE_OK ){
|
||||
fts3tokResetCursor(pCsr);
|
||||
if( rc==SQLITE_DONE ) rc = SQLITE_OK;
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
** xFilter - Initialize a cursor to point at the start of its data.
|
||||
*/
|
||||
static int fts3tokFilterMethod(
|
||||
sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */
|
||||
int idxNum, /* Strategy index */
|
||||
const char *idxStr, /* Unused */
|
||||
int nVal, /* Number of elements in apVal */
|
||||
sqlite3_value **apVal /* Arguments for the indexing scheme */
|
||||
){
|
||||
int rc = SQLITE_ERROR;
|
||||
Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor;
|
||||
Fts3tokTable *pTab = (Fts3tokTable *)(pCursor->pVtab);
|
||||
UNUSED_PARAMETER(idxStr);
|
||||
UNUSED_PARAMETER(nVal);
|
||||
|
||||
fts3tokResetCursor(pCsr);
|
||||
if( idxNum==1 ){
|
||||
const char *zByte = (const char *)sqlite3_value_text(apVal[0]);
|
||||
int nByte = sqlite3_value_bytes(apVal[0]);
|
||||
pCsr->zInput = sqlite3_malloc64(nByte+1);
|
||||
if( pCsr->zInput==0 ){
|
||||
rc = SQLITE_NOMEM;
|
||||
}else{
|
||||
if( nByte>0 ) memcpy(pCsr->zInput, zByte, nByte);
|
||||
pCsr->zInput[nByte] = 0;
|
||||
rc = pTab->pMod->xOpen(pTab->pTok, pCsr->zInput, nByte, &pCsr->pCsr);
|
||||
if( rc==SQLITE_OK ){
|
||||
pCsr->pCsr->pTokenizer = pTab->pTok;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if( rc!=SQLITE_OK ) return rc;
|
||||
return fts3tokNextMethod(pCursor);
|
||||
}
|
||||
|
||||
/*
|
||||
** xEof - Return true if the cursor is at EOF, or false otherwise.
|
||||
*/
|
||||
static int fts3tokEofMethod(sqlite3_vtab_cursor *pCursor){
|
||||
Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor;
|
||||
return (pCsr->zToken==0);
|
||||
}
|
||||
|
||||
/*
|
||||
** xColumn - Return a column value.
|
||||
*/
|
||||
static int fts3tokColumnMethod(
|
||||
sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */
|
||||
sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */
|
||||
int iCol /* Index of column to read value from */
|
||||
){
|
||||
Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor;
|
||||
|
||||
/* CREATE TABLE x(input, token, start, end, position) */
|
||||
switch( iCol ){
|
||||
case 0:
|
||||
sqlite3_result_text(pCtx, pCsr->zInput, -1, SQLITE_TRANSIENT);
|
||||
break;
|
||||
case 1:
|
||||
sqlite3_result_text(pCtx, pCsr->zToken, pCsr->nToken, SQLITE_TRANSIENT);
|
||||
break;
|
||||
case 2:
|
||||
sqlite3_result_int(pCtx, pCsr->iStart);
|
||||
break;
|
||||
case 3:
|
||||
sqlite3_result_int(pCtx, pCsr->iEnd);
|
||||
break;
|
||||
default:
|
||||
assert( iCol==4 );
|
||||
sqlite3_result_int(pCtx, pCsr->iPos);
|
||||
break;
|
||||
}
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** xRowid - Return the current rowid for the cursor.
|
||||
*/
|
||||
static int fts3tokRowidMethod(
|
||||
sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */
|
||||
sqlite_int64 *pRowid /* OUT: Rowid value */
|
||||
){
|
||||
Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor;
|
||||
*pRowid = (sqlite3_int64)pCsr->iRowid;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Register the fts3tok module with database connection db. Return SQLITE_OK
|
||||
** if successful or an error code if sqlite3_create_module() fails.
|
||||
*/
|
||||
int sqlite3Fts3InitTok(sqlite3 *db, Fts3Hash *pHash){
|
||||
static const sqlite3_module fts3tok_module = {
|
||||
0, /* iVersion */
|
||||
fts3tokConnectMethod, /* xCreate */
|
||||
fts3tokConnectMethod, /* xConnect */
|
||||
fts3tokBestIndexMethod, /* xBestIndex */
|
||||
fts3tokDisconnectMethod, /* xDisconnect */
|
||||
fts3tokDisconnectMethod, /* xDestroy */
|
||||
fts3tokOpenMethod, /* xOpen */
|
||||
fts3tokCloseMethod, /* xClose */
|
||||
fts3tokFilterMethod, /* xFilter */
|
||||
fts3tokNextMethod, /* xNext */
|
||||
fts3tokEofMethod, /* xEof */
|
||||
fts3tokColumnMethod, /* xColumn */
|
||||
fts3tokRowidMethod, /* xRowid */
|
||||
0, /* xUpdate */
|
||||
0, /* xBegin */
|
||||
0, /* xSync */
|
||||
0, /* xCommit */
|
||||
0, /* xRollback */
|
||||
0, /* xFindFunction */
|
||||
0, /* xRename */
|
||||
0, /* xSavepoint */
|
||||
0, /* xRelease */
|
||||
0, /* xRollbackTo */
|
||||
0 /* xShadowName */
|
||||
};
|
||||
int rc; /* Return code */
|
||||
|
||||
rc = sqlite3_create_module(db, "fts3tokenize", &fts3tok_module, (void*)pHash);
|
||||
return rc;
|
||||
}
|
||||
|
||||
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
|
||||
520
ext/fts3/fts3_tokenizer.c
Normal file
520
ext/fts3/fts3_tokenizer.c
Normal file
|
|
@ -0,0 +1,520 @@
|
|||
/*
|
||||
** 2007 June 22
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
******************************************************************************
|
||||
**
|
||||
** This is part of an SQLite module implementing full-text search.
|
||||
** This particular file implements the generic tokenizer interface.
|
||||
*/
|
||||
|
||||
/*
|
||||
** The code in this file is only compiled if:
|
||||
**
|
||||
** * The FTS3 module is being built as an extension
|
||||
** (in which case SQLITE_CORE is not defined), or
|
||||
**
|
||||
** * The FTS3 module is being built into the core of
|
||||
** SQLite (in which case SQLITE_ENABLE_FTS3 is defined).
|
||||
*/
|
||||
#include "fts3Int.h"
|
||||
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
|
||||
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
|
||||
** Return true if the two-argument version of fts3_tokenizer()
|
||||
** has been activated via a prior call to sqlite3_db_config(db,
|
||||
** SQLITE_DBCONFIG_ENABLE_FTS3_TOKENIZER, 1, 0);
|
||||
*/
|
||||
static int fts3TokenizerEnabled(sqlite3_context *context){
|
||||
sqlite3 *db = sqlite3_context_db_handle(context);
|
||||
int isEnabled = 0;
|
||||
sqlite3_db_config(db,SQLITE_DBCONFIG_ENABLE_FTS3_TOKENIZER,-1,&isEnabled);
|
||||
return isEnabled;
|
||||
}
|
||||
|
||||
/*
|
||||
** Implementation of the SQL scalar function for accessing the underlying
|
||||
** hash table. This function may be called as follows:
|
||||
**
|
||||
** SELECT <function-name>(<key-name>);
|
||||
** SELECT <function-name>(<key-name>, <pointer>);
|
||||
**
|
||||
** where <function-name> is the name passed as the second argument
|
||||
** to the sqlite3Fts3InitHashTable() function (e.g. 'fts3_tokenizer').
|
||||
**
|
||||
** If the <pointer> argument is specified, it must be a blob value
|
||||
** containing a pointer to be stored as the hash data corresponding
|
||||
** to the string <key-name>. If <pointer> is not specified, then
|
||||
** the string <key-name> must already exist in the has table. Otherwise,
|
||||
** an error is returned.
|
||||
**
|
||||
** Whether or not the <pointer> argument is specified, the value returned
|
||||
** is a blob containing the pointer stored as the hash data corresponding
|
||||
** to string <key-name> (after the hash-table is updated, if applicable).
|
||||
*/
|
||||
static void fts3TokenizerFunc(
|
||||
sqlite3_context *context,
|
||||
int argc,
|
||||
sqlite3_value **argv
|
||||
){
|
||||
Fts3Hash *pHash;
|
||||
void *pPtr = 0;
|
||||
const unsigned char *zName;
|
||||
int nName;
|
||||
|
||||
assert( argc==1 || argc==2 );
|
||||
|
||||
pHash = (Fts3Hash *)sqlite3_user_data(context);
|
||||
|
||||
zName = sqlite3_value_text(argv[0]);
|
||||
nName = sqlite3_value_bytes(argv[0])+1;
|
||||
|
||||
if( argc==2 ){
|
||||
if( fts3TokenizerEnabled(context) || sqlite3_value_frombind(argv[1]) ){
|
||||
void *pOld;
|
||||
int n = sqlite3_value_bytes(argv[1]);
|
||||
if( zName==0 || n!=sizeof(pPtr) ){
|
||||
sqlite3_result_error(context, "argument type mismatch", -1);
|
||||
return;
|
||||
}
|
||||
pPtr = *(void **)sqlite3_value_blob(argv[1]);
|
||||
pOld = sqlite3Fts3HashInsert(pHash, (void *)zName, nName, pPtr);
|
||||
if( pOld==pPtr ){
|
||||
sqlite3_result_error(context, "out of memory", -1);
|
||||
}
|
||||
}else{
|
||||
sqlite3_result_error(context, "fts3tokenize disabled", -1);
|
||||
return;
|
||||
}
|
||||
}else{
|
||||
if( zName ){
|
||||
pPtr = sqlite3Fts3HashFind(pHash, zName, nName);
|
||||
}
|
||||
if( !pPtr ){
|
||||
char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName);
|
||||
sqlite3_result_error(context, zErr, -1);
|
||||
sqlite3_free(zErr);
|
||||
return;
|
||||
}
|
||||
}
|
||||
if( fts3TokenizerEnabled(context) || sqlite3_value_frombind(argv[0]) ){
|
||||
sqlite3_result_blob(context, (void *)&pPtr, sizeof(pPtr), SQLITE_TRANSIENT);
|
||||
}
|
||||
}
|
||||
|
||||
int sqlite3Fts3IsIdChar(char c){
|
||||
static const char isFtsIdChar[] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1x */
|
||||
0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */
|
||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */
|
||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */
|
||||
};
|
||||
return (c&0x80 || isFtsIdChar[(int)(c)]);
|
||||
}
|
||||
|
||||
const char *sqlite3Fts3NextToken(const char *zStr, int *pn){
|
||||
const char *z1;
|
||||
const char *z2 = 0;
|
||||
|
||||
/* Find the start of the next token. */
|
||||
z1 = zStr;
|
||||
while( z2==0 ){
|
||||
char c = *z1;
|
||||
switch( c ){
|
||||
case '\0': return 0; /* No more tokens here */
|
||||
case '\'':
|
||||
case '"':
|
||||
case '`': {
|
||||
z2 = z1;
|
||||
while( *++z2 && (*z2!=c || *++z2==c) );
|
||||
break;
|
||||
}
|
||||
case '[':
|
||||
z2 = &z1[1];
|
||||
while( *z2 && z2[0]!=']' ) z2++;
|
||||
if( *z2 ) z2++;
|
||||
break;
|
||||
|
||||
default:
|
||||
if( sqlite3Fts3IsIdChar(*z1) ){
|
||||
z2 = &z1[1];
|
||||
while( sqlite3Fts3IsIdChar(*z2) ) z2++;
|
||||
}else{
|
||||
z1++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
*pn = (int)(z2-z1);
|
||||
return z1;
|
||||
}
|
||||
|
||||
int sqlite3Fts3InitTokenizer(
|
||||
Fts3Hash *pHash, /* Tokenizer hash table */
|
||||
const char *zArg, /* Tokenizer name */
|
||||
sqlite3_tokenizer **ppTok, /* OUT: Tokenizer (if applicable) */
|
||||
char **pzErr /* OUT: Set to malloced error message */
|
||||
){
|
||||
int rc;
|
||||
char *z = (char *)zArg;
|
||||
int n = 0;
|
||||
char *zCopy;
|
||||
char *zEnd; /* Pointer to nul-term of zCopy */
|
||||
sqlite3_tokenizer_module *m;
|
||||
|
||||
zCopy = sqlite3_mprintf("%s", zArg);
|
||||
if( !zCopy ) return SQLITE_NOMEM;
|
||||
zEnd = &zCopy[strlen(zCopy)];
|
||||
|
||||
z = (char *)sqlite3Fts3NextToken(zCopy, &n);
|
||||
if( z==0 ){
|
||||
assert( n==0 );
|
||||
z = zCopy;
|
||||
}
|
||||
z[n] = '\0';
|
||||
sqlite3Fts3Dequote(z);
|
||||
|
||||
m = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash,z,(int)strlen(z)+1);
|
||||
if( !m ){
|
||||
sqlite3Fts3ErrMsg(pzErr, "unknown tokenizer: %s", z);
|
||||
rc = SQLITE_ERROR;
|
||||
}else{
|
||||
char const **aArg = 0;
|
||||
int iArg = 0;
|
||||
z = &z[n+1];
|
||||
while( z<zEnd && (NULL!=(z = (char *)sqlite3Fts3NextToken(z, &n))) ){
|
||||
sqlite3_int64 nNew = sizeof(char *)*(iArg+1);
|
||||
char const **aNew = (const char **)sqlite3_realloc64((void *)aArg, nNew);
|
||||
if( !aNew ){
|
||||
sqlite3_free(zCopy);
|
||||
sqlite3_free((void *)aArg);
|
||||
return SQLITE_NOMEM;
|
||||
}
|
||||
aArg = aNew;
|
||||
aArg[iArg++] = z;
|
||||
z[n] = '\0';
|
||||
sqlite3Fts3Dequote(z);
|
||||
z = &z[n+1];
|
||||
}
|
||||
rc = m->xCreate(iArg, aArg, ppTok);
|
||||
assert( rc!=SQLITE_OK || *ppTok );
|
||||
if( rc!=SQLITE_OK ){
|
||||
sqlite3Fts3ErrMsg(pzErr, "unknown tokenizer");
|
||||
}else{
|
||||
(*ppTok)->pModule = m;
|
||||
}
|
||||
sqlite3_free((void *)aArg);
|
||||
}
|
||||
|
||||
sqlite3_free(zCopy);
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
#ifdef SQLITE_TEST
|
||||
|
||||
#if defined(INCLUDE_SQLITE_TCL_H)
|
||||
# include "sqlite_tcl.h"
|
||||
#else
|
||||
# include "tcl.h"
|
||||
#endif
|
||||
#include <string.h>
|
||||
|
||||
/*
|
||||
** Implementation of a special SQL scalar function for testing tokenizers
|
||||
** designed to be used in concert with the Tcl testing framework. This
|
||||
** function must be called with two or more arguments:
|
||||
**
|
||||
** SELECT <function-name>(<key-name>, ..., <input-string>);
|
||||
**
|
||||
** where <function-name> is the name passed as the second argument
|
||||
** to the sqlite3Fts3InitHashTable() function (e.g. 'fts3_tokenizer')
|
||||
** concatenated with the string '_test' (e.g. 'fts3_tokenizer_test').
|
||||
**
|
||||
** The return value is a string that may be interpreted as a Tcl
|
||||
** list. For each token in the <input-string>, three elements are
|
||||
** added to the returned list. The first is the token position, the
|
||||
** second is the token text (folded, stemmed, etc.) and the third is the
|
||||
** substring of <input-string> associated with the token. For example,
|
||||
** using the built-in "simple" tokenizer:
|
||||
**
|
||||
** SELECT fts_tokenizer_test('simple', 'I don't see how');
|
||||
**
|
||||
** will return the string:
|
||||
**
|
||||
** "{0 i I 1 dont don't 2 see see 3 how how}"
|
||||
**
|
||||
*/
|
||||
static void testFunc(
|
||||
sqlite3_context *context,
|
||||
int argc,
|
||||
sqlite3_value **argv
|
||||
){
|
||||
Fts3Hash *pHash;
|
||||
sqlite3_tokenizer_module *p;
|
||||
sqlite3_tokenizer *pTokenizer = 0;
|
||||
sqlite3_tokenizer_cursor *pCsr = 0;
|
||||
|
||||
const char *zErr = 0;
|
||||
|
||||
const char *zName;
|
||||
int nName;
|
||||
const char *zInput;
|
||||
int nInput;
|
||||
|
||||
const char *azArg[64];
|
||||
|
||||
const char *zToken;
|
||||
int nToken = 0;
|
||||
int iStart = 0;
|
||||
int iEnd = 0;
|
||||
int iPos = 0;
|
||||
int i;
|
||||
|
||||
Tcl_Obj *pRet;
|
||||
|
||||
if( argc<2 ){
|
||||
sqlite3_result_error(context, "insufficient arguments", -1);
|
||||
return;
|
||||
}
|
||||
|
||||
nName = sqlite3_value_bytes(argv[0]);
|
||||
zName = (const char *)sqlite3_value_text(argv[0]);
|
||||
nInput = sqlite3_value_bytes(argv[argc-1]);
|
||||
zInput = (const char *)sqlite3_value_text(argv[argc-1]);
|
||||
|
||||
pHash = (Fts3Hash *)sqlite3_user_data(context);
|
||||
p = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash, zName, nName+1);
|
||||
|
||||
if( !p ){
|
||||
char *zErr2 = sqlite3_mprintf("unknown tokenizer: %s", zName);
|
||||
sqlite3_result_error(context, zErr2, -1);
|
||||
sqlite3_free(zErr2);
|
||||
return;
|
||||
}
|
||||
|
||||
pRet = Tcl_NewObj();
|
||||
Tcl_IncrRefCount(pRet);
|
||||
|
||||
for(i=1; i<argc-1; i++){
|
||||
azArg[i-1] = (const char *)sqlite3_value_text(argv[i]);
|
||||
}
|
||||
|
||||
if( SQLITE_OK!=p->xCreate(argc-2, azArg, &pTokenizer) ){
|
||||
zErr = "error in xCreate()";
|
||||
goto finish;
|
||||
}
|
||||
pTokenizer->pModule = p;
|
||||
if( sqlite3Fts3OpenTokenizer(pTokenizer, 0, zInput, nInput, &pCsr) ){
|
||||
zErr = "error in xOpen()";
|
||||
goto finish;
|
||||
}
|
||||
|
||||
while( SQLITE_OK==p->xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos) ){
|
||||
Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iPos));
|
||||
Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken));
|
||||
zToken = &zInput[iStart];
|
||||
nToken = iEnd-iStart;
|
||||
Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken));
|
||||
}
|
||||
|
||||
if( SQLITE_OK!=p->xClose(pCsr) ){
|
||||
zErr = "error in xClose()";
|
||||
goto finish;
|
||||
}
|
||||
if( SQLITE_OK!=p->xDestroy(pTokenizer) ){
|
||||
zErr = "error in xDestroy()";
|
||||
goto finish;
|
||||
}
|
||||
|
||||
finish:
|
||||
if( zErr ){
|
||||
sqlite3_result_error(context, zErr, -1);
|
||||
}else{
|
||||
sqlite3_result_text(context, Tcl_GetString(pRet), -1, SQLITE_TRANSIENT);
|
||||
}
|
||||
Tcl_DecrRefCount(pRet);
|
||||
}
|
||||
|
||||
static
|
||||
int registerTokenizer(
|
||||
sqlite3 *db,
|
||||
char *zName,
|
||||
const sqlite3_tokenizer_module *p
|
||||
){
|
||||
int rc;
|
||||
sqlite3_stmt *pStmt;
|
||||
const char zSql[] = "SELECT fts3_tokenizer(?, ?)";
|
||||
|
||||
rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
|
||||
if( rc!=SQLITE_OK ){
|
||||
return rc;
|
||||
}
|
||||
|
||||
sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
|
||||
sqlite3_bind_blob(pStmt, 2, &p, sizeof(p), SQLITE_STATIC);
|
||||
sqlite3_step(pStmt);
|
||||
|
||||
return sqlite3_finalize(pStmt);
|
||||
}
|
||||
|
||||
|
||||
static
|
||||
int queryTokenizer(
|
||||
sqlite3 *db,
|
||||
char *zName,
|
||||
const sqlite3_tokenizer_module **pp
|
||||
){
|
||||
int rc;
|
||||
sqlite3_stmt *pStmt;
|
||||
const char zSql[] = "SELECT fts3_tokenizer(?)";
|
||||
|
||||
*pp = 0;
|
||||
rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
|
||||
if( rc!=SQLITE_OK ){
|
||||
return rc;
|
||||
}
|
||||
|
||||
sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
|
||||
if( SQLITE_ROW==sqlite3_step(pStmt) ){
|
||||
if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB
|
||||
&& sqlite3_column_bytes(pStmt, 0)==sizeof(*pp)
|
||||
){
|
||||
memcpy((void *)pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp));
|
||||
}
|
||||
}
|
||||
|
||||
return sqlite3_finalize(pStmt);
|
||||
}
|
||||
|
||||
void sqlite3Fts3SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule);
|
||||
|
||||
/*
|
||||
** Implementation of the scalar function fts3_tokenizer_internal_test().
|
||||
** This function is used for testing only, it is not included in the
|
||||
** build unless SQLITE_TEST is defined.
|
||||
**
|
||||
** The purpose of this is to test that the fts3_tokenizer() function
|
||||
** can be used as designed by the C-code in the queryTokenizer and
|
||||
** registerTokenizer() functions above. These two functions are repeated
|
||||
** in the README.tokenizer file as an example, so it is important to
|
||||
** test them.
|
||||
**
|
||||
** To run the tests, evaluate the fts3_tokenizer_internal_test() scalar
|
||||
** function with no arguments. An assert() will fail if a problem is
|
||||
** detected. i.e.:
|
||||
**
|
||||
** SELECT fts3_tokenizer_internal_test();
|
||||
**
|
||||
*/
|
||||
static void intTestFunc(
|
||||
sqlite3_context *context,
|
||||
int argc,
|
||||
sqlite3_value **argv
|
||||
){
|
||||
int rc;
|
||||
const sqlite3_tokenizer_module *p1;
|
||||
const sqlite3_tokenizer_module *p2;
|
||||
sqlite3 *db = (sqlite3 *)sqlite3_user_data(context);
|
||||
|
||||
UNUSED_PARAMETER(argc);
|
||||
UNUSED_PARAMETER(argv);
|
||||
|
||||
/* Test the query function */
|
||||
sqlite3Fts3SimpleTokenizerModule(&p1);
|
||||
rc = queryTokenizer(db, "simple", &p2);
|
||||
assert( rc==SQLITE_OK );
|
||||
assert( p1==p2 );
|
||||
rc = queryTokenizer(db, "nosuchtokenizer", &p2);
|
||||
assert( rc==SQLITE_ERROR );
|
||||
assert( p2==0 );
|
||||
assert( 0==strcmp(sqlite3_errmsg(db), "unknown tokenizer: nosuchtokenizer") );
|
||||
|
||||
/* Test the storage function */
|
||||
if( fts3TokenizerEnabled(context) ){
|
||||
rc = registerTokenizer(db, "nosuchtokenizer", p1);
|
||||
assert( rc==SQLITE_OK );
|
||||
rc = queryTokenizer(db, "nosuchtokenizer", &p2);
|
||||
assert( rc==SQLITE_OK );
|
||||
assert( p2==p1 );
|
||||
}
|
||||
|
||||
sqlite3_result_text(context, "ok", -1, SQLITE_STATIC);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
** Set up SQL objects in database db used to access the contents of
|
||||
** the hash table pointed to by argument pHash. The hash table must
|
||||
** been initialized to use string keys, and to take a private copy
|
||||
** of the key when a value is inserted. i.e. by a call similar to:
|
||||
**
|
||||
** sqlite3Fts3HashInit(pHash, FTS3_HASH_STRING, 1);
|
||||
**
|
||||
** This function adds a scalar function (see header comment above
|
||||
** fts3TokenizerFunc() in this file for details) and, if ENABLE_TABLE is
|
||||
** defined at compilation time, a temporary virtual table (see header
|
||||
** comment above struct HashTableVtab) to the database schema. Both
|
||||
** provide read/write access to the contents of *pHash.
|
||||
**
|
||||
** The third argument to this function, zName, is used as the name
|
||||
** of both the scalar and, if created, the virtual table.
|
||||
*/
|
||||
int sqlite3Fts3InitHashTable(
|
||||
sqlite3 *db,
|
||||
Fts3Hash *pHash,
|
||||
const char *zName
|
||||
){
|
||||
int rc = SQLITE_OK;
|
||||
void *p = (void *)pHash;
|
||||
const int any = SQLITE_UTF8|SQLITE_DIRECTONLY;
|
||||
|
||||
#ifdef SQLITE_TEST
|
||||
char *zTest = 0;
|
||||
char *zTest2 = 0;
|
||||
void *pdb = (void *)db;
|
||||
zTest = sqlite3_mprintf("%s_test", zName);
|
||||
zTest2 = sqlite3_mprintf("%s_internal_test", zName);
|
||||
if( !zTest || !zTest2 ){
|
||||
rc = SQLITE_NOMEM;
|
||||
}
|
||||
#endif
|
||||
|
||||
if( SQLITE_OK==rc ){
|
||||
rc = sqlite3_create_function(db, zName, 1, any, p, fts3TokenizerFunc, 0, 0);
|
||||
}
|
||||
if( SQLITE_OK==rc ){
|
||||
rc = sqlite3_create_function(db, zName, 2, any, p, fts3TokenizerFunc, 0, 0);
|
||||
}
|
||||
#ifdef SQLITE_TEST
|
||||
if( SQLITE_OK==rc ){
|
||||
rc = sqlite3_create_function(db, zTest, -1, any, p, testFunc, 0, 0);
|
||||
}
|
||||
if( SQLITE_OK==rc ){
|
||||
rc = sqlite3_create_function(db, zTest2, 0, any, pdb, intTestFunc, 0, 0);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef SQLITE_TEST
|
||||
sqlite3_free(zTest);
|
||||
sqlite3_free(zTest2);
|
||||
#endif
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
|
||||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue