Index: src/db.c ================================================================== --- src/db.c +++ src/db.c @@ -65,14 +65,10 @@ */ static void db_err(const char *zFormat, ...){ va_list ap; char *z; int rc = 1; - static const char zRebuildMsg[] = - "If you have recently updated your fossil executable, you might\n" - "need to run \"fossil all rebuild\" to bring the repository\n" - "schemas up to date.\n"; va_start(ap, zFormat); z = vmprintf(zFormat, ap); va_end(ap); #ifdef FOSSIL_ENABLE_JSON if( g.json.isJsonMode ){ @@ -88,15 +84,14 @@ @ error Database\serror:\s%F(z) cgi_reply(); } else if( g.cgiOutput ){ g.cgiOutput = 0; - cgi_printf("

Database Error

\n" - "
%h
\n

%s

\n", z, zRebuildMsg); + cgi_printf("

Database Error

\n

%h

\n", z); cgi_reply(); }else{ - fprintf(stderr, "%s: %s\n\n%s", g.argv[0], z, zRebuildMsg); + fprintf(stderr, "%s: %s\n", g.argv[0], z); } free(z); db_force_rollback(); fossil_exit(rc); } Index: src/main.mk ================================================================== --- src/main.mk +++ src/main.mk @@ -491,11 +491,11 @@ $(OBJDIR)/cson_amalgamation.o $(APPNAME): $(OBJDIR)/headers $(OBJDIR)/codecheck1 $(OBJ) $(EXTRAOBJ) $(OBJDIR)/codecheck1 $(TRANS_SRC) - $(TCC) -o $(APPNAME) $(OBJ) $(EXTRAOBJ) $(LIB) + $(TCC) $(CFLAGS) -o $(APPNAME) $(OBJ) $(EXTRAOBJ) $(LIB) # This rule prevents make from using its default rules to try build # an executable named "manifest" out of the file named "manifest.c" # $(SRCDIR)/../manifest: Index: src/search.c ================================================================== --- src/search.c +++ src/search.c @@ -213,11 +213,11 @@ aiLastDoc[j] = iDoc; aiLastOfst[j] = i; for(k=1; j-k>=0 && anMatch[j-k] && aiWordIdx[j-k]==iWord-k; k++){} for(ii=0; ii>= 1; } + return n; +} /* ** Implemenation of the rank() function used with rank(matchinfo(*,'pcsx')). */ static void search_rank_sqlfunc( @@ -694,24 +743,45 @@ int argc, sqlite3_value **argv ){ const unsigned *aVal = (unsigned int*)sqlite3_value_blob(argv[0]); int nVal = sqlite3_value_bytes(argv[0])/4; + int nCol; /* Number of columns in the index */ int nTerm; /* Number of search terms in the query */ - int i; /* Loop counter */ - double r = 1.0; /* Score */ + int i, j; /* Loop counter */ + double r = 0.0; /* Score */ + const unsigned *aX, *aS; - if( nVal<6 ) return; - if( aVal[1]!=1 ) return; + if( nVal<2 ) return; nTerm = aVal[0]; - r *= 1<<((30*(aVal[2]-1))/nTerm); - for(i=1; i<=nTerm; i++){ - int hits_this_row = aVal[3*i]; - int hits_all_rows = aVal[3*i+1]; - int rows_with_hit = aVal[3*i+2]; - double avg_hits_per_row = (double)hits_all_rows/(double)rows_with_hit; - r *= hits_this_row/avg_hits_per_row; + nCol = aVal[1]; + if( nVal<2+3*nCol*nTerm+nCol ) return; + aS = aVal+2; + aX = aS+nCol; + for(j=0; j0 ){ + x = 0.0; + for(i=0; i','',' ... ',-1,35)" " FROM ftsidx CROSS JOIN ftsdocs" " WHERE ftsidx MATCH %Q" " AND ftsdocs.rowid=ftsidx.docid", @@ -838,29 +909,30 @@ ** ** Return the number of rows. */ int search_run_and_output( const char *zPattern, /* The query pattern */ - unsigned int srchFlags /* What to search over */ + unsigned int srchFlags, /* What to search over */ + int fDebug /* Extra debugging output */ ){ Stmt q; int nRow = 0; srchFlags = search_restrict(srchFlags); if( srchFlags==0 ) return 0; search_sql_setup(g.db); add_content_sql_commands(g.db); db_multi_exec( - "CREATE TEMP TABLE x(label,url,score,date,snip);" + "CREATE TEMP TABLE x(label,url,score,id,date,snip);" ); if( !search_index_exists() ){ search_fullscan(zPattern, srchFlags); }else{ search_update_index(srchFlags); search_indexed(zPattern, srchFlags); } - db_prepare(&q, "SELECT url, snip, label" + db_prepare(&q, "SELECT url, snip, label, score, id" " FROM x" " ORDER BY score DESC, date DESC;"); while( db_step(&q)==SQLITE_ROW ){ const char *zUrl = db_column_text(&q, 0); const char *zSnippet = db_column_text(&q, 1); @@ -867,12 +939,15 @@ const char *zLabel = db_column_text(&q, 2); if( nRow==0 ){ @
    } nRow++; - @
  1. %h(zLabel)
    - @ %z(cleanSnippet(zSnippet))

  2. + @
  3. %h(zLabel) + if( fDebug ){ + @ (%e(db_column_double(&q,3)), %s(db_column_text(&q,4))) + } + @
    %z(cleanSnippet(zSnippet))

  4. } db_finalize(&q); if( nRow ){ @
} @@ -900,10 +975,11 @@ const char *zType = 0; const char *zClass = 0; const char *zDisable1; const char *zDisable2; const char *zPattern; + int fDebug = PB("debug"); srchFlags = search_restrict(srchFlags); switch( srchFlags ){ case SRCH_CKIN: zType = " Check-ins"; zClass = "Ckin"; break; case SRCH_DOC: zType = " Docs"; zClass = "Doc"; break; case SRCH_TKT: zType = " Tickets"; zClass = "Tkt"; break; @@ -947,10 +1023,13 @@ cgi_printf(">%s\n", aY[i].zNm); } @ srchFlags = newFlags; } + if( fDebug ){ + @ + } @ if( srchFlags==0 ){ @

Search is disabled

} @ @@ -959,11 +1038,11 @@ if( zClass ){ @
}else{ @
} - if( search_run_and_output(zPattern, srchFlags)==0 ){ + if( search_run_and_output(zPattern, srchFlags, fDebug)==0 ){ @

No matches for: %h(zPattern)

} @
} } @@ -983,10 +1062,14 @@ /* ** This is a helper function for search_stext(). Writing into pOut ** the search text obtained from pIn according to zMimetype. +** +** The title of the document is the first line of text. All subsequent +** lines are the body. If the document has no title, the first line +** is blank. */ static void get_stext_by_mimetype( Blob *pIn, const char *zMimetype, Blob *pOut @@ -994,41 +1077,74 @@ Blob html, title; blob_init(&html, 0, 0); blob_init(&title, 0, 0); if( zMimetype==0 ) zMimetype = "text/plain"; if( fossil_strcmp(zMimetype,"text/x-fossil-wiki")==0 ){ - wiki_convert(pIn, &html, 0); + Blob tail; + blob_init(&tail, 0, 0); + if( wiki_find_title(pIn, &title, &tail) ){ + blob_appendf(pOut, "%s\n", blob_str(&title)); + wiki_convert(&tail, &html, 0); + blob_reset(&tail); + }else{ + blob_append(pOut, "\n", 1); + wiki_convert(pIn, &html, 0); + } html_to_plaintext(blob_str(&html), pOut); }else if( fossil_strcmp(zMimetype,"text/x-markdown")==0 ){ markdown_to_html(pIn, &title, &html); + if( blob_size(&title) ){ + blob_appendf(pOut, "%s\n", blob_str(&title)); + }else{ + blob_append(pOut, "\n", 1); + } html_to_plaintext(blob_str(&html), pOut); }else if( fossil_strcmp(zMimetype,"text/html")==0 ){ + if( doc_is_embedded_html(pIn, &title) ){ + blob_appendf(pOut, "%s\n", blob_str(&title)); + } html_to_plaintext(blob_str(pIn), pOut); }else{ - *pOut = *pIn; - blob_init(pIn, 0, 0); + blob_append(pOut, blob_buffer(pIn), blob_size(pIn)); } blob_reset(&html); blob_reset(&title); } /* ** Query pQuery is pointing at a single row of output. Append a text ** representation of every text-compatible column to pAccum. */ -static void append_all_ticket_fields(Blob *pAccum, Stmt *pQuery){ +static void append_all_ticket_fields(Blob *pAccum, Stmt *pQuery, int iTitle){ int n = db_column_count(pQuery); int i; + const char *zMime = 0; + if( iTitle>=0 && iTitle0)" " ||')'" " FROM event WHERE objid=:x AND type='ci'"); + if( isPlainText<0 ){ + isPlainText = db_get_boolean("timeline-plaintext",0); + } db_bind_int(&q, ":x", rid); if( db_step(&q)==SQLITE_ROW ){ - db_column_blob(&q, 0, pOut); blob_append(pOut, "\n", 1); + if( isPlainText ){ + db_column_blob(&q, 0, pOut); + }else{ + Blob x; + blob_init(&x,0,0); + db_column_blob(&q, 0, &x); + get_stext_by_mimetype(&x, "text/x-fossil-wiki", pOut); + blob_reset(&x); + } } db_reset(&q); break; } case 't': { /* Tickets */ static Stmt q1; - Blob raw; + static int iTitle = -1; db_static_prepare(&q1, "SELECT * FROM ticket WHERE tkt_id=:rid"); - blob_init(&raw,0,0); db_bind_int(&q1, ":rid", rid); if( db_step(&q1)==SQLITE_ROW ){ - append_all_ticket_fields(&raw, &q1); + if( iTitle<0 ){ + int n = db_column_count(&q1); + for(iTitle=0; iTitle0 ){ + blob_reset(&cache.stext); + }else{ + blob_init(&cache.stext,0,0); + } + cache.cType = cType; + cache.rid = rid; + if( cType==0 ) return 0; + search_stext(cType, rid, zName, &cache.stext); + z = blob_str(&cache.stext); + for(i=0; z[i] && z[i]!='\n'; i++){} + cache.nTitle = i; + } + if( pnTitle ) *pnTitle = cache.nTitle; + return blob_str(&cache.stext); +} /* ** COMMAND: test-search-stext ** ** Usage: fossil test-search-stext TYPE ARG1 ARG2 @@ -1131,10 +1303,30 @@ if( g.argc!=5 ) usage("TYPE RID NAME"); search_stext(g.argv[2][0], atoi(g.argv[3]), g.argv[4], &out); fossil_print("%s\n",blob_str(&out)); blob_reset(&out); } + +/* +** COMMAND: test-convert-stext +** +** Usage: fossil test-convert-stext FILE MIMETYPE +** +** Read the content of FILE and convert it to stext according to MIMETYPE. +** Send the result to standard output. +*/ +void test_convert_stext(void){ + Blob in, out; + db_find_and_open_repository(0,0); + if( g.argc!=4 ) usage("FILENAME MIMETYPE"); + blob_read_from_file(&in, g.argv[2]); + blob_init(&out, 0, 0); + get_stext_by_mimetype(&in, g.argv[3], &out); + fossil_print("%s\n",blob_str(&out)); + blob_reset(&in); + blob_reset(&out); +} /* The schema for the full-text index */ static const char zFtsSchema[] = @ -- One entry for each possible search result @@ -1145,20 +1337,21 @@ @ name TEXT, -- Additional document description @ idxed BOOLEAN, -- True if currently in the index @ label TEXT, -- Label to print on search results @ url TEXT, -- URL to access this document @ mtime DATE, -- Date when document created +@ bx TEXT, -- Temporary "body" content cache @ UNIQUE(type,rid) @ ); @ CREATE INDEX "%w".ftsdocIdxed ON ftsdocs(type,rid,name) WHERE idxed==0; @ CREATE INDEX "%w".ftsdocName ON ftsdocs(name) WHERE type='w'; @ CREATE VIEW IF NOT EXISTS "%w".ftscontent AS @ SELECT rowid, type, rid, name, idxed, label, url, mtime, -@ stext(type,rid,name) AS 'stext' +@ title(type,rid,name) AS 'title', body(type,rid,name) AS 'body' @ FROM ftsdocs; @ CREATE VIRTUAL TABLE IF NOT EXISTS "%w".ftsidx -@ USING fts4(content="ftscontent", stext); +@ USING fts4(content="ftscontent", title, body%s); ; static const char zFtsDrop[] = @ DROP TABLE IF EXISTS "%w".ftsidx; @ DROP VIEW IF EXISTS "%w".ftscontent; @ DROP TABLE IF EXISTS "%w".ftsdocs; @@ -1168,13 +1361,15 @@ ** Create or drop the tables associated with a full-text index. */ static int searchIdxExists = -1; void search_create_index(void){ const char *zDb = db_name("repository"); + int useStemmer = db_get_boolean("search-stemmer",0); + const char *zExtra = useStemmer ? ",tokenize=porter" : ""; search_sql_setup(g.db); - db_multi_exec(zFtsSchema/*works-like:"%w%w%w%w%w"*/, - zDb, zDb, zDb, zDb, zDb); + db_multi_exec(zFtsSchema/*works-like:"%w%w%w%w%w%s"*/, + zDb, zDb, zDb, zDb, zDb, zExtra/*safe-for-%s*/); searchIdxExists = 1; } void search_drop_index(void){ const char *zDb = db_name("repository"); db_multi_exec(zFtsDrop/*works-like:"%w%w%w"*/, zDb, zDb, zDb); @@ -1292,34 +1487,39 @@ db_multi_exec( "DELETE FROM ftsdocs WHERE type='d'" " AND rid NOT IN (SELECT rid FROM current_docs)" ); db_multi_exec( - "INSERT OR IGNORE INTO ftsdocs(type,rid,name,idxed,label,url,mtime)" + "INSERT OR IGNORE INTO ftsdocs(type,rid,name,idxed,label,bx,url,mtime)" " SELECT 'd', rid, name, 0," - " printf('Document: %%s',name)," + " title('d',rid,name)," + " body('d',rid,name)," " printf('/doc/%q/%%s',urlencode(name))," " %.17g" " FROM current_docs", zBrUuid, rTime ); db_multi_exec( - "INSERT INTO ftsidx(docid,stext)" - " SELECT rowid, stext FROM ftscontent WHERE type='d' AND NOT idxed" + "INSERT INTO ftsidx(docid,title,body)" + " SELECT rowid, label, bx FROM ftsdocs WHERE type='d' AND NOT idxed" ); db_multi_exec( - "UPDATE ftsdocs SET idxed=1 WHERE type='d' AND NOT idxed" + "UPDATE ftsdocs SET" + " idxed=1," + " bx=NULL," + " label='Document: '||label" + " WHERE type='d' AND NOT idxed" ); } /* ** Deal with all of the unindexed 'c' terms in FTSDOCS */ static void search_update_checkin_index(void){ db_multi_exec( - "INSERT INTO ftsidx(docid,stext)" - " SELECT rowid, stext('c',rid,NULL) FROM ftsdocs" + "INSERT INTO ftsidx(docid,title,body)" + " SELECT rowid, '', body('c',rid,NULL) FROM ftsdocs" " WHERE type='c' AND NOT idxed;" ); db_multi_exec( "REPLACE INTO ftsdocs(rowid,idxed,type,rid,name,label,url,mtime)" " SELECT ftsdocs.rowid, 1, 'c', ftsdocs.rid, NULL," @@ -1336,19 +1536,20 @@ /* ** Deal with all of the unindexed 't' terms in FTSDOCS */ static void search_update_ticket_index(void){ db_multi_exec( - "INSERT INTO ftsidx(docid,stext)" - " SELECT rowid, stext('t',rid,NULL) FROM ftsdocs" + "INSERT INTO ftsidx(docid,title,body)" + " SELECT rowid, title('t',rid,NULL), body('t',rid,NULL) FROM ftsdocs" " WHERE type='t' AND NOT idxed;" ); if( db_changes()==0 ) return; db_multi_exec( "REPLACE INTO ftsdocs(rowid,idxed,type,rid,name,label,url,mtime)" " SELECT ftsdocs.rowid, 1, 't', ftsdocs.rid, NULL," - " printf('Ticket [%%.16s] on %%s',tkt_uuid,datetime(tkt_mtime))," + " printf('Ticket: %%s (%%s)',title('t',tkt_id,null)," + " datetime(tkt_mtime))," " printf('/tktview/%%.20s',tkt_uuid)," " tkt_mtime" " FROM ftsdocs, ticket" " WHERE ftsdocs.type='t' AND NOT ftsdocs.idxed" " AND ticket.tkt_id=ftsdocs.rid" @@ -1358,12 +1559,12 @@ /* ** Deal with all of the unindexed 'w' terms in FTSDOCS */ static void search_update_wiki_index(void){ db_multi_exec( - "INSERT INTO ftsidx(docid,stext)" - " SELECT rowid, stext('w',rid,NULL) FROM ftsdocs" + "INSERT INTO ftsidx(docid,title,body)" + " SELECT rowid, title('w',rid,NULL),body('w',rid,NULL) FROM ftsdocs" " WHERE type='w' AND NOT idxed;" ); if( db_changes()==0 ) return; db_multi_exec( "REPLACE INTO ftsdocs(rowid,idxed,type,rid,name,label,url,mtime)" @@ -1416,19 +1617,22 @@ ** Usage: fossil fts-config ?SUBCOMMAND? ?ARGUMENT? ** ** The "fossil fts-config" command configures the full-text search capabilities ** of the repository. Subcommands: ** -** reindex Rebuild the search index. Create it if it does -** not already exist +** reindex Rebuild the search index. This is a no-op if +** index search is disabled ** ** index (on|off) Turn the search index on or off ** ** enable cdtw Enable various kinds of search. c=Check-ins, ** d=Documents, t=Tickets, w=Wiki. ** ** disable cdtw Disable versious kinds of search +** +** stemmer (on|off) Turn the Porter stemmer on or off for indexed +** search. (Unindexed search is never stemmed.) ** ** The current search settings are displayed after any changes are applied. ** Run this command with no arguments to simply see the settings. */ void test_fts_cmd(void){ @@ -1435,16 +1639,17 @@ static const struct { int iCmd; const char *z; } aCmd[] = { { 1, "reindex" }, { 2, "index" }, { 3, "disable" }, { 4, "enable" }, + { 5, "stemmer" }, }; static const struct { char *zSetting; char *zName; char *zSw; } aSetng[] = { - { "search-ckin", "check-in search:", "c" }, - { "search-doc", "document search:", "d" }, - { "search-tkt", "ticket search:", "t" }, - { "search-wiki", "wiki search:", "w" }, + { "search-ckin", "check-in search:", "c" }, + { "search-doc", "document search:", "d" }, + { "search-tkt", "ticket search:", "t" }, + { "search-wiki", "wiki search:", "w" }, }; char *zSubCmd; int i, j, n; int iCmd = 0; int iAction = 0; @@ -1464,11 +1669,11 @@ return; } iCmd = aCmd[i].iCmd; } if( iCmd==1 ){ - iAction = 2; + if( search_index_exists() ) iAction = 2; } if( iCmd==2 ){ if( g.argc<3 ) usage("index (on|off)"); iAction = 1 + is_truth(g.argv[3]); } @@ -1475,18 +1680,23 @@ db_begin_transaction(); /* Adjust search settings */ if( iCmd==3 || iCmd==4 ){ const char *zCtrl; - if( g.argc<4 ) usage("enable STRING"); + if( g.argc<4 ) usage(mprintf("%s STRING",zSubCmd)); zCtrl = g.argv[3]; for(j=0; j=1 ){ search_drop_index(); } @@ -1497,14 +1707,16 @@ /* Always show the status before ending */ for(i=0; iCurrently using an SQLite FTS4 search index. This makes search @ run faster, especially on large repositories, but takes up space.

+ onoff_attribute("Use Porter Stemmer","search-stemmer","ss",0,0); @

@ }else{ @

The SQLite FTS4 search index is disabled. All searching will be @ a full-text scan. This usually works fine, but can be slow for @ larger repositories.

+ onoff_attribute("Use Porter Stemmer","search-stemmer","ss",0,0); @

} @

style_footer(); } Index: src/wikiformat.c ================================================================== --- src/wikiformat.c +++ src/wikiformat.c @@ -1965,17 +1965,26 @@ ** z points to the start of a token. Return the number of ** characters in that token. */ static int nextHtmlToken(const char *z){ int n; - if( z[0]=='<' ){ + char c; + if( (c=z[0])=='<' ){ n = markupLength(z); if( n<=0 ) n = 1; - }else if( fossil_isspace(z[0]) ){ + }else if( fossil_isspace(c) ){ for(n=1; z[n] && fossil_isspace(z[n]); n++){} + }else if( c=='&' ){ + n = z[1]=='#' ? 2 : 1; + while( fossil_isalnum(z[n]) ) n++; + if( z[n]==';' ) n++; }else{ - for(n=1; z[n] && z[n]!='<' && !fossil_isspace(z[n]); n++){} + n = 1; + for(n=1; 1; n++){ + if( (c = z[n]) > '<' ) continue; + if( c=='<' || c=='&' || fossil_isspace(c) || c==0 ) break; + } } return n; } /* @@ -2100,16 +2109,22 @@ } /* ** Remove all HTML markup from the input text. The output written into ** pOut is pure text. +** +** Put the title on the first line, if there is any markup. +** If there is no <title>, then create a blank first line. */ void html_to_plaintext(const char *zIn, Blob *pOut){ int n; int i, j; + int inTitle = 0; /* True between <title>... */ + int seenText = 0; /* True after first non-whitespace seen */ int nNL = 0; /* Number of \n characters at the end of pOut */ int nWS = 0; /* True if pOut ends with whitespace */ + while( fossil_isspace(zIn[0]) ) zIn++; while( zIn[0] ){ n = nextHtmlToken(zIn); if( zIn[0]=='<' && n>1 ){ int isCloseTag; int eTag; @@ -2130,26 +2145,66 @@ zIn += n; } if( zIn[0]=='<' ) zIn += n; continue; } - if( !isCloseTag && (eType & (MUTYPE_BLOCK|MUTYPE_TABLE))!=0 ){ + if( eTag==MARKUP_TITLE ){ + inTitle = !isCloseTag; + } + if( !isCloseTag && seenText && (eType & (MUTYPE_BLOCK|MUTYPE_TABLE))!=0 ){ if( nNL==0 ){ blob_append(pOut, "\n", 1); nNL++; } nWS = 1; } }else if( fossil_isspace(zIn[0]) ){ - for(i=nNL=0; i ' ' within */ + for(i=0; i<n; i++) if( zIn[i]=='\n' ) nNL++; + } + if( !nWS ){ + blob_append(pOut, nNL ? "\n" : " ", 1); + nWS = 1; + } + } + }else if( zIn[0]=='&' ){ + char c = '?'; + if( zIn[1]=='#' ){ + int x = atoi(&zIn[1]); + if( x>0 && x<=127 ) c = x; + }else{ + static const struct { int n; char c; char *z; } aEntity[] = { + { 5, '&', "&" }, + { 4, '<', "<" }, + { 4, '>', ">" }, + { 6, ' ', " " }, + }; + int jj; + for(jj=0; jj<ArraySize(aEntity); jj++){ + if( aEntity[jj].n==n && strncmp(aEntity[jj].z,zIn,n)==0 ){ + c = aEntity[jj].c; + break; + } + } + } + if( fossil_isspace(c) ){ + if( nWS==0 && seenText ) blob_append(pOut, &c, 1); nWS = 1; + nNL = c=='\n'; + }else{ + if( !seenText && !inTitle ) blob_append(pOut, "\n", 1); + seenText = 1; + nNL = nWS = 0; + blob_append(pOut, &c, 1); } }else{ - blob_append(pOut, zIn, n); + if( !seenText && !inTitle ) blob_append(pOut, "\n", 1); + seenText = 1; nNL = nWS = 0; + blob_append(pOut, zIn, n); } zIn += n; } if( nNL==0 ) blob_append(pOut, "\n", 1); }