Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Changes In Branch search-enhancements Excluding Merge-Ins
This is equivalent to a diff from c62e94f8 to 9f67861a
2015-03-09
| ||
04:42 | Always report full UUID for checkin and branch commands since they actually alter the repository and should report a complete, unambiguous UUID. ... (check-in: 153e17c5 user: andybradford tags: trunk) | |
2015-02-14
| ||
15:17 | Enhance /search to distinguish between the title and the body of a document and provide support for the Porter stemmer for indexed search. Improved scoring and snippet presentation. NB: Run "fossil fts-config reindex" when upgrading through this change. ... (check-in: 0f96ffb9 user: drh tags: trunk) | |
15:06 | Improvements to login/logout processing: (1) When the user is "nobody", show hyperlinks to pages that require "anonymous" but have those links redirect to the /login page. (2) Clean up the /login page - less verbiage. (3) Redirects from /login to /tarball or /zip provide a button to press, rather than immediately starting the download. ... (check-in: 653dd402 user: drh tags: trunk) | |
12:24 | When the user is "nobody", make the g.anon permission vector for "anonymous" available in addition to g.perm. Hyperlinks to pages that would be available to anonymous are shown rather than suppressed. When permission is denied and control jumps to login_needed() a new flag shows whether or not logging in as "anonymous" would help. Work in progress. ... (check-in: 2f50d427 user: drh tags: login-enhancements) | |
02:12 | Improvements to the ranking function. Add the undocumented "debug" query parameter to /search. ... (Closed-Leaf check-in: 9f67861a user: drh tags: search-enhancements) | |
00:37 | Enabled indexed search with separate title and body and with the option to use the Porter stemmer. ... (check-in: 71295a98 user: drh tags: search-enhancements) | |
2015-02-13
| ||
21:21 | Merge enhancements and fixes from trunk. ... (check-in: 23c86b50 user: drh tags: search-enhancements) | |
09:03 | A few more places where displayed UUID length should be configurable. ... (check-in: c62e94f8 user: jan.nijtmans tags: trunk) | |
09:00 | Take over latest shell.c, but without SQLITE_TESTCTRL_IMPOSTER support (included SQLite doesn't have that yet). Reason: add '.dbinfo' command to "fossil sqlite" 0f65a7e2e0 and fix bug in '.import' 9c5bcad1 ... (check-in: d9648886 user: jan.nijtmans tags: trunk) | |
Changes to src/db.c.
︙ | ︙ | |||
63 64 65 66 67 68 69 | /* ** Call this routine when a database error occurs. */ static void db_err(const char *zFormat, ...){ va_list ap; char *z; int rc = 1; | < < < < | < | | 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 | /* ** Call this routine when a database error occurs. */ static void db_err(const char *zFormat, ...){ va_list ap; char *z; int rc = 1; va_start(ap, zFormat); z = vmprintf(zFormat, ap); va_end(ap); #ifdef FOSSIL_ENABLE_JSON if( g.json.isJsonMode ){ json_err( 0, z, 1 ); if( g.isHTTP ){ rc = 0 /* avoid HTTP 500 */; } } else #endif /* FOSSIL_ENABLE_JSON */ if( g.xferPanic ){ cgi_reset_content(); @ error Database\serror:\s%F(z) cgi_reply(); } else if( g.cgiOutput ){ g.cgiOutput = 0; cgi_printf("<h1>Database Error</h1>\n<p>%h</p>\n", z); cgi_reply(); }else{ fprintf(stderr, "%s: %s\n", g.argv[0], z); } free(z); db_force_rollback(); fossil_exit(rc); } /* |
︙ | ︙ |
Changes to src/main.mk.
︙ | ︙ | |||
489 490 491 492 493 494 495 | $(OBJDIR)/th_lang.o \ $(OBJDIR)/th_tcl.o \ $(OBJDIR)/cson_amalgamation.o $(APPNAME): $(OBJDIR)/headers $(OBJDIR)/codecheck1 $(OBJ) $(EXTRAOBJ) $(OBJDIR)/codecheck1 $(TRANS_SRC) | | | 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 | $(OBJDIR)/th_lang.o \ $(OBJDIR)/th_tcl.o \ $(OBJDIR)/cson_amalgamation.o $(APPNAME): $(OBJDIR)/headers $(OBJDIR)/codecheck1 $(OBJ) $(EXTRAOBJ) $(OBJDIR)/codecheck1 $(TRANS_SRC) $(TCC) $(CFLAGS) -o $(APPNAME) $(OBJ) $(EXTRAOBJ) $(LIB) # This rule prevents make from using its default rules to try build # an executable named "manifest" out of the file named "manifest.c" # $(SRCDIR)/../manifest: # noop |
︙ | ︙ |
Changes to src/search.c.
︙ | ︙ | |||
211 212 213 214 215 216 217 | ){ aiWordIdx[j] = iWord; aiLastDoc[j] = iDoc; aiLastOfst[j] = i; for(k=1; j-k>=0 && anMatch[j-k] && aiWordIdx[j-k]==iWord-k; k++){} for(ii=0; ii<k; ii++){ if( anMatch[j-ii]<k ){ | | | 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 | ){ aiWordIdx[j] = iWord; aiLastDoc[j] = iDoc; aiLastOfst[j] = i; for(k=1; j-k>=0 && anMatch[j-k] && aiWordIdx[j-k]==iWord-k; k++){} for(ii=0; ii<k; ii++){ if( anMatch[j-ii]<k ){ anMatch[j-ii] = k*(nDoc-iDoc); aiBestDoc[j-ii] = aiLastDoc[j-ii]; aiBestOfst[j-ii] = aiLastOfst[j-ii]; } } break; } } |
︙ | ︙ | |||
394 395 396 397 398 399 400 | ** Return non-zero on a match and zero on a miss. */ static void search_match_sqlfunc( sqlite3_context *context, int argc, sqlite3_value **argv ){ | | > > > | > | | 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 | ** Return non-zero on a match and zero on a miss. */ static void search_match_sqlfunc( sqlite3_context *context, int argc, sqlite3_value **argv ){ const char *azDoc[5]; int nDoc; int rc; for(nDoc=0; nDoc<ArraySize(azDoc) && nDoc<argc; nDoc++){ azDoc[nDoc] = (const char*)sqlite3_value_text(argv[nDoc]); if( azDoc[nDoc]==0 ) azDoc[nDoc] = ""; } rc = search_match(&gSearch, nDoc, azDoc); sqlite3_result_int(context, rc); } /* ** These SQL functions return the results of the last ** call to the search_match() SQL function. */ |
︙ | ︙ | |||
433 434 435 436 437 438 439 | ** search_stext() routine for further detail. */ static void search_stext_sqlfunc( sqlite3_context *context, int argc, sqlite3_value **argv ){ | > > > > > > > | > > > > | > | > > > > > > > > > > > > > > > | 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 | ** search_stext() routine for further detail. */ static void search_stext_sqlfunc( sqlite3_context *context, int argc, sqlite3_value **argv ){ const char *zType = (const char*)sqlite3_value_text(argv[0]); int rid = sqlite3_value_int(argv[1]); const char *zName = (const char*)sqlite3_value_text(argv[2]); sqlite3_result_text(context, search_stext_cached(zType[0],rid,zName,0), -1, SQLITE_TRANSIENT); } static void search_title_sqlfunc( sqlite3_context *context, int argc, sqlite3_value **argv ){ const char *zType = (const char*)sqlite3_value_text(argv[0]); int rid = sqlite3_value_int(argv[1]); const char *zName = (const char*)sqlite3_value_text(argv[2]); int nHdr; char *z = search_stext_cached(zType[0], rid, zName, &nHdr); if( nHdr || zType[0]!='d' ){ sqlite3_result_text(context, z, nHdr, SQLITE_TRANSIENT); }else{ sqlite3_result_value(context, argv[2]); } } static void search_body_sqlfunc( sqlite3_context *context, int argc, sqlite3_value **argv ){ const char *zType = (const char*)sqlite3_value_text(argv[0]); int rid = sqlite3_value_int(argv[1]); const char *zName = (const char*)sqlite3_value_text(argv[2]); int nHdr; char *z = search_stext_cached(zType[0], rid, zName, &nHdr); sqlite3_result_text(context, z+nHdr+1, -1, SQLITE_TRANSIENT); } /* ** Encode a string for use as a query parameter in a URL */ static void search_urlencode_sqlfunc( sqlite3_context *context, |
︙ | ︙ | |||
461 462 463 464 465 466 467 | ** Register the "score()" SQL function to score its input text ** using the given Search object. Once this function is registered, ** do not delete the Search object. */ void search_sql_setup(sqlite3 *db){ static int once = 0; if( once++ ) return; | | > > > > | 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 | ** Register the "score()" SQL function to score its input text ** using the given Search object. Once this function is registered, ** do not delete the Search object. */ void search_sql_setup(sqlite3 *db){ static int once = 0; if( once++ ) return; sqlite3_create_function(db, "search_match", -1, SQLITE_UTF8, 0, search_match_sqlfunc, 0, 0); sqlite3_create_function(db, "search_score", 0, SQLITE_UTF8, 0, search_score_sqlfunc, 0, 0); sqlite3_create_function(db, "search_snippet", 0, SQLITE_UTF8, 0, search_snippet_sqlfunc, 0, 0); sqlite3_create_function(db, "search_init", -1, SQLITE_UTF8, 0, search_init_sqlfunc, 0, 0); sqlite3_create_function(db, "stext", 3, SQLITE_UTF8, 0, search_stext_sqlfunc, 0, 0); sqlite3_create_function(db, "title", 3, SQLITE_UTF8, 0, search_title_sqlfunc, 0, 0); sqlite3_create_function(db, "body", 3, SQLITE_UTF8, 0, search_body_sqlfunc, 0, 0); sqlite3_create_function(db, "urlencode", 1, SQLITE_UTF8, 0, search_urlencode_sqlfunc, 0, 0); } /* ** Testing the search function. ** |
︙ | ︙ | |||
614 615 616 617 618 619 620 | char *zDocGlob = db_get("doc-glob",""); char *zDocBr = db_get("doc-branch","trunk"); if( zDocGlob && zDocGlob[0] && zDocBr && zDocBr[0] ){ db_multi_exec( "CREATE VIRTUAL TABLE IF NOT EXISTS temp.foci USING files_of_checkin;" ); db_multi_exec( | | | > > | | > | | > | | | | > | > > > > > > > > > > | | > | < | > > > > > > > | | | | | > > | > > > > > > > > > > > | 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 | char *zDocGlob = db_get("doc-glob",""); char *zDocBr = db_get("doc-branch","trunk"); if( zDocGlob && zDocGlob[0] && zDocBr && zDocBr[0] ){ db_multi_exec( "CREATE VIRTUAL TABLE IF NOT EXISTS temp.foci USING files_of_checkin;" ); db_multi_exec( "INSERT INTO x(label,url,score,id,date,snip)" " SELECT printf('Document: %%s',title('d',blob.rid,foci.filename))," " printf('/doc/%T/%%s',foci.filename)," " search_score()," " 'd'||blob.rid," " (SELECT datetime(event.mtime) FROM event" " WHERE objid=symbolic_name_to_rid('trunk'))," " search_snippet()" " FROM foci CROSS JOIN blob" " WHERE checkinID=symbolic_name_to_rid('trunk')" " AND blob.uuid=foci.uuid" " AND search_match(title('d',blob.rid,foci.filename)," " body('d',blob.rid,foci.filename))" " AND %z", zDocBr, glob_expr("foci.filename", zDocGlob) ); } } if( (srchFlags & SRCH_WIKI)!=0 ){ db_multi_exec( "WITH wiki(name,rid,mtime) AS (" " SELECT substr(tagname,6), tagxref.rid, max(tagxref.mtime)" " FROM tag, tagxref" " WHERE tag.tagname GLOB 'wiki-*'" " AND tagxref.tagid=tag.tagid" " 
GROUP BY 1" ")" "INSERT INTO x(label,url,score,id,date,snip)" " SELECT printf('Wiki: %%s',name)," " printf('/wiki?name=%%s',urlencode(name))," " search_score()," " 'w'||rid," " datetime(mtime)," " search_snippet()" " FROM wiki" " WHERE search_match(title('w',rid,name),body('w',rid,name));" ); } if( (srchFlags & SRCH_CKIN)!=0 ){ db_multi_exec( "WITH ckin(uuid,rid,mtime) AS (" " SELECT blob.uuid, event.objid, event.mtime" " FROM event, blob" " WHERE event.type='ci'" " AND blob.rid=event.objid" ")" "INSERT INTO x(label,url,score,id,date,snip)" " SELECT printf('Check-in [%%.10s] on %%s',uuid,datetime(mtime))," " printf('/timeline?c=%%s&n=8&y=ci',uuid)," " search_score()," " 'c'||rid," " datetime(mtime)," " search_snippet()" " FROM ckin" " WHERE search_match('',body('c',rid,NULL));" ); } if( (srchFlags & SRCH_TKT)!=0 ){ db_multi_exec( "INSERT INTO x(label,url,score,id,date,snip)" " SELECT printf('Ticket: %%s (%%s)',title('t',tkt_id,NULL)," "datetime(tkt_mtime))," " printf('/tktview/%%.20s',tkt_uuid)," " search_score()," " 't'||tkt_id," " datetime(tkt_mtime)," " search_snippet()" " FROM ticket" " WHERE search_match(title('t',tkt_id,NULL),body('t',tkt_id,NULL));" ); } } /* ** Number of significant bits in a u32 */ static int nbits(u32 x){ int n = 0; while( x ){ n++; x >>= 1; } return n; } /* ** Implemenation of the rank() function used with rank(matchinfo(*,'pcsx')). 
*/ static void search_rank_sqlfunc( sqlite3_context *context, int argc, sqlite3_value **argv ){ const unsigned *aVal = (unsigned int*)sqlite3_value_blob(argv[0]); int nVal = sqlite3_value_bytes(argv[0])/4; int nCol; /* Number of columns in the index */ int nTerm; /* Number of search terms in the query */ int i, j; /* Loop counter */ double r = 0.0; /* Score */ const unsigned *aX, *aS; if( nVal<2 ) return; nTerm = aVal[0]; nCol = aVal[1]; if( nVal<2+3*nCol*nTerm+nCol ) return; aS = aVal+2; aX = aS+nCol; for(j=0; j<nCol; j++){ double x; if( aS[j]>0 ){ x = 0.0; for(i=0; i<nTerm; i++){ int hits_this_row; int hits_all_rows; int rows_with_hit; double avg_hits_per_row; hits_this_row = aX[j + i*nCol*3]; if( hits_this_row==0 )continue; hits_all_rows = aX[j + i*nCol*3 + 1]; rows_with_hit = aX[j + i*nCol*3 + 2]; if( rows_with_hit==0 ) continue; avg_hits_per_row = hits_all_rows/(double)rows_with_hit; x += hits_this_row/(avg_hits_per_row*nbits(rows_with_hit)); } x *= (1<<((30*(aS[j]-1))/nTerm)); }else{ x = 0.0; } r = r*10.0 + x; } #define SEARCH_DEBUG_RANK 0 #if SEARCH_DEBUG_RANK { Blob x; blob_init(&x,0,0); blob_appendf(&x,"%08x", (int)r); |
︙ | ︙ | |||
744 745 746 747 748 749 750 | ){ Blob sql; if( srchFlags==0 ) return; sqlite3_create_function(g.db, "rank", 1, SQLITE_UTF8, 0, search_rank_sqlfunc, 0, 0); blob_init(&sql, 0, 0); blob_appendf(&sql, | | > | 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 | ){ Blob sql; if( srchFlags==0 ) return; sqlite3_create_function(g.db, "rank", 1, SQLITE_UTF8, 0, search_rank_sqlfunc, 0, 0); blob_init(&sql, 0, 0); blob_appendf(&sql, "INSERT INTO x(label,url,score,id,date,snip) " " SELECT ftsdocs.label," " ftsdocs.url," " rank(matchinfo(ftsidx,'pcsx'))," " ftsdocs.type || ftsdocs.rid," " datetime(ftsdocs.mtime)," " snippet(ftsidx,'<mark>','</mark>',' ... ',-1,35)" " FROM ftsidx CROSS JOIN ftsdocs" " WHERE ftsidx MATCH %Q" " AND ftsdocs.rowid=ftsidx.docid", zPattern ); |
︙ | ︙ | |||
836 837 838 839 840 841 842 | ** Other web-pages can invoke this routine to add search results ** in the middle of the page. ** ** Return the number of rows. */ int search_run_and_output( const char *zPattern, /* The query pattern */ | | > | | | > > > | | 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 | ** Other web-pages can invoke this routine to add search results ** in the middle of the page. ** ** Return the number of rows. */ int search_run_and_output( const char *zPattern, /* The query pattern */ unsigned int srchFlags, /* What to search over */ int fDebug /* Extra debugging output */ ){ Stmt q; int nRow = 0; srchFlags = search_restrict(srchFlags); if( srchFlags==0 ) return 0; search_sql_setup(g.db); add_content_sql_commands(g.db); db_multi_exec( "CREATE TEMP TABLE x(label,url,score,id,date,snip);" ); if( !search_index_exists() ){ search_fullscan(zPattern, srchFlags); }else{ search_update_index(srchFlags); search_indexed(zPattern, srchFlags); } db_prepare(&q, "SELECT url, snip, label, score, id" " FROM x" " ORDER BY score DESC, date DESC;"); while( db_step(&q)==SQLITE_ROW ){ const char *zUrl = db_column_text(&q, 0); const char *zSnippet = db_column_text(&q, 1); const char *zLabel = db_column_text(&q, 2); if( nRow==0 ){ @ <ol> } nRow++; @ <li><p><a href='%R%s(zUrl)'>%h(zLabel)</a> if( fDebug ){ @ (%e(db_column_double(&q,3)), %s(db_column_text(&q,4))) } @ <br><span class='snippet'>%z(cleanSnippet(zSnippet))</span></li> } db_finalize(&q); if( nRow ){ @ </ol> } return nRow; } |
︙ | ︙ | |||
898 899 900 901 902 903 904 905 906 907 908 909 910 911 | */ void search_screen(unsigned srchFlags, int useYparam){ const char *zType = 0; const char *zClass = 0; const char *zDisable1; const char *zDisable2; const char *zPattern; srchFlags = search_restrict(srchFlags); switch( srchFlags ){ case SRCH_CKIN: zType = " Check-ins"; zClass = "Ckin"; break; case SRCH_DOC: zType = " Docs"; zClass = "Doc"; break; case SRCH_TKT: zType = " Tickets"; zClass = "Tkt"; break; case SRCH_WIKI: zType = " Wiki"; zClass = "Wiki"; break; } | > | 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 | */ void search_screen(unsigned srchFlags, int useYparam){ const char *zType = 0; const char *zClass = 0; const char *zDisable1; const char *zDisable2; const char *zPattern; int fDebug = PB("debug"); srchFlags = search_restrict(srchFlags); switch( srchFlags ){ case SRCH_CKIN: zType = " Check-ins"; zClass = "Ckin"; break; case SRCH_DOC: zType = " Docs"; zClass = "Doc"; break; case SRCH_TKT: zType = " Tickets"; zClass = "Tkt"; break; case SRCH_WIKI: zType = " Wiki"; zClass = "Wiki"; break; } |
︙ | ︙ | |||
945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 | cgi_printf(" selected"); } cgi_printf(">%s</option>\n", aY[i].zNm); } @ </select> srchFlags = newFlags; } @ <input type="submit" value="Search%s(zType)"%s(zDisable2)> if( srchFlags==0 ){ @ <p class="generalError">Search is disabled</p> } @ </div></form> while( fossil_isspace(zPattern[0]) ) zPattern++; if( zPattern[0] ){ if( zClass ){ @ <div class='searchResult searchResult%s(zClass)'> }else{ @ <div class='searchResult'> } | > > > | | 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 | cgi_printf(" selected"); } cgi_printf(">%s</option>\n", aY[i].zNm); } @ </select> srchFlags = newFlags; } if( fDebug ){ @ <input type="hidden" name="debug" value="1"> } @ <input type="submit" value="Search%s(zType)"%s(zDisable2)> if( srchFlags==0 ){ @ <p class="generalError">Search is disabled</p> } @ </div></form> while( fossil_isspace(zPattern[0]) ) zPattern++; if( zPattern[0] ){ if( zClass ){ @ <div class='searchResult searchResult%s(zClass)'> }else{ @ <div class='searchResult'> } if( search_run_and_output(zPattern, srchFlags, fDebug)==0 ){ @ <p class='searchEmpty'>No matches for: <span>%h(zPattern)</span></p> } @ </div> } } /* |
︙ | ︙ | |||
981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 | style_footer(); } /* ** This is a helper function for search_stext(). Writing into pOut ** the search text obtained from pIn according to zMimetype. */ static void get_stext_by_mimetype( Blob *pIn, const char *zMimetype, Blob *pOut ){ Blob html, title; blob_init(&html, 0, 0); blob_init(&title, 0, 0); if( zMimetype==0 ) zMimetype = "text/plain"; if( fossil_strcmp(zMimetype,"text/x-fossil-wiki")==0 ){ | > > > > > > > > > > > > | > > > > > > > > > < | | > > > > > > > > > > > | | < < > | | > > > > > > > | 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 | style_footer(); } /* ** This is a helper function for search_stext(). Writing into pOut ** the search text obtained from pIn according to zMimetype. ** ** The title of the document is the first line of text. All subsequent ** lines are the body. If the document has no title, the first line ** is blank. 
*/ static void get_stext_by_mimetype( Blob *pIn, const char *zMimetype, Blob *pOut ){ Blob html, title; blob_init(&html, 0, 0); blob_init(&title, 0, 0); if( zMimetype==0 ) zMimetype = "text/plain"; if( fossil_strcmp(zMimetype,"text/x-fossil-wiki")==0 ){ Blob tail; blob_init(&tail, 0, 0); if( wiki_find_title(pIn, &title, &tail) ){ blob_appendf(pOut, "%s\n", blob_str(&title)); wiki_convert(&tail, &html, 0); blob_reset(&tail); }else{ blob_append(pOut, "\n", 1); wiki_convert(pIn, &html, 0); } html_to_plaintext(blob_str(&html), pOut); }else if( fossil_strcmp(zMimetype,"text/x-markdown")==0 ){ markdown_to_html(pIn, &title, &html); if( blob_size(&title) ){ blob_appendf(pOut, "%s\n", blob_str(&title)); }else{ blob_append(pOut, "\n", 1); } html_to_plaintext(blob_str(&html), pOut); }else if( fossil_strcmp(zMimetype,"text/html")==0 ){ if( doc_is_embedded_html(pIn, &title) ){ blob_appendf(pOut, "%s\n", blob_str(&title)); } html_to_plaintext(blob_str(pIn), pOut); }else{ blob_append(pOut, blob_buffer(pIn), blob_size(pIn)); } blob_reset(&html); blob_reset(&title); } /* ** Query pQuery is pointing at a single row of output. Append a text ** representation of every text-compatible column to pAccum. 
*/ static void append_all_ticket_fields(Blob *pAccum, Stmt *pQuery, int iTitle){ int n = db_column_count(pQuery); int i; const char *zMime = 0; if( iTitle>=0 && iTitle<n ){ if( db_column_type(pQuery,iTitle)==SQLITE_TEXT ){ blob_append(pAccum, db_column_text(pQuery,iTitle), -1); } blob_append(pAccum, "\n", 1); } for(i=0; i<n; i++){ const char *zColName = db_column_name(pQuery,i); int eType = db_column_type(pQuery,i); if( i==iTitle ) continue; if( fossil_strnicmp(zColName,"tkt_",4)==0 ) continue; if( fossil_strnicmp(zColName,"private_",8)==0 ) continue; if( eType==SQLITE_BLOB || eType==SQLITE_NULL ) continue; if( fossil_stricmp(zColName,"mimetype")==0 ){ zMime = db_column_text(pQuery,i); if( fossil_strcmp(zMime,"text/plain")==0 ) zMime = 0; }else if( zMime==0 || eType!=SQLITE_TEXT ){ blob_appendf(pAccum, "%s: %s |\n", zColName, db_column_text(pQuery,i)); }else{ Blob txt; blob_init(&txt, db_column_text(pQuery,i), -1); blob_appendf(pAccum, "%s: ", zColName); get_stext_by_mimetype(&txt, zMime, pAccum); blob_append(pAccum, " |", 2); blob_reset(&txt); } } } /* ** Return "search text" - a reduced version of a document appropriate for |
︙ | ︙ | |||
1052 1053 1054 1055 1056 1057 1058 | const char *zName, /* Auxiliary information */ Blob *pOut /* OUT: Initialize to the search text */ ){ blob_init(pOut, 0, 0); switch( cType ){ case 'd': { /* Documents */ Blob doc; | | > > > > > > | > > > > > | > | < > > > > > > | | > > > > | > > > > > > > > > > > > > > > > > > > > > > > > > | | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | | > > | | | 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 | const char *zName, /* Auxiliary information */ Blob *pOut /* OUT: Initialize to the search text */ ){ blob_init(pOut, 0, 0); switch( cType ){ case 'd': { /* Documents */ Blob doc; content_get(rid, &doc); blob_to_utf8_no_bom(&doc, 0); get_stext_by_mimetype(&doc, mimetype_from_name(zName), pOut); blob_reset(&doc); break; } case 'w': { /* Wiki */ Manifest *pWiki = manifest_get(rid, CFTYPE_WIKI,0); Blob wiki; if( pWiki==0 ) break; blob_init(&wiki, pWiki->zWiki, -1); get_stext_by_mimetype(&wiki, wiki_filter_mimetypes(pWiki->zMimetype), 
pOut); blob_reset(&wiki); manifest_destroy(pWiki); break; } case 'c': { /* Check-in Comments */ static Stmt q; static int isPlainText = -1; db_static_prepare(&q, "SELECT coalesce(ecomment,comment)" " ||' (user: '||coalesce(euser,user,'?')" " ||', tags: '||" " (SELECT group_concat(substr(tag.tagname,5),',')" " FROM tag, tagxref" " WHERE tagname GLOB 'sym-*' AND tag.tagid=tagxref.tagid" " AND tagxref.rid=event.objid AND tagxref.tagtype>0)" " ||')'" " FROM event WHERE objid=:x AND type='ci'"); if( isPlainText<0 ){ isPlainText = db_get_boolean("timeline-plaintext",0); } db_bind_int(&q, ":x", rid); if( db_step(&q)==SQLITE_ROW ){ blob_append(pOut, "\n", 1); if( isPlainText ){ db_column_blob(&q, 0, pOut); }else{ Blob x; blob_init(&x,0,0); db_column_blob(&q, 0, &x); get_stext_by_mimetype(&x, "text/x-fossil-wiki", pOut); blob_reset(&x); } } db_reset(&q); break; } case 't': { /* Tickets */ static Stmt q1; static int iTitle = -1; db_static_prepare(&q1, "SELECT * FROM ticket WHERE tkt_id=:rid"); db_bind_int(&q1, ":rid", rid); if( db_step(&q1)==SQLITE_ROW ){ if( iTitle<0 ){ int n = db_column_count(&q1); for(iTitle=0; iTitle<n; iTitle++){ if( fossil_stricmp(db_column_name(&q1,iTitle),"title")==0 ) break; } } append_all_ticket_fields(pOut, &q1, iTitle); } db_reset(&q1); if( db_table_exists("repository","ticketchng") ){ static Stmt q2; db_static_prepare(&q2, "SELECT * FROM ticketchng WHERE tkt_id=:rid" " ORDER BY tkt_mtime"); db_bind_int(&q2, ":rid", rid); while( db_step(&q2)==SQLITE_ROW ){ append_all_ticket_fields(pOut, &q2, -1); } db_reset(&q2); } break; } } } /* ** This routine is a wrapper around search_stext(). ** ** This routine looks up the search text, stores it in an internal ** buffer, and returns a pointer to the text. Subsequent requests ** for the same document return the same pointer. The returned pointer ** is valid until the next invocation of this routine. Call this routine ** with an eType of 0 to clear the cache. 
*/ char *search_stext_cached( char cType, /* Type of document */ int rid, /* BLOB.RID or TAG.TAGID value for document */ const char *zName, /* Auxiliary information */ int *pnTitle /* OUT: length of title in bytes excluding \n */ ){ static struct { Blob stext; /* Cached search text */ char cType; /* The type */ int rid; /* The RID */ int nTitle; /* Number of bytes in title */ } cache; int i; char *z; if( cType!=cache.cType || rid!=cache.rid ){ if( cache.rid>0 ){ blob_reset(&cache.stext); }else{ blob_init(&cache.stext,0,0); } cache.cType = cType; cache.rid = rid; if( cType==0 ) return 0; search_stext(cType, rid, zName, &cache.stext); z = blob_str(&cache.stext); for(i=0; z[i] && z[i]!='\n'; i++){} cache.nTitle = i; } if( pnTitle ) *pnTitle = cache.nTitle; return blob_str(&cache.stext); } /* ** COMMAND: test-search-stext ** ** Usage: fossil test-search-stext TYPE ARG1 ARG2 */ void test_search_stext(void){ Blob out; db_find_and_open_repository(0,0); if( g.argc!=5 ) usage("TYPE RID NAME"); search_stext(g.argv[2][0], atoi(g.argv[3]), g.argv[4], &out); fossil_print("%s\n",blob_str(&out)); blob_reset(&out); } /* ** COMMAND: test-convert-stext ** ** Usage: fossil test-convert-stext FILE MIMETYPE ** ** Read the content of FILE and convert it to stext according to MIMETYPE. ** Send the result to standard output. 
*/ void test_convert_stext(void){ Blob in, out; db_find_and_open_repository(0,0); if( g.argc!=4 ) usage("FILENAME MIMETYPE"); blob_read_from_file(&in, g.argv[2]); blob_init(&out, 0, 0); get_stext_by_mimetype(&in, g.argv[3], &out); fossil_print("%s\n",blob_str(&out)); blob_reset(&in); blob_reset(&out); } /* The schema for the full-text index */ static const char zFtsSchema[] = @ -- One entry for each possible search result @ CREATE TABLE IF NOT EXISTS "%w".ftsdocs( @ rowid INTEGER PRIMARY KEY, -- Maps to the ftsidx.docid @ type CHAR(1), -- Type of document @ rid INTEGER, -- BLOB.RID or TAG.TAGID for the document @ name TEXT, -- Additional document description @ idxed BOOLEAN, -- True if currently in the index @ label TEXT, -- Label to print on search results @ url TEXT, -- URL to access this document @ mtime DATE, -- Date when document created @ bx TEXT, -- Temporary "body" content cache @ UNIQUE(type,rid) @ ); @ CREATE INDEX "%w".ftsdocIdxed ON ftsdocs(type,rid,name) WHERE idxed==0; @ CREATE INDEX "%w".ftsdocName ON ftsdocs(name) WHERE type='w'; @ CREATE VIEW IF NOT EXISTS "%w".ftscontent AS @ SELECT rowid, type, rid, name, idxed, label, url, mtime, @ title(type,rid,name) AS 'title', body(type,rid,name) AS 'body' @ FROM ftsdocs; @ CREATE VIRTUAL TABLE IF NOT EXISTS "%w".ftsidx @ USING fts4(content="ftscontent", title, body%s); ; static const char zFtsDrop[] = @ DROP TABLE IF EXISTS "%w".ftsidx; @ DROP VIEW IF EXISTS "%w".ftscontent; @ DROP TABLE IF EXISTS "%w".ftsdocs; ; /* ** Create or drop the tables associated with a full-text index. */ static int searchIdxExists = -1; void search_create_index(void){ const char *zDb = db_name("repository"); int useStemmer = db_get_boolean("search-stemmer",0); const char *zExtra = useStemmer ? 
",tokenize=porter" : ""; search_sql_setup(g.db); db_multi_exec(zFtsSchema/*works-like:"%w%w%w%w%w%s"*/, zDb, zDb, zDb, zDb, zDb, zExtra/*safe-for-%s*/); searchIdxExists = 1; } void search_drop_index(void){ const char *zDb = db_name("repository"); db_multi_exec(zFtsDrop/*works-like:"%w%w%w"*/, zDb, zDb, zDb); searchIdxExists = 0; } |
︙ | ︙ | |||
1290 1291 1292 1293 1294 1295 1296 | " AND rid NOT IN (SELECT rid FROM current_docs))" ); db_multi_exec( "DELETE FROM ftsdocs WHERE type='d'" " AND rid NOT IN (SELECT rid FROM current_docs)" ); db_multi_exec( | | | > | | | > > > > | | | | > | | | | 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 | " AND rid NOT IN (SELECT rid FROM current_docs))" ); db_multi_exec( "DELETE FROM ftsdocs WHERE type='d'" " AND rid NOT IN (SELECT rid FROM current_docs)" ); db_multi_exec( "INSERT OR IGNORE INTO ftsdocs(type,rid,name,idxed,label,bx,url,mtime)" " SELECT 'd', rid, name, 0," " title('d',rid,name)," " body('d',rid,name)," " printf('/doc/%q/%%s',urlencode(name))," " %.17g" " FROM current_docs", zBrUuid, rTime ); db_multi_exec( "INSERT INTO ftsidx(docid,title,body)" " SELECT rowid, label, bx FROM ftsdocs WHERE type='d' AND NOT idxed" ); db_multi_exec( "UPDATE ftsdocs SET" " idxed=1," " bx=NULL," " label='Document: '||label" " WHERE type='d' AND NOT idxed" ); } /* ** Deal with all of the unindexed 'c' terms in FTSDOCS */ static void search_update_checkin_index(void){ db_multi_exec( "INSERT INTO ftsidx(docid,title,body)" " SELECT rowid, '', body('c',rid,NULL) FROM ftsdocs" " WHERE type='c' AND NOT idxed;" ); db_multi_exec( "REPLACE INTO ftsdocs(rowid,idxed,type,rid,name,label,url,mtime)" " SELECT ftsdocs.rowid, 1, 'c', ftsdocs.rid, NULL," " printf('Check-in [%%.16s] on %%s',blob.uuid,datetime(event.mtime))," " printf('/timeline?y=ci&c=%%.20s',blob.uuid)," " event.mtime" " FROM ftsdocs, event, blob" " WHERE ftsdocs.type='c' AND NOT ftsdocs.idxed" " AND event.objid=ftsdocs.rid" " AND 
blob.rid=ftsdocs.rid" ); } /* ** Deal with all of the unindexed 't' terms in FTSDOCS */ static void search_update_ticket_index(void){ db_multi_exec( "INSERT INTO ftsidx(docid,title,body)" " SELECT rowid, title('t',rid,NULL), body('t',rid,NULL) FROM ftsdocs" " WHERE type='t' AND NOT idxed;" ); if( db_changes()==0 ) return; db_multi_exec( "REPLACE INTO ftsdocs(rowid,idxed,type,rid,name,label,url,mtime)" " SELECT ftsdocs.rowid, 1, 't', ftsdocs.rid, NULL," " printf('Ticket: %%s (%%s)',title('t',tkt_id,null)," " datetime(tkt_mtime))," " printf('/tktview/%%.20s',tkt_uuid)," " tkt_mtime" " FROM ftsdocs, ticket" " WHERE ftsdocs.type='t' AND NOT ftsdocs.idxed" " AND ticket.tkt_id=ftsdocs.rid" ); } /* ** Deal with all of the unindexed 'w' terms in FTSDOCS */ static void search_update_wiki_index(void){ db_multi_exec( "INSERT INTO ftsidx(docid,title,body)" " SELECT rowid, title('w',rid,NULL),body('w',rid,NULL) FROM ftsdocs" " WHERE type='w' AND NOT idxed;" ); if( db_changes()==0 ) return; db_multi_exec( "REPLACE INTO ftsdocs(rowid,idxed,type,rid,name,label,url,mtime)" " SELECT ftsdocs.rowid, 1, 'w', ftsdocs.rid, ftsdocs.name," " 'Wiki: '||ftsdocs.name," |
︙ | ︙ | |||
1414 1415 1416 1417 1418 1419 1420 | ** COMMAND: fts-config* ** ** Usage: fossil fts-config ?SUBCOMMAND? ?ARGUMENT? ** ** The "fossil fts-config" command configures the full-text search capabilities ** of the repository. Subcommands: ** | | | > > > > | | | | | 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 | ** COMMAND: fts-config* ** ** Usage: fossil fts-config ?SUBCOMMAND? ?ARGUMENT? ** ** The "fossil fts-config" command configures the full-text search capabilities ** of the repository. Subcommands: ** ** reindex Rebuild the search index. This is a no-op if ** index search is disabled ** ** index (on|off) Turn the search index on or off ** ** enable cdtw Enable various kinds of search. c=Check-ins, ** d=Documents, t=Tickets, w=Wiki. ** ** disable cdtw Disable various kinds of search ** ** stemmer (on|off) Turn the Porter stemmer on or off for indexed ** search. (Unindexed search is never stemmed.) ** ** The current search settings are displayed after any changes are applied. ** Run this command with no arguments to simply see the settings. */ void test_fts_cmd(void){ static const struct { int iCmd; const char *z; } aCmd[] = { { 1, "reindex" }, { 2, "index" }, { 3, "disable" }, { 4, "enable" }, { 5, "stemmer" }, }; static const struct { char *zSetting; char *zName; char *zSw; } aSetng[] = { { "search-ckin", "check-in search:", "c" }, { "search-doc", "document search:", "d" }, { "search-tkt", "ticket search:", "t" }, { "search-wiki", "wiki search:", "w" }, }; char *zSubCmd; int i, j, n; int iCmd = 0; int iAction = 0; db_find_and_open_repository(0, 0); if( g.argc>2 ){ |
︙ | ︙ | |||
1462 1463 1464 1465 1466 1467 1468 | fossil_fatal("unknown \"%s\" - should be on of:%s", zSubCmd, blob_str(&all)); return; } iCmd = aCmd[i].iCmd; } if( iCmd==1 ){ | | | > > > > > > > | 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 | fossil_fatal("unknown \"%s\" - should be on of:%s", zSubCmd, blob_str(&all)); return; } iCmd = aCmd[i].iCmd; } if( iCmd==1 ){ if( search_index_exists() ) iAction = 2; } if( iCmd==2 ){ if( g.argc<3 ) usage("index (on|off)"); iAction = 1 + is_truth(g.argv[3]); } db_begin_transaction(); /* Adjust search settings */ if( iCmd==3 || iCmd==4 ){ const char *zCtrl; if( g.argc<4 ) usage(mprintf("%s STRING",zSubCmd)); zCtrl = g.argv[3]; for(j=0; j<ArraySize(aSetng); j++){ if( strchr(zCtrl, aSetng[j].zSw[0])!=0 ){ db_set_int(aSetng[j].zSetting, iCmd-3, 0); } } } if( iCmd==5 ){ if( g.argc<4 ) usage("porter ON/OFF"); db_set_int("search-stemmer", is_truth(g.argv[3]), 0); } /* destroy or rebuild the index, if requested */ if( iAction>=1 ){ search_drop_index(); } if( iAction>=2 ){ search_rebuild_index(); } /* Always show the status before ending */ for(i=0; i<ArraySize(aSetng); i++){ fossil_print("%-16s %s\n", aSetng[i].zName, db_get_boolean(aSetng[i].zSetting,0) ? "on" : "off"); } fossil_print("%-16s %s\n", "Porter stemmer:", db_get_boolean("search-stemmer",0) ? "on" : "off"); if( search_index_exists() ){ fossil_print("%-16s enabled\n", "full-text index:"); fossil_print("%-16s %d\n", "documents:", db_int(0, "SELECT count(*) FROM ftsdocs")); }else{ fossil_print("%-16s disabled\n", "full-text index:"); } db_end_transaction(0); } |
Changes to src/setup.c.
︙ | ︙ | |||
2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 | search_create_index(); search_fill_index(); search_update_index(search_restrict(SRCH_ALL)); } if( search_index_exists() ){ @ <p>Currently using an SQLite FTS4 search index. This makes search @ run faster, especially on large repositories, but takes up space.</p> @ <p><input type="submit" name="fts0" value="Delete The Full-Text Index"> @ <input type="submit" name="fts1" value="Rebuild The Full-Text Index"> }else{ @ <p>The SQLite FTS4 search index is disabled. All searching will be @ a full-text scan. This usually works fine, but can be slow for @ larger repositories.</p> @ <p><input type="submit" name="fts1" value="Create A Full-Text Index"> } @ </div></form> style_footer(); } | > > | 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 | search_create_index(); search_fill_index(); search_update_index(search_restrict(SRCH_ALL)); } if( search_index_exists() ){ @ <p>Currently using an SQLite FTS4 search index. This makes search @ run faster, especially on large repositories, but takes up space.</p> onoff_attribute("Use Porter Stemmer","search-stemmer","ss",0,0); @ <p><input type="submit" name="fts0" value="Delete The Full-Text Index"> @ <input type="submit" name="fts1" value="Rebuild The Full-Text Index"> }else{ @ <p>The SQLite FTS4 search index is disabled. All searching will be @ a full-text scan. This usually works fine, but can be slow for @ larger repositories.</p> onoff_attribute("Use Porter Stemmer","search-stemmer","ss",0,0); @ <p><input type="submit" name="fts1" value="Create A Full-Text Index"> } @ </div></form> style_footer(); } |
Changes to src/wikiformat.c.
︙ | ︙ | |||
/*
** Measure the HTML token that begins at z[0] and return the number of
** characters it occupies.  The caller is expected to pass a pointer to
** a non-empty string.  Four kinds of token are recognized:
**
**   *  Markup beginning with '<'.  The length comes from markupLength();
**      when that routine reports a non-positive length the token is
**      just the single '<' character.
**
**   *  A run of consecutive whitespace characters.
**
**   *  An entity reference: '&', an optional '#', any number of
**      alphanumeric characters, and an optional terminating ';'.
**
**   *  Ordinary text, which extends up to (but not including) the next
**      '<', '&', whitespace character, or the NUL terminator.
*/
static int nextHtmlToken(const char *z){
  const char first = z[0];
  int len = 1;

  if( first=='<' ){
    len = markupLength(z);
    return len>0 ? len : 1;
  }

  if( fossil_isspace(first) ){
    while( z[len]!=0 && fossil_isspace(z[len]) ) len++;
    return len;
  }

  if( first=='&' ){
    if( z[1]=='#' ) len = 2;
    while( fossil_isalnum(z[len]) ) len++;
    if( z[len]==';' ) len++;
    return len;
  }

  /* Ordinary text.  Every delimiter compares <= '<', so characters
  ** greater than '<' are skipped without any further classification. */
  for(;;){
    const char ch = z[len];
    if( ch<='<'
     && (ch=='<' || ch=='&' || fossil_isspace(ch) || ch==0)
    ){
      break;
    }
    len++;
  }
  return len;
}

/*
** Attempt to reformat messy HTML to be easily readable by humans.
*/
︙ | ︙ | |||
2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 | blob_reset(&out); } } /* ** Remove all HTML markup from the input text. The output written into ** pOut is pure text. */ void html_to_plaintext(const char *zIn, Blob *pOut){ int n; int i, j; int nNL = 0; /* Number of \n characters at the end of pOut */ int nWS = 0; /* True if pOut ends with whitespace */ while( zIn[0] ){ n = nextHtmlToken(zIn); if( zIn[0]=='<' && n>1 ){ int isCloseTag; int eTag; int eType; char zTag[32]; | > > > > > > | 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 | blob_reset(&out); } } /* ** Remove all HTML markup from the input text. The output written into ** pOut is pure text. ** ** Put the title on the first line, if there is any <title> markup. ** If there is no <title>, then create a blank first line. */ void html_to_plaintext(const char *zIn, Blob *pOut){ int n; int i, j; int inTitle = 0; /* True between <title>...</title> */ int seenText = 0; /* True after first non-whitespace seen */ int nNL = 0; /* Number of \n characters at the end of pOut */ int nWS = 0; /* True if pOut ends with whitespace */ while( fossil_isspace(zIn[0]) ) zIn++; while( zIn[0] ){ n = nextHtmlToken(zIn); if( zIn[0]=='<' && n>1 ){ int isCloseTag; int eTag; int eType; char zTag[32]; |
︙ | ︙ | |||
2128 2129 2130 2131 2132 2133 2134 | n = nextHtmlToken(zIn); if( fossil_strnicmp(zIn, "</style",7)==0 ) break; zIn += n; } if( zIn[0]=='<' ) zIn += n; continue; } | > > > | > > > | > | | | | > > > > > > | > > > > > > > > > > > > > > > > > > > > > > | > > > > > | 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 | n = nextHtmlToken(zIn); if( fossil_strnicmp(zIn, "</style",7)==0 ) break; zIn += n; } if( zIn[0]=='<' ) zIn += n; continue; } if( eTag==MARKUP_TITLE ){ inTitle = !isCloseTag; } if( !isCloseTag && seenText && (eType & (MUTYPE_BLOCK|MUTYPE_TABLE))!=0 ){ if( nNL==0 ){ blob_append(pOut, "\n", 1); nNL++; } nWS = 1; } }else if( fossil_isspace(zIn[0]) ){ if( seenText ){ nNL = 0; if( !inTitle ){ /* '\n' -> ' ' within <title> */ for(i=0; i<n; i++) if( zIn[i]=='\n' ) nNL++; } if( !nWS ){ blob_append(pOut, nNL ? "\n" : " ", 1); nWS = 1; } } }else if( zIn[0]=='&' ){ char c = '?'; if( zIn[1]=='#' ){ int x = atoi(&zIn[1]); if( x>0 && x<=127 ) c = x; }else{ static const struct { int n; char c; char *z; } aEntity[] = { { 5, '&', "&" }, { 4, '<', "<" }, { 4, '>', ">" }, { 6, ' ', " " }, }; int jj; for(jj=0; jj<ArraySize(aEntity); jj++){ if( aEntity[jj].n==n && strncmp(aEntity[jj].z,zIn,n)==0 ){ c = aEntity[jj].c; break; } } } if( fossil_isspace(c) ){ if( nWS==0 && seenText ) blob_append(pOut, &c, 1); nWS = 1; nNL = c=='\n'; }else{ if( !seenText && !inTitle ) blob_append(pOut, "\n", 1); seenText = 1; nNL = nWS = 0; blob_append(pOut, &c, 1); } }else{ if( !seenText && !inTitle ) blob_append(pOut, "\n", 1); seenText = 1; nNL = nWS = 0; blob_append(pOut, zIn, n); } zIn += n; } if( nNL==0 ) blob_append(pOut, "\n", 1); } /* |
︙ | ︙ |