/*
** Copyright (c) 2014 D. Richard Hipp
**
** This program is free software; you can redistribute it and/or
** modify it under the terms of the Simplified BSD License (also
** known as the "2-Clause License" or "FreeBSD License".)
**
** This program is distributed in the hope that it will be useful,
** but without any warranty; without even the implied warranty of
** merchantability or fitness for a particular purpose.
**
** Author contact information:
**   drh@hwaci.com
**   http://www.hwaci.com/drh/
**
*******************************************************************************
**
** This program reads Fossil source code files and tries to verify that
** printf-style format strings are correct.
**
** This program implements a compile-time validation step on the Fossil
** source code.  Running this program is entirely optional.  Its role is
** similar to the -Wall compiler switch on gcc, or the scan-build utility
** of clang, or other static analyzers.  The purpose is to try to identify
** problems in the source code at compile-time.  The difference is that this
** static checker is specifically designed for the particular printf formatter
** implementation used by Fossil.
**
** Checks include:
**
**    *  Verify that vararg formatting routines like blob_printf() or
**       db_multi_exec() have the correct number of arguments for their
**       format string.
**
**    *  For routines designed to generate SQL, warn about the use of %s
**       which might allow SQL injection.
*/
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include <assert.h>

/*
** Malloc, aborting if it fails.
**
** nByte is the number of bytes wanted.  The return value is never NULL:
** on failure a diagnostic is written to stderr and the process exits
** with status 1.
**
** A request for zero bytes is served with a one-byte allocation, because
** malloc(0) is allowed to return NULL without that being an error.  A
** negative nByte is reported as a failure rather than being converted
** into an enormous unsigned size.
*/
void *safe_malloc(int nByte){
  void *x = 0;
  if( nByte>=0 ){
    x = malloc(nByte>0 ? (size_t)nByte : 1);
  }
  if( x==0 ){
    fprintf(stderr, "failed to allocate %d bytes\n", nByte);
    exit(1);
  }
  return x;
}

/*
** Realloc, aborting if it fails.
**
** pOld is a pointer previously obtained from malloc()/realloc(), or NULL.
** nByte is the new size in bytes.  The return value is never NULL: on
** failure a diagnostic is written to stderr and the process exits with
** status 1.
**
** A request for zero bytes is served with a one-byte allocation, because
** realloc(p,0) may release p and return NULL, which is indistinguishable
** from an allocation failure.  A negative nByte is reported as a failure.
*/
void *safe_realloc(void *pOld, int nByte){
  void *x = 0;
  if( nByte>=0 ){
    x = realloc(pOld, nByte>0 ? (size_t)nByte : 1);
  }
  if( x==0 ){
    fprintf(stderr, "failed to allocate %d bytes\n", nByte);
    exit(1);
  }
  return x;
}

/*
** Read the entire content of the file named zFilename into memory obtained
** from malloc().
Add a zero-terminator to the end. ** Return a pointer to that memory. */ static char *read_file(const char *zFilename){ FILE *in; char *z; int nByte; int got; in = fopen(zFilename, "rb"); if( in==0 ){ return 0; } fseek(in, 0, SEEK_END); nByte = ftell(in); fseek(in, 0, SEEK_SET); z = safe_malloc( nByte+1 ); got = fread(z, 1, nByte, in); z[got] = 0; fclose(in); return z; } /* ** When parsing the input file, the following token types are recognized. */ #define TK_SPACE 1 /* Whitespace or comments */ #define TK_ID 2 /* An identifier */ #define TK_STR 3 /* A string literal in double-quotes */ #define TK_OTHER 4 /* Any other token */ #define TK_EOF 99 /* End of file */ /* ** Determine the length and type of the token beginning at z[0] */ static int token_length(const char *z, int *pType, int *pLN){ int i; if( z[0]==0 ){ *pType = TK_EOF; return 0; } if( z[0]=='"' || z[0]=='\'' ){ for(i=1; z[i] && z[i]!=z[0]; i++){ if( z[i]=='\\' && z[i+1]!=0 ){ if( z[i+1]=='\n' ) (*pLN)++; i++; } } if( z[i]!=0 ) i++; *pType = z[0]=='"' ? TK_STR : TK_OTHER; return i; } if( isalnum(z[0]) || z[0]=='_' ){ for(i=1; isalnum(z[i]) || z[i]=='_'; i++){} *pType = isalpha(z[0]) || z[0]=='_' ? 
TK_ID : TK_OTHER; return i; } if( isspace(z[0]) ){ if( z[0]=='\n' ) (*pLN)++; for(i=1; isspace(z[i]); i++){ if( z[i]=='\n' ) (*pLN)++; } *pType = TK_SPACE; return i; } if( z[0]=='/' && z[1]=='*' ){ for(i=2; z[i] && (z[i]!='*' || z[i+1]!='/'); i++){ if( z[i]=='\n' ) (*pLN)++; } if( z[i] ) i += 2; *pType = TK_SPACE; return i; } if( z[0]=='/' && z[1]=='/' ){ for(i=2; z[i] && z[i]!='\n'; i++){} if( z[i] ){ (*pLN)++; i++; } *pType = TK_SPACE; return i; } *pType = TK_OTHER; return 1; } /* ** Return the next non-whitespace token */ const char *next_non_whitespace(const char *z, int *pLen, int *pType){ int len; int eType; int ln = 0; while( (len = token_length(z, &eType, &ln))>0 && eType==TK_SPACE ){ z += len; } *pLen = len; *pType = eType; return z; } /* ** Return index into z[] for the first balanced TK_OTHER token with ** value cValue. */ static int distance_to(const char *z, char cVal){ int len; int dist = 0; int eType; int nNest = 0; int ln = 0; while( z[0] && (len = token_length(z, &eType, &ln))>0 ){ if( eType==TK_OTHER ){ if( z[0]==cVal && nNest==0 ){ break; }else if( z[0]=='(' ){ nNest++; }else if( z[0]==')' ){ nNest--; } } dist += len; z += len; } return dist; } /* ** Return the first non-whitespace characters in z[] */ static const char *skip_space(const char *z){ while( isspace(z[0]) ){ z++; } return z; } /* ** Return true if the input is a string literal. */ static int is_string_lit(const char *z){ int nu1, nu2; z = next_non_whitespace(z, &nu1, &nu2); return z[0]=='"'; } /* ** Return true if the input is an expression of string literals: ** ** EXPR ? "..." : "..." */ static int is_string_expr(const char *z){ int len = 0, eType; const char *zOrig = z; len = distance_to(z, '?'); if( z[len]==0 && skip_space(z)[0]=='(' ){ z = skip_space(z) + 1; len = distance_to(z, '?'); } z += len; if( z[0]=='?' 
){ z++; z = next_non_whitespace(z, &len, &eType); if( eType==TK_STR ){ z += len; z = next_non_whitespace(z, &len, &eType); if( eType==TK_OTHER && z[0]==':' ){ z += len; z = next_non_whitespace(z, &len, &eType); if( eType==TK_STR ){ z += len; z = next_non_whitespace(z, &len, &eType); if( eType==TK_EOF ) return 1; if( eType==TK_OTHER && z[0]==')' && skip_space(zOrig)[0]=='(' ){ z += len; z = next_non_whitespace(z, &len, &eType); if( eType==TK_EOF ) return 1; } } } } } return 0; } /* ** A list of functions that return strings that are safe to insert into ** SQL using %s. */ static const char *azSafeFunc[] = { "filename_collation", "leaf_is_closed_sql", "timeline_query_for_www", "timeline_query_for_tty", "blob_sql_text", "glob_expr", "fossil_all_reserved_names", "configure_inop_rhs", "db_setting_inop_rhs", }; /* ** Return true if the input is an argument that is safe to use with %s ** while building an SQL statement. */ static int is_s_safe(const char *z){ int len, eType; int i; /* A string literal is safe for use with %s */ if( is_string_lit(z) ) return 1; /* Certain functions are guaranteed to return a string that is safe ** for use with %s */ z = next_non_whitespace(z, &len, &eType); for(i=0; i0 && isspace(z[i]); i--){ z[i] = 0; } z += len + 1; } acType = (char*)&azArg[nArg]; if( fmtArg>nArg ){ printf("%s:%d: too few arguments to %.*s()\n", zFilename, lnFCall, szFName, zFCall); nErr++; }else{ const char *zFmt = azArg[fmtArg-1]; const char *zOverride = strstr(zFmt, "/*works-like:"); if( zOverride ) zFmt = zOverride + sizeof("/*works-like:")-1; if( !is_string_lit(zFmt) ){ printf("%s:%d: %.*s() has non-constant format string\n", zFilename, lnFCall, szFName, zFCall); nErr++; }else if( (k = formatArgCount(zFmt, nArg, acType))>=0 && nArg!=fmtArg+k ){ printf("%s:%d: too %s arguments to %.*s() " "- got %d and expected %d\n", zFilename, lnFCall, (nArg0 && z[0]=='(' && ePrev==TK_ID && (x = isFormatFunc(zPrev,szPrev,&fmtFlags))>0 ){ nErr += checkFormatFunc(zName, zPrev, 
lnPrev, x, fmtFlags); } } zPrev = z; ePrev = eToken; szPrev = szToken; lnPrev = ln; } return nErr; } /* ** Check for format-string design rule violations on all files listed ** on the command-line. */ int main(int argc, char **argv){ int i; int nErr = 0; for(i=1; i