00001 /****************************************************************************** 00002 * Copyright (c) 1999, Frank Warmerdam 00003 * 00004 * Permission is hereby granted, free of charge, to any person obtaining a 00005 * copy of this software and associated documentation files (the "Software"), 00006 * to deal in the Software without restriction, including without limitation 00007 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 00008 * and/or sell copies of the Software, and to permit persons to whom the 00009 * Software is furnished to do so, subject to the following conditions: 00010 * 00011 * The above copyright notice and this permission notice shall be included 00012 * in all copies or substantial portions of the Software. 00013 * 00014 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 00015 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 00016 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 00017 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 00018 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 00019 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 00020 * DEALINGS IN THE SOFTWARE. 00021 ****************************************************************************** 00022 * 00023 * cpl_csv.c: Support functions for accessing CSV files. 00024 * 00025 * $Log$ 00025 * Revision 1.2 2001/03/05 04:58:33 warmerda 00025 * updated 00025 * 00026 * Revision 1.10 2001/01/17 15:32:19 warmerda 00027 * Include /usr/share/epsg_csv and share/epsg_csv in csv search path. 00028 * 00029 * Revision 1.9 2000/12/12 19:34:36 warmerda 00030 * Use CSV_DATA_DIR if defined. 
00031 * 00032 * Revision 1.8 2000/08/22 04:33:33 warmerda 00033 * added support for /usr/local/shared/epsg_csv 00034 * 00035 * Revision 1.7 1999/12/03 14:42:59 warmerda 00036 * Passing a NULL filename into CSVAccess() now results in a graceful 00037 * failure to open the file. 00038 * 00039 * Revision 1.6 1999/06/26 17:28:51 warmerda 00040 * Fixed reading of records with newlines embedded in quoted strings. 00041 * 00042 * Revision 1.5 1999/05/04 03:07:24 warmerda 00043 * avoid warning 00044 * 00045 * Revision 1.4 1999/04/28 19:59:56 warmerda 00046 * added some doxygen style documentation 00047 * 00048 * Revision 1.3 1999/03/17 19:53:15 geotiff 00049 * sys includes moved to cpl_serv.h 00050 * 00051 * Revision 1.2 1999/03/10 16:54:42 geotiff 00052 * Added use of the GEOTIFF_CSV environment variable to locate CSV files. 00053 * 00054 * Revision 1.1 1999/03/09 15:57:04 geotiff 00055 * New 00056 * 00057 * Revision 1.2 1999/02/24 16:23:21 warmerda 00058 * added lots 00059 * 00060 * Revision 1.1 1999/01/05 16:52:36 warmerda 00061 * New 00062 * 00063 */ 00064 00065 #include "cpl_csv.h" 00066 #include "geo_tiffp.h" 00067 00068 /* ==================================================================== */ 00069 /* The CSVTable is a persistant set of info about an open CSV */ 00070 /* table. While it doesn't currently maintain a record index, */ 00071 /* or in-memory copy of the table, it could be changed to do so */ 00072 /* in the future. */ 00073 /* ==================================================================== */ 00074 typedef struct ctb { 00075 FILE *fp; 00076 00077 struct ctb *psNext; 00078 00079 char *pszFilename; 00080 00081 char **papszFieldNames; 00082 00083 char **papszRecFields; 00084 } CSVTable; 00085 00086 static CSVTable *psCSVTableList = NULL; 00087 00088 /************************************************************************/ 00089 /* CSVAccess() */ 00090 /* */ 00091 /* This function will fetch a handle to the requested table. 
*/ 00092 /* If not found in the ``open table list'' the table will be */ 00093 /* opened and added to the list. Eventually this function may */ 00094 /* become public with an abstracted return type so that */ 00095 /* applications can set options about the table. For now this */ 00096 /* isn't done. */ 00097 /************************************************************************/ 00098 00099 static CSVTable *CSVAccess( const char * pszFilename ) 00100 00101 { 00102 CSVTable *psTable; 00103 FILE *fp; 00104 00105 if( pszFilename == NULL ) 00106 return NULL; 00107 00108 /* -------------------------------------------------------------------- */ 00109 /* Is the table already in the list. */ 00110 /* -------------------------------------------------------------------- */ 00111 for( psTable = psCSVTableList; psTable != NULL; psTable = psTable->psNext ) 00112 { 00113 if( EQUAL(psTable->pszFilename,pszFilename) ) 00114 { 00115 /* 00116 * Eventually we should consider promoting to the front of 00117 * the list to accelerate frequently accessed tables. 00118 */ 00119 00120 return( psTable ); 00121 } 00122 } 00123 00124 /* -------------------------------------------------------------------- */ 00125 /* If not, try to open it. */ 00126 /* -------------------------------------------------------------------- */ 00127 fp = VSIFOpen( pszFilename, "r" ); 00128 if( fp == NULL ) 00129 return NULL; 00130 00131 /* -------------------------------------------------------------------- */ 00132 /* Create an information structure about this table, and add to */ 00133 /* the front of the list. 
*/ 00134 /* -------------------------------------------------------------------- */ 00135 psTable = (CSVTable *) CPLCalloc(sizeof(CSVTable),1); 00136 00137 psTable->fp = fp; 00138 psTable->pszFilename = CPLStrdup( pszFilename ); 00139 psTable->psNext = psCSVTableList; 00140 00141 psCSVTableList = psTable; 00142 00143 /* -------------------------------------------------------------------- */ 00144 /* Read the table header record containing the field names. */ 00145 /* -------------------------------------------------------------------- */ 00146 psTable->papszFieldNames = CSVReadParseLine( fp ); 00147 00148 return( psTable ); 00149 } 00150 00151 /************************************************************************/ 00152 /* CSVDeaccess() */ 00153 /************************************************************************/ 00154 00155 void CSVDeaccess( const char * pszFilename ) 00156 00157 { 00158 CSVTable *psLast, *psTable; 00159 00160 /* -------------------------------------------------------------------- */ 00161 /* A NULL means deaccess all tables. */ 00162 /* -------------------------------------------------------------------- */ 00163 if( pszFilename == NULL ) 00164 { 00165 while( psCSVTableList != NULL ) 00166 CSVDeaccess( psCSVTableList->pszFilename ); 00167 00168 return; 00169 } 00170 00171 /* -------------------------------------------------------------------- */ 00172 /* Find this table. */ 00173 /* -------------------------------------------------------------------- */ 00174 psLast = NULL; 00175 for( psTable = psCSVTableList; 00176 psTable != NULL && !EQUAL(psTable->pszFilename,pszFilename); 00177 psTable = psTable->psNext ) 00178 { 00179 psLast = psTable; 00180 } 00181 00182 if( psTable == NULL ) 00183 return; 00184 00185 /* -------------------------------------------------------------------- */ 00186 /* Remove the link from the list. 
*/ 00187 /* -------------------------------------------------------------------- */ 00188 if( psLast != NULL ) 00189 psLast->psNext = psTable->psNext; 00190 else 00191 psCSVTableList = psTable->psNext; 00192 00193 /* -------------------------------------------------------------------- */ 00194 /* Free the table. */ 00195 /* -------------------------------------------------------------------- */ 00196 VSIFClose( psTable->fp ); 00197 00198 CSLDestroy( psTable->papszFieldNames ); 00199 CSLDestroy( psTable->papszRecFields ); 00200 CPLFree( psTable->pszFilename ); 00201 00202 CPLFree( psTable ); 00203 00204 /* -------------------------------------------------------------------- */ 00205 /* Free working buffer in CPLReadLine(). */ 00206 /* -------------------------------------------------------------------- */ 00207 CPLReadLine( NULL ); 00208 } 00209 00210 /************************************************************************/ 00211 /* CSVReadParseLine() */ 00212 /* */ 00213 /* Read one line, and return split into fields. The return */ 00214 /* result is a stringlist, in the sense of the CSL functions. */ 00215 /************************************************************************/ 00216 00217 char **CSVReadParseLine( FILE * fp ) 00218 00219 { 00220 const char *pszLine; 00221 char *pszWorkLine; 00222 char **papszReturn; 00223 00224 CPLAssert( fp != NULL ); 00225 if( fp == NULL ) 00226 return( NULL ); 00227 00228 pszLine = CPLReadLine( fp ); 00229 if( pszLine == NULL ) 00230 return( NULL ); 00231 00232 /* -------------------------------------------------------------------- */ 00233 /* If there are no quotes, then this is the simple case. */ 00234 /* Parse, and return tokens. 
*/ 00235 /* -------------------------------------------------------------------- */ 00236 if( strchr(pszLine,'\"') == NULL ) 00237 return CSLTokenizeStringComplex( pszLine, ",", TRUE, TRUE ); 00238 00239 /* -------------------------------------------------------------------- */ 00240 /* We must now count the quotes in our working string, and as */ 00241 /* long as it is odd, keep adding new lines. */ 00242 /* -------------------------------------------------------------------- */ 00243 pszWorkLine = CPLStrdup( pszLine ); 00244 00245 while( TRUE ) 00246 { 00247 int i, nCount = 0; 00248 00249 for( i = 0; pszWorkLine[i] != '\0'; i++ ) 00250 { 00251 if( pszWorkLine[i] == '\"' 00252 && (i == 0 || pszWorkLine[i-1] != '\\') ) 00253 nCount++; 00254 } 00255 00256 if( nCount % 2 == 0 ) 00257 break; 00258 00259 pszLine = CPLReadLine( fp ); 00260 if( pszLine == NULL ) 00261 break; 00262 00263 pszWorkLine = (char *) 00264 CPLRealloc(pszWorkLine, 00265 strlen(pszWorkLine) + strlen(pszLine) + 1); 00266 strcat( pszWorkLine, pszLine ); 00267 } 00268 00269 papszReturn = CSLTokenizeStringComplex( pszWorkLine, ",", TRUE, TRUE ); 00270 00271 CPLFree( pszWorkLine ); 00272 00273 return papszReturn; 00274 } 00275 00276 /************************************************************************/ 00277 /* CSVCompare() */ 00278 /* */ 00279 /* Compare a field to a search value using a particular */ 00280 /* criteria. 
*/ 00281 /************************************************************************/ 00282 00283 static int CSVCompare( const char * pszFieldValue, const char * pszTarget, 00284 CSVCompareCriteria eCriteria ) 00285 00286 { 00287 if( eCriteria == CC_ExactString ) 00288 { 00289 return( strcmp( pszFieldValue, pszTarget ) == 0 ); 00290 } 00291 else if( eCriteria == CC_ApproxString ) 00292 { 00293 return( EQUAL( pszFieldValue, pszTarget ) ); 00294 } 00295 else if( eCriteria == CC_Integer ) 00296 { 00297 return( atoi(pszFieldValue) == atoi(pszTarget) ); 00298 } 00299 00300 return FALSE; 00301 } 00302 00303 /************************************************************************/ 00304 /* CSVScanLines() */ 00305 /* */ 00306 /* Read the file scanline for lines where the key field equals */ 00307 /* the indicated value with the suggested comparison criteria. */ 00308 /* Return the first matching line split into fields. */ 00309 /************************************************************************/ 00310 00311 char **CSVScanLines( FILE *fp, int iKeyField, const char * pszValue, 00312 CSVCompareCriteria eCriteria ) 00313 00314 { 00315 char **papszFields = NULL; 00316 int bSelected = FALSE, nTestValue; 00317 00318 CPLAssert( pszValue != NULL ); 00319 CPLAssert( iKeyField >= 0 ); 00320 CPLAssert( fp != NULL ); 00321 00322 nTestValue = atoi(pszValue); 00323 00324 while( !bSelected ) { 00325 papszFields = CSVReadParseLine( fp ); 00326 if( papszFields == NULL ) 00327 return( NULL ); 00328 00329 if( CSLCount( papszFields ) < iKeyField+1 ) 00330 { 00331 /* not selected */ 00332 } 00333 else if( eCriteria == CC_Integer 00334 && atoi(papszFields[iKeyField]) == nTestValue ) 00335 { 00336 bSelected = TRUE; 00337 } 00338 else 00339 { 00340 bSelected = CSVCompare( papszFields[iKeyField], pszValue, 00341 eCriteria ); 00342 } 00343 00344 if( !bSelected ) 00345 { 00346 CSLDestroy( papszFields ); 00347 papszFields = NULL; 00348 } 00349 } 00350 00351 return( papszFields ); 00352 } 00353 
00354 /************************************************************************/ 00355 /* CSVScanFile() */ 00356 /* */ 00357 /* Scan a whole file using criteria similar to above, but also */ 00358 /* taking care of file opening and closing. */ 00359 /************************************************************************/ 00360 00361 char **CSVScanFile( const char * pszFilename, int iKeyField, 00362 const char * pszValue, CSVCompareCriteria eCriteria ) 00363 00364 { 00365 CSVTable *psTable; 00366 00367 /* -------------------------------------------------------------------- */ 00368 /* Get access to the table. */ 00369 /* -------------------------------------------------------------------- */ 00370 CPLAssert( pszFilename != NULL ); 00371 00372 if( iKeyField < 0 ) 00373 return NULL; 00374 00375 psTable = CSVAccess( pszFilename ); 00376 if( psTable == NULL ) 00377 return NULL; 00378 00379 /* -------------------------------------------------------------------- */ 00380 /* Does the current record match the criteria? If so, just */ 00381 /* return it again. */ 00382 /* -------------------------------------------------------------------- */ 00383 if( iKeyField >= 0 00384 && iKeyField < CSLCount(psTable->papszRecFields) 00385 && CSVCompare(pszValue,psTable->papszRecFields[iKeyField],eCriteria) ) 00386 { 00387 return psTable->papszRecFields; 00388 } 00389 00390 /* -------------------------------------------------------------------- */ 00391 /* Scan the file from the beginning, replacing the ``current */ 00392 /* record'' in our structure with the one that is found. 
*/ 00393 /* -------------------------------------------------------------------- */ 00394 VSIRewind( psTable->fp ); 00395 CPLReadLine( psTable->fp ); /* throw away the header line */ 00396 00397 CSLDestroy( psTable->papszRecFields ); 00398 psTable->papszRecFields = 00399 CSVScanLines( psTable->fp, iKeyField, pszValue, eCriteria ); 00400 00401 return( psTable->papszRecFields ); 00402 } 00403 00404 /************************************************************************/ 00405 /* CPLGetFieldId() */ 00406 /* */ 00407 /* Read the first record of a CSV file (rewinding to be sure), */ 00408 /* and find the field with the indicated name. Returns -1 if */ 00409 /* it fails to find the field name. Comparison is case */ 00410 /* insensitive, but otherwise exact. After this function has */ 00411 /* been called the file pointer will be positioned just after */ 00412 /* the first record. */ 00413 /************************************************************************/ 00414 00415 int CSVGetFieldId( FILE * fp, const char * pszFieldName ) 00416 00417 { 00418 char **papszFields; 00419 int i; 00420 00421 CPLAssert( fp != NULL && pszFieldName != NULL ); 00422 00423 VSIRewind( fp ); 00424 00425 papszFields = CSVReadParseLine( fp ); 00426 for( i = 0; papszFields != NULL && papszFields[i] != NULL; i++ ) 00427 { 00428 if( EQUAL(papszFields[i],pszFieldName) ) 00429 { 00430 CSLDestroy( papszFields ); 00431 return i; 00432 } 00433 } 00434 00435 CSLDestroy( papszFields ); 00436 00437 return -1; 00438 } 00439 00440 /************************************************************************/ 00441 /* CSVGetFileFieldId() */ 00442 /* */ 00443 /* Same as CPLGetFieldId(), except that we get the file based */ 00444 /* on filename, rather than having an existing handle. 
*/ 00445 /************************************************************************/ 00446 00447 int CSVGetFileFieldId( const char * pszFilename, const char * pszFieldName ) 00448 00449 { 00450 CSVTable *psTable; 00451 int i; 00452 00453 /* -------------------------------------------------------------------- */ 00454 /* Get access to the table. */ 00455 /* -------------------------------------------------------------------- */ 00456 CPLAssert( pszFilename != NULL ); 00457 00458 psTable = CSVAccess( pszFilename ); 00459 if( psTable == NULL ) 00460 return -1; 00461 00462 /* -------------------------------------------------------------------- */ 00463 /* Find the requested field. */ 00464 /* -------------------------------------------------------------------- */ 00465 for( i = 0; 00466 psTable->papszFieldNames != NULL 00467 && psTable->papszFieldNames[i] != NULL; 00468 i++ ) 00469 { 00470 if( EQUAL(psTable->papszFieldNames[i],pszFieldName) ) 00471 { 00472 return i; 00473 } 00474 } 00475 00476 return -1; 00477 } 00478 00479 00480 /************************************************************************/ 00481 /* CSVScanFileByName() */ 00482 /* */ 00483 /* Same as CSVScanFile(), but using a field name instead of a */ 00484 /* field number. */ 00485 /************************************************************************/ 00486 00487 char **CSVScanFileByName( const char * pszFilename, 00488 const char * pszKeyFieldName, 00489 const char * pszValue, CSVCompareCriteria eCriteria ) 00490 00491 { 00492 int iKeyField; 00493 00494 iKeyField = CSVGetFileFieldId( pszFilename, pszKeyFieldName ); 00495 if( iKeyField == -1 ) 00496 return NULL; 00497 00498 return( CSVScanFile( pszFilename, iKeyField, pszValue, eCriteria ) ); 00499 } 00500 00501 /************************************************************************/ 00502 /* CSVGetField() */ 00503 /* */ 00504 /* The all-in-one function to fetch a particular field value */ 00505 /* from a CSV file. 
Note this function will return an empty */ 00506 /* string, rather than NULL if it fails to find the desired */ 00507 /* value for some reason. The caller can't establish that the */ 00508 /* fetch failed. */ 00509 /************************************************************************/ 00510 00511 const char *CSVGetField( const char * pszFilename, 00512 const char * pszKeyFieldName, 00513 const char * pszKeyFieldValue, 00514 CSVCompareCriteria eCriteria, 00515 const char * pszTargetField ) 00516 00517 { 00518 CSVTable *psTable; 00519 char **papszRecord; 00520 int iTargetField; 00521 00522 /* -------------------------------------------------------------------- */ 00523 /* Find the table. */ 00524 /* -------------------------------------------------------------------- */ 00525 psTable = CSVAccess( pszFilename ); 00526 if( psTable == NULL ) 00527 return ""; 00528 00529 /* -------------------------------------------------------------------- */ 00530 /* Find the correct record. */ 00531 /* -------------------------------------------------------------------- */ 00532 papszRecord = CSVScanFileByName( pszFilename, pszKeyFieldName, 00533 pszKeyFieldValue, eCriteria ); 00534 00535 if( papszRecord == NULL ) 00536 return ""; 00537 00538 /* -------------------------------------------------------------------- */ 00539 /* Figure out which field we want out of this. */ 00540 /* -------------------------------------------------------------------- */ 00541 iTargetField = CSVGetFileFieldId( pszFilename, pszTargetField ); 00542 if( iTargetField < 0 ) 00543 return ""; 00544 00545 if( iTargetField >= CSLCount( papszRecord ) ) 00546 return ""; 00547 00548 return( papszRecord[iTargetField] ); 00549 } 00550 00551 /************************************************************************/ 00552 /* CSVFilename() */ 00553 /* */ 00554 /* Return the full path to a particular CSV file. 
/*      Paths are formatted into a fixed-size static buffer with a      */
/*      bounded write, so an over-long directory or basename is         */
/*      truncated instead of overflowing the buffer.                    */
/************************************************************************/

/* Optional application-supplied replacement for the built-in lookup;
   installed with SetCSVFilenameHook(), NULL when unset. */
static const char *(*pfnCSVFilenameHook)(const char *) = NULL;

/*
 * Return the path at which the CSV file pszBasename is expected.
 *
 * When a hook is installed the request is passed to it unchanged and its
 * result is returned.  Otherwise the directory is chosen in this order:
 *   1. the GEOTIFF_CSV environment variable, if set;
 *   2. CSV_DATA_DIR, if defined at compile time;
 *   3. the first of a list of well known directories in which
 *      horiz_cs.csv can be opened, falling back to
 *      /usr/local/share/epsg_csv when none qualifies.
 *
 * The built-in result lives in a static buffer that is overwritten by the
 * next call.  It is always NUL terminated; a result that would not fit is
 * truncated to sizeof(szPath)-1 characters.  A NULL basename is treated
 * as an empty string by the built-in lookup.
 */
const char * CSVFilename( const char *pszBasename )

{
    static char szPath[512];
    const char  *pszEnvDir;

    if( pfnCSVFilenameHook != NULL )
        return( pfnCSVFilenameHook( pszBasename ) );

    if( pszBasename == NULL )
        pszBasename = "";

    /* The environment value has no length limit, hence the bounded
       formatting here and below. */
    pszEnvDir = getenv("GEOTIFF_CSV");
    if( pszEnvDir != NULL )
    {
        snprintf( szPath, sizeof(szPath), "%s/%s", pszEnvDir, pszBasename );
        return( szPath );
    }

#ifdef CSV_DATA_DIR
    snprintf( szPath, sizeof(szPath), "%s/%s", CSV_DATA_DIR, pszBasename );
#else
    {
        /* Directories probed, in order, for a readable horiz_cs.csv. */
        static const char * const apszSearchDirs[] = {
            "/usr/local/share/epsg/csv",
            "csv",
            "share/epsg_csv",
            "/usr/share/epsg_csv"
        };
        const int   nSearchDirs =
            (int) (sizeof(apszSearchDirs) / sizeof(apszSearchDirs[0]));
        const char  *pszDir = "/usr/local/share/epsg_csv";
        char        szProbe[64];
        int         iDir;

        for( iDir = 0; iDir < nSearchDirs; iDir++ )
        {
            FILE    *fp;

            snprintf( szProbe, sizeof(szProbe), "%s/horiz_cs.csv",
                      apszSearchDirs[iDir] );

            fp = fopen( szProbe, "rt" );
            if( fp != NULL )
            {
                /* the probe only tests that the file can be opened */
                fclose( fp );
                pszDir = apszSearchDirs[iDir];
                break;
            }
        }

        snprintf( szPath, sizeof(szPath), "%s/%s", pszDir, pszBasename );
    }
#endif

    return( szPath );
}

/************************************************************************/
/*                         SetCSVFilenameHook()                         */
/*                                                                      */
/*      Applications can use this to set a function that will           */
/*      massage CSV filenames.  Once set, CSVFilename() returns         */
/*      whatever the hook returns; passing NULL restores the            */
/*      built-in lookup.                                                */
/************************************************************************/

void SetCSVFilenameHook( const char *(*pfnNewHook)( const char * ) )

{
    pfnCSVFilenameHook = pfnNewHook;
}