00001 /******************************************************************************
00002 * Copyright (c) 1999, Frank Warmerdam
00003 *
00004 * Permission is hereby granted, free of charge, to any person obtaining a
00005 * copy of this software and associated documentation files (the "Software"),
00006 * to deal in the Software without restriction, including without limitation
00007 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
00008 * and/or sell copies of the Software, and to permit persons to whom the
00009 * Software is furnished to do so, subject to the following conditions:
00010 *
00011 * The above copyright notice and this permission notice shall be included
00012 * in all copies or substantial portions of the Software.
00013 *
00014 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
00015 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
00016 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
00017 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
00018 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
00019 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
00020 * DEALINGS IN THE SOFTWARE.
00021 ******************************************************************************
00022 *
00023 * cpl_csv.c: Support functions for accessing CSV files.
00024 *
00025 * $Log$
00025 * Revision 1.2 2001/03/05 04:58:33 warmerda
00025 * updated
00025 *
00026 * Revision 1.10 2001/01/17 15:32:19 warmerda
00027 * Include /usr/share/epsg_csv and share/epsg_csv in csv search path.
00028 *
00029 * Revision 1.9 2000/12/12 19:34:36 warmerda
00030 * Use CSV_DATA_DIR if defined.
00031 *
00032 * Revision 1.8 2000/08/22 04:33:33 warmerda
00033 * added support for /usr/local/shared/epsg_csv
00034 *
00035 * Revision 1.7 1999/12/03 14:42:59 warmerda
00036 * Passing a NULL filename into CSVAccess() now results in a graceful
00037 * failure to open the file.
00038 *
00039 * Revision 1.6 1999/06/26 17:28:51 warmerda
00040 * Fixed reading of records with newlines embedded in quoted strings.
00041 *
00042 * Revision 1.5 1999/05/04 03:07:24 warmerda
00043 * avoid warning
00044 *
00045 * Revision 1.4 1999/04/28 19:59:56 warmerda
00046 * added some doxygen style documentation
00047 *
00048 * Revision 1.3 1999/03/17 19:53:15 geotiff
00049 * sys includes moved to cpl_serv.h
00050 *
00051 * Revision 1.2 1999/03/10 16:54:42 geotiff
00052 * Added use of the GEOTIFF_CSV environment variable to locate CSV files.
00053 *
00054 * Revision 1.1 1999/03/09 15:57:04 geotiff
00055 * New
00056 *
00057 * Revision 1.2 1999/02/24 16:23:21 warmerda
00058 * added lots
00059 *
00060 * Revision 1.1 1999/01/05 16:52:36 warmerda
00061 * New
00062 *
00063 */
00064
00065 #include "cpl_csv.h"
00066 #include "geo_tiffp.h"
00067
/* ==================================================================== */
/*      CSVTable holds the persistent state of one open CSV table.      */
/*      Open tables are chained into a singly linked list headed by     */
/*      psCSVTableList.  No record index or in-memory copy of the       */
/*      table is maintained at present, though the structure could      */
/*      be extended to do so in the future.                             */
/* ==================================================================== */
typedef struct ctb CSVTable;

struct ctb {
    FILE        *fp;               /* open handle on the CSV file */
    struct ctb  *psNext;           /* next entry in the open-table list */
    char        *pszFilename;      /* private copy of the table's filename */
    char       **papszFieldNames;  /* header record split into field names */
    char       **papszRecFields;   /* last record found by a scan, or NULL */
};

/* Head of the list of currently open tables (NULL when none are open). */
static CSVTable *psCSVTableList = NULL;
00087
00088 /************************************************************************/
00089 /* CSVAccess() */
00090 /* */
00091 /* This function will fetch a handle to the requested table. */
00092 /* If not found in the ``open table list'' the table will be */
00093 /* opened and added to the list. Eventually this function may */
00094 /* become public with an abstracted return type so that */
00095 /* applications can set options about the table. For now this */
00096 /* isn't done. */
00097 /************************************************************************/
00098
00099 static CSVTable *CSVAccess( const char * pszFilename )
00100
00101 {
00102 CSVTable *psTable;
00103 FILE *fp;
00104
00105 if( pszFilename == NULL )
00106 return NULL;
00107
00108 /* -------------------------------------------------------------------- */
00109 /* Is the table already in the list. */
00110 /* -------------------------------------------------------------------- */
00111 for( psTable = psCSVTableList; psTable != NULL; psTable = psTable->psNext )
00112 {
00113 if( EQUAL(psTable->pszFilename,pszFilename) )
00114 {
00115 /*
00116 * Eventually we should consider promoting to the front of
00117 * the list to accelerate frequently accessed tables.
00118 */
00119
00120 return( psTable );
00121 }
00122 }
00123
00124 /* -------------------------------------------------------------------- */
00125 /* If not, try to open it. */
00126 /* -------------------------------------------------------------------- */
00127 fp = VSIFOpen( pszFilename, "r" );
00128 if( fp == NULL )
00129 return NULL;
00130
00131 /* -------------------------------------------------------------------- */
00132 /* Create an information structure about this table, and add to */
00133 /* the front of the list. */
00134 /* -------------------------------------------------------------------- */
00135 psTable = (CSVTable *) CPLCalloc(sizeof(CSVTable),1);
00136
00137 psTable->fp = fp;
00138 psTable->pszFilename = CPLStrdup( pszFilename );
00139 psTable->psNext = psCSVTableList;
00140
00141 psCSVTableList = psTable;
00142
00143 /* -------------------------------------------------------------------- */
00144 /* Read the table header record containing the field names. */
00145 /* -------------------------------------------------------------------- */
00146 psTable->papszFieldNames = CSVReadParseLine( fp );
00147
00148 return( psTable );
00149 }
00150
00151 /************************************************************************/
00152 /* CSVDeaccess() */
00153 /************************************************************************/
00154
00155 void CSVDeaccess( const char * pszFilename )
00156
00157 {
00158 CSVTable *psLast, *psTable;
00159
00160 /* -------------------------------------------------------------------- */
00161 /* A NULL means deaccess all tables. */
00162 /* -------------------------------------------------------------------- */
00163 if( pszFilename == NULL )
00164 {
00165 while( psCSVTableList != NULL )
00166 CSVDeaccess( psCSVTableList->pszFilename );
00167
00168 return;
00169 }
00170
00171 /* -------------------------------------------------------------------- */
00172 /* Find this table. */
00173 /* -------------------------------------------------------------------- */
00174 psLast = NULL;
00175 for( psTable = psCSVTableList;
00176 psTable != NULL && !EQUAL(psTable->pszFilename,pszFilename);
00177 psTable = psTable->psNext )
00178 {
00179 psLast = psTable;
00180 }
00181
00182 if( psTable == NULL )
00183 return;
00184
00185 /* -------------------------------------------------------------------- */
00186 /* Remove the link from the list. */
00187 /* -------------------------------------------------------------------- */
00188 if( psLast != NULL )
00189 psLast->psNext = psTable->psNext;
00190 else
00191 psCSVTableList = psTable->psNext;
00192
00193 /* -------------------------------------------------------------------- */
00194 /* Free the table. */
00195 /* -------------------------------------------------------------------- */
00196 VSIFClose( psTable->fp );
00197
00198 CSLDestroy( psTable->papszFieldNames );
00199 CSLDestroy( psTable->papszRecFields );
00200 CPLFree( psTable->pszFilename );
00201
00202 CPLFree( psTable );
00203
00204 /* -------------------------------------------------------------------- */
00205 /* Free working buffer in CPLReadLine(). */
00206 /* -------------------------------------------------------------------- */
00207 CPLReadLine( NULL );
00208 }
00209
00210 /************************************************************************/
00211 /* CSVReadParseLine() */
00212 /* */
00213 /* Read one line, and return split into fields. The return */
00214 /* result is a stringlist, in the sense of the CSL functions. */
00215 /************************************************************************/
00216
00217 char **CSVReadParseLine( FILE * fp )
00218
00219 {
00220 const char *pszLine;
00221 char *pszWorkLine;
00222 char **papszReturn;
00223
00224 CPLAssert( fp != NULL );
00225 if( fp == NULL )
00226 return( NULL );
00227
00228 pszLine = CPLReadLine( fp );
00229 if( pszLine == NULL )
00230 return( NULL );
00231
00232 /* -------------------------------------------------------------------- */
00233 /* If there are no quotes, then this is the simple case. */
00234 /* Parse, and return tokens. */
00235 /* -------------------------------------------------------------------- */
00236 if( strchr(pszLine,'\"') == NULL )
00237 return CSLTokenizeStringComplex( pszLine, ",", TRUE, TRUE );
00238
00239 /* -------------------------------------------------------------------- */
00240 /* We must now count the quotes in our working string, and as */
00241 /* long as it is odd, keep adding new lines. */
00242 /* -------------------------------------------------------------------- */
00243 pszWorkLine = CPLStrdup( pszLine );
00244
00245 while( TRUE )
00246 {
00247 int i, nCount = 0;
00248
00249 for( i = 0; pszWorkLine[i] != '\0'; i++ )
00250 {
00251 if( pszWorkLine[i] == '\"'
00252 && (i == 0 || pszWorkLine[i-1] != '\\') )
00253 nCount++;
00254 }
00255
00256 if( nCount % 2 == 0 )
00257 break;
00258
00259 pszLine = CPLReadLine( fp );
00260 if( pszLine == NULL )
00261 break;
00262
00263 pszWorkLine = (char *)
00264 CPLRealloc(pszWorkLine,
00265 strlen(pszWorkLine) + strlen(pszLine) + 1);
00266 strcat( pszWorkLine, pszLine );
00267 }
00268
00269 papszReturn = CSLTokenizeStringComplex( pszWorkLine, ",", TRUE, TRUE );
00270
00271 CPLFree( pszWorkLine );
00272
00273 return papszReturn;
00274 }
00275
00276 /************************************************************************/
00277 /* CSVCompare() */
00278 /* */
00279 /* Compare a field to a search value using a particular */
00280 /* criteria. */
00281 /************************************************************************/
00282
00283 static int CSVCompare( const char * pszFieldValue, const char * pszTarget,
00284 CSVCompareCriteria eCriteria )
00285
00286 {
00287 if( eCriteria == CC_ExactString )
00288 {
00289 return( strcmp( pszFieldValue, pszTarget ) == 0 );
00290 }
00291 else if( eCriteria == CC_ApproxString )
00292 {
00293 return( EQUAL( pszFieldValue, pszTarget ) );
00294 }
00295 else if( eCriteria == CC_Integer )
00296 {
00297 return( atoi(pszFieldValue) == atoi(pszTarget) );
00298 }
00299
00300 return FALSE;
00301 }
00302
00303 /************************************************************************/
00304 /* CSVScanLines() */
00305 /* */
00306 /* Read the file scanline for lines where the key field equals */
00307 /* the indicated value with the suggested comparison criteria. */
00308 /* Return the first matching line split into fields. */
00309 /************************************************************************/
00310
00311 char **CSVScanLines( FILE *fp, int iKeyField, const char * pszValue,
00312 CSVCompareCriteria eCriteria )
00313
00314 {
00315 char **papszFields = NULL;
00316 int bSelected = FALSE, nTestValue;
00317
00318 CPLAssert( pszValue != NULL );
00319 CPLAssert( iKeyField >= 0 );
00320 CPLAssert( fp != NULL );
00321
00322 nTestValue = atoi(pszValue);
00323
00324 while( !bSelected ) {
00325 papszFields = CSVReadParseLine( fp );
00326 if( papszFields == NULL )
00327 return( NULL );
00328
00329 if( CSLCount( papszFields ) < iKeyField+1 )
00330 {
00331 /* not selected */
00332 }
00333 else if( eCriteria == CC_Integer
00334 && atoi(papszFields[iKeyField]) == nTestValue )
00335 {
00336 bSelected = TRUE;
00337 }
00338 else
00339 {
00340 bSelected = CSVCompare( papszFields[iKeyField], pszValue,
00341 eCriteria );
00342 }
00343
00344 if( !bSelected )
00345 {
00346 CSLDestroy( papszFields );
00347 papszFields = NULL;
00348 }
00349 }
00350
00351 return( papszFields );
00352 }
00353
00354 /************************************************************************/
00355 /* CSVScanFile() */
00356 /* */
00357 /* Scan a whole file using criteria similar to above, but also */
00358 /* taking care of file opening and closing. */
00359 /************************************************************************/
00360
00361 char **CSVScanFile( const char * pszFilename, int iKeyField,
00362 const char * pszValue, CSVCompareCriteria eCriteria )
00363
00364 {
00365 CSVTable *psTable;
00366
00367 /* -------------------------------------------------------------------- */
00368 /* Get access to the table. */
00369 /* -------------------------------------------------------------------- */
00370 CPLAssert( pszFilename != NULL );
00371
00372 if( iKeyField < 0 )
00373 return NULL;
00374
00375 psTable = CSVAccess( pszFilename );
00376 if( psTable == NULL )
00377 return NULL;
00378
00379 /* -------------------------------------------------------------------- */
00380 /* Does the current record match the criteria? If so, just */
00381 /* return it again. */
00382 /* -------------------------------------------------------------------- */
00383 if( iKeyField >= 0
00384 && iKeyField < CSLCount(psTable->papszRecFields)
00385 && CSVCompare(pszValue,psTable->papszRecFields[iKeyField],eCriteria) )
00386 {
00387 return psTable->papszRecFields;
00388 }
00389
00390 /* -------------------------------------------------------------------- */
00391 /* Scan the file from the beginning, replacing the ``current */
00392 /* record'' in our structure with the one that is found. */
00393 /* -------------------------------------------------------------------- */
00394 VSIRewind( psTable->fp );
00395 CPLReadLine( psTable->fp ); /* throw away the header line */
00396
00397 CSLDestroy( psTable->papszRecFields );
00398 psTable->papszRecFields =
00399 CSVScanLines( psTable->fp, iKeyField, pszValue, eCriteria );
00400
00401 return( psTable->papszRecFields );
00402 }
00403
/************************************************************************/
/*                           CSVGetFieldId()                            */
/*                                                                      */
/*      Rewind the file, read its first record and look up the field    */
/*      with the indicated name.  The comparison is case insensitive    */
/*      but otherwise exact.  Returns the zero based index of the       */
/*      field, or -1 when no field of that name exists.  On return      */
/*      the file pointer is positioned just after the first record.     */
/************************************************************************/

int CSVGetFieldId( FILE * fp, const char * pszFieldName )
{
    char      **papszHeader;
    int         iField;
    int         iFound = -1;

    CPLAssert( fp != NULL && pszFieldName != NULL );

    VSIRewind( fp );
    papszHeader = CSVReadParseLine( fp );

    if( papszHeader != NULL )
    {
        for( iField = 0; papszHeader[iField] != NULL; iField++ )
        {
            if( EQUAL(papszHeader[iField],pszFieldName) )
            {
                iFound = iField;
                break;
            }
        }
    }

    CSLDestroy( papszHeader );

    return iFound;
}
00439
00440 /************************************************************************/
00441 /* CSVGetFileFieldId() */
00442 /* */
00443 /* Same as CPLGetFieldId(), except that we get the file based */
00444 /* on filename, rather than having an existing handle. */
00445 /************************************************************************/
00446
00447 int CSVGetFileFieldId( const char * pszFilename, const char * pszFieldName )
00448
00449 {
00450 CSVTable *psTable;
00451 int i;
00452
00453 /* -------------------------------------------------------------------- */
00454 /* Get access to the table. */
00455 /* -------------------------------------------------------------------- */
00456 CPLAssert( pszFilename != NULL );
00457
00458 psTable = CSVAccess( pszFilename );
00459 if( psTable == NULL )
00460 return -1;
00461
00462 /* -------------------------------------------------------------------- */
00463 /* Find the requested field. */
00464 /* -------------------------------------------------------------------- */
00465 for( i = 0;
00466 psTable->papszFieldNames != NULL
00467 && psTable->papszFieldNames[i] != NULL;
00468 i++ )
00469 {
00470 if( EQUAL(psTable->papszFieldNames[i],pszFieldName) )
00471 {
00472 return i;
00473 }
00474 }
00475
00476 return -1;
00477 }
00478
00479
00480 /************************************************************************/
00481 /* CSVScanFileByName() */
00482 /* */
00483 /* Same as CSVScanFile(), but using a field name instead of a */
00484 /* field number. */
00485 /************************************************************************/
00486
00487 char **CSVScanFileByName( const char * pszFilename,
00488 const char * pszKeyFieldName,
00489 const char * pszValue, CSVCompareCriteria eCriteria )
00490
00491 {
00492 int iKeyField;
00493
00494 iKeyField = CSVGetFileFieldId( pszFilename, pszKeyFieldName );
00495 if( iKeyField == -1 )
00496 return NULL;
00497
00498 return( CSVScanFile( pszFilename, iKeyField, pszValue, eCriteria ) );
00499 }
00500
00501 /************************************************************************/
00502 /* CSVGetField() */
00503 /* */
00504 /* The all-in-one function to fetch a particular field value */
00505 /* from a CSV file. Note this function will return an empty */
00506 /* string, rather than NULL if it fails to find the desired */
00507 /* value for some reason. The caller can't establish that the */
00508 /* fetch failed. */
00509 /************************************************************************/
00510
00511 const char *CSVGetField( const char * pszFilename,
00512 const char * pszKeyFieldName,
00513 const char * pszKeyFieldValue,
00514 CSVCompareCriteria eCriteria,
00515 const char * pszTargetField )
00516
00517 {
00518 CSVTable *psTable;
00519 char **papszRecord;
00520 int iTargetField;
00521
00522 /* -------------------------------------------------------------------- */
00523 /* Find the table. */
00524 /* -------------------------------------------------------------------- */
00525 psTable = CSVAccess( pszFilename );
00526 if( psTable == NULL )
00527 return "";
00528
00529 /* -------------------------------------------------------------------- */
00530 /* Find the correct record. */
00531 /* -------------------------------------------------------------------- */
00532 papszRecord = CSVScanFileByName( pszFilename, pszKeyFieldName,
00533 pszKeyFieldValue, eCriteria );
00534
00535 if( papszRecord == NULL )
00536 return "";
00537
00538 /* -------------------------------------------------------------------- */
00539 /* Figure out which field we want out of this. */
00540 /* -------------------------------------------------------------------- */
00541 iTargetField = CSVGetFileFieldId( pszFilename, pszTargetField );
00542 if( iTargetField < 0 )
00543 return "";
00544
00545 if( iTargetField >= CSLCount( papszRecord ) )
00546 return "";
00547
00548 return( papszRecord[iTargetField] );
00549 }
00550
/************************************************************************/
/*                            CSVFilename()                             */
/*                                                                      */
/*      Return the full path to a particular CSV file.                  */
/*                                                                      */
/*      When a filename hook is installed the request is passed         */
/*      straight to it.  Otherwise the directory is taken from the      */
/*      GEOTIFF_CSV environment variable if set, else from              */
/*      CSV_DATA_DIR if defined at build time, else from the first      */
/*      of a list of well known directories in which horiz_cs.csv       */
/*      can be opened, falling back to /usr/local/share/epsg_csv.       */
/*                                                                      */
/*      The result is built in a static buffer that is overwritten      */
/*      by the next call.  Paths too long for the buffer are            */
/*      truncated rather than written past its end.                     */
/************************************************************************/

static const char *(*pfnCSVFilenameHook)(const char *) = NULL;

const char * CSVFilename( const char *pszBasename )
{
    static char  szPath[512];
    const char  *pszEnvDir;

    if( pfnCSVFilenameHook != NULL )
        return( pfnCSVFilenameHook( pszBasename ) );

    /* The environment variable overrides every built-in location.     */
    /* Its length is not under our control, hence the bounded format.  */
    pszEnvDir = getenv("GEOTIFF_CSV");
    if( pszEnvDir != NULL )
    {
        snprintf( szPath, sizeof(szPath), "%s/%s", pszEnvDir, pszBasename );
        return( szPath );
    }

#ifdef CSV_DATA_DIR
    snprintf( szPath, sizeof(szPath), "%s/%s", CSV_DATA_DIR, pszBasename );
#else
    {
        /* Candidate directories, in order of preference. */
        static const char * const apszSearchDirs[] = {
            "/usr/local/share/epsg/csv",
            "csv",
            "share/epsg_csv",
            "/usr/share/epsg_csv"
        };
        const char  *pszDir = "/usr/local/share/epsg_csv"; /* last resort */
        char         szProbe[128];
        size_t       iDir;

        /* A directory qualifies if horiz_cs.csv can be opened in it. */
        for( iDir = 0;
             iDir < sizeof(apszSearchDirs) / sizeof(apszSearchDirs[0]);
             iDir++ )
        {
            FILE *fpProbe;

            snprintf( szProbe, sizeof(szProbe), "%s/horiz_cs.csv",
                      apszSearchDirs[iDir] );

            fpProbe = fopen( szProbe, "rt" );
            if( fpProbe != NULL )
            {
                fclose( fpProbe );
                pszDir = apszSearchDirs[iDir];
                break;
            }
        }

        snprintf( szPath, sizeof(szPath), "%s/%s", pszDir, pszBasename );
    }
#endif

    return( szPath );
}
00608
00609 /************************************************************************/
00610 /* SetCSVFilenameHook() */
00611 /* */
00612 /* Applications can use this to set a function that will */
00613 /* massage CSV filenames. */
00614 /************************************************************************/
00615
00660 void SetCSVFilenameHook( const char *(*pfnNewHook)( const char * ) )
00661
00662 {
00663 pfnCSVFilenameHook = pfnNewHook;
00664 }
1.2.3-20001105 written by Dimitri van Heesch,
© 1997-2000