Main Page   Compound List   File List   Compound Members   File Members   Related Pages  

cpl_csv.c

00001 /******************************************************************************
00002  * Copyright (c) 1999, Frank Warmerdam
00003  *
00004  * Permission is hereby granted, free of charge, to any person obtaining a
00005  * copy of this software and associated documentation files (the "Software"),
00006  * to deal in the Software without restriction, including without limitation
00007  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
00008  * and/or sell copies of the Software, and to permit persons to whom the
00009  * Software is furnished to do so, subject to the following conditions:
00010  *
00011  * The above copyright notice and this permission notice shall be included
00012  * in all copies or substantial portions of the Software.
00013  *
00014  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
00015  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
00016  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
00017  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
00018  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
00019  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
00020  * DEALINGS IN THE SOFTWARE.
00021  ******************************************************************************
00022  *
00023  * cpl_csv.c: Support functions for accessing CSV files.
00024  *
00025  * $Log$
00025  * Revision 1.2  2001/03/05 04:58:33  warmerda
00025  * updated
00025  *
00026  * Revision 1.10  2001/01/17 15:32:19  warmerda
00027  * Include /usr/share/epsg_csv and share/epsg_csv in csv search path.
00028  *
00029  * Revision 1.9  2000/12/12 19:34:36  warmerda
00030  * Use CSV_DATA_DIR if defined.
00031  *
00032  * Revision 1.8  2000/08/22 04:33:33  warmerda
00033  * added support for /usr/local/shared/epsg_csv
00034  *
00035  * Revision 1.7  1999/12/03 14:42:59  warmerda
00036  * Passing a NULL filename into CSVAccess() now results in a graceful
00037  * failure to open the file.
00038  *
00039  * Revision 1.6  1999/06/26 17:28:51  warmerda
00040  * Fixed reading of records with newlines embedded in quoted strings.
00041  *
00042  * Revision 1.5  1999/05/04 03:07:24  warmerda
00043  * avoid warning
00044  *
00045  * Revision 1.4  1999/04/28 19:59:56  warmerda
00046  * added some doxygen style documentation
00047  *
00048  * Revision 1.3  1999/03/17 19:53:15  geotiff
00049  * sys includes moved to cpl_serv.h
00050  *
00051  * Revision 1.2  1999/03/10 16:54:42  geotiff
00052  * Added use of the GEOTIFF_CSV environment variable to locate CSV files.
00053  *
00054  * Revision 1.1  1999/03/09 15:57:04  geotiff
00055  * New
00056  *
00057  * Revision 1.2  1999/02/24 16:23:21  warmerda
00058  * added lots
00059  *
00060  * Revision 1.1  1999/01/05 16:52:36  warmerda
00061  * New
00062  *
00063  */
00064 
00065 #include "cpl_csv.h"
00066 #include "geo_tiffp.h"
00067 
00068 /* ==================================================================== */
00069 /*      The CSVTable is a persistent set of info about an open CSV      */
00070 /*      table.  While it doesn't currently maintain a record index,     */
00071 /*      or in-memory copy of the table, it could be changed to do so    */
00072 /*      in the future.                                                  */
00073 /* ==================================================================== */
/* One node of the singly linked list of currently open CSV tables. */
typedef struct ctb
{
    FILE        *fp;                /* open stream on the CSV file */
    struct ctb  *psNext;            /* next open table, NULL at list end */
    char        *pszFilename;       /* private copy of the name used to open it */
    char        **papszFieldNames;  /* fields parsed from the header line */
    char        **papszRecFields;   /* last record found by a scan, or NULL */
} CSVTable;

/* Head of the open-table list; NULL while no table is open. */
static CSVTable *psCSVTableList = NULL;
00087 
00088 /************************************************************************/
00089 /*                             CSVAccess()                              */
00090 /*                                                                      */
00091 /*      This function will fetch a handle to the requested table.       */
00092 /*      If not found in the ``open table list'' the table will be       */
00093 /*      opened and added to the list.  Eventually this function may     */
00094 /*      become public with an abstracted return type so that            */
00095 /*      applications can set options about the table.  For now this     */
00096 /*      isn't done.                                                     */
00097 /************************************************************************/
00098 
00099 static CSVTable *CSVAccess( const char * pszFilename )
00100 
00101 {
00102     CSVTable    *psTable;
00103     FILE        *fp;
00104 
00105     if( pszFilename == NULL )
00106         return NULL;
00107 
00108 /* -------------------------------------------------------------------- */
00109 /*      Is the table already in the list.                               */
00110 /* -------------------------------------------------------------------- */
00111     for( psTable = psCSVTableList; psTable != NULL; psTable = psTable->psNext )
00112     {
00113         if( EQUAL(psTable->pszFilename,pszFilename) )
00114         {
00115             /*
00116              * Eventually we should consider promoting to the front of
00117              * the list to accelerate frequently accessed tables.
00118              */
00119             
00120             return( psTable );
00121         }
00122     }
00123 
00124 /* -------------------------------------------------------------------- */
00125 /*      If not, try to open it.                                         */
00126 /* -------------------------------------------------------------------- */
00127     fp = VSIFOpen( pszFilename, "r" );
00128     if( fp == NULL )
00129         return NULL;
00130 
00131 /* -------------------------------------------------------------------- */
00132 /*      Create an information structure about this table, and add to    */
00133 /*      the front of the list.                                          */
00134 /* -------------------------------------------------------------------- */
00135     psTable = (CSVTable *) CPLCalloc(sizeof(CSVTable),1);
00136 
00137     psTable->fp = fp;
00138     psTable->pszFilename = CPLStrdup( pszFilename );
00139     psTable->psNext = psCSVTableList;
00140     
00141     psCSVTableList = psTable;
00142 
00143 /* -------------------------------------------------------------------- */
00144 /*      Read the table header record containing the field names.        */
00145 /* -------------------------------------------------------------------- */
00146     psTable->papszFieldNames = CSVReadParseLine( fp );
00147 
00148     return( psTable );
00149 }
00150 
00151 /************************************************************************/
00152 /*                            CSVDeaccess()                             */
00153 /************************************************************************/
00154 
00155 void CSVDeaccess( const char * pszFilename )
00156 
00157 {
00158     CSVTable    *psLast, *psTable;
00159     
00160 /* -------------------------------------------------------------------- */
00161 /*      A NULL means deaccess all tables.                               */
00162 /* -------------------------------------------------------------------- */
00163     if( pszFilename == NULL )
00164     {
00165         while( psCSVTableList != NULL )
00166             CSVDeaccess( psCSVTableList->pszFilename );
00167         
00168         return;
00169     }
00170 
00171 /* -------------------------------------------------------------------- */
00172 /*      Find this table.                                                */
00173 /* -------------------------------------------------------------------- */
00174     psLast = NULL;
00175     for( psTable = psCSVTableList;
00176          psTable != NULL && !EQUAL(psTable->pszFilename,pszFilename);
00177          psTable = psTable->psNext )
00178     {
00179         psLast = psTable;
00180     }
00181 
00182     if( psTable == NULL )
00183         return;
00184 
00185 /* -------------------------------------------------------------------- */
00186 /*      Remove the link from the list.                                  */
00187 /* -------------------------------------------------------------------- */
00188     if( psLast != NULL )
00189         psLast->psNext = psTable->psNext;
00190     else
00191         psCSVTableList = psTable->psNext;
00192 
00193 /* -------------------------------------------------------------------- */
00194 /*      Free the table.                                                 */
00195 /* -------------------------------------------------------------------- */
00196     VSIFClose( psTable->fp );
00197 
00198     CSLDestroy( psTable->papszFieldNames );
00199     CSLDestroy( psTable->papszRecFields );
00200     CPLFree( psTable->pszFilename );
00201 
00202     CPLFree( psTable );
00203 
00204 /* -------------------------------------------------------------------- */
00205 /*      Free working buffer in CPLReadLine().                           */
00206 /* -------------------------------------------------------------------- */
00207     CPLReadLine( NULL );
00208 }
00209 
00210 /************************************************************************/
00211 /*                          CSVReadParseLine()                          */
00212 /*                                                                      */
00213 /*      Read one line, and return split into fields.  The return        */
00214 /*      result is a stringlist, in the sense of the CSL functions.      */
00215 /************************************************************************/
00216 
00217 char **CSVReadParseLine( FILE * fp )
00218 
00219 {
00220     const char  *pszLine;
00221     char        *pszWorkLine;
00222     char        **papszReturn;
00223 
00224     CPLAssert( fp != NULL );
00225     if( fp == NULL )
00226         return( NULL );
00227     
00228     pszLine = CPLReadLine( fp );
00229     if( pszLine == NULL )
00230         return( NULL );
00231 
00232 /* -------------------------------------------------------------------- */
00233 /*      If there are no quotes, then this is the simple case.           */
00234 /*      Parse, and return tokens.                                       */
00235 /* -------------------------------------------------------------------- */
00236     if( strchr(pszLine,'\"') == NULL )
00237         return CSLTokenizeStringComplex( pszLine, ",", TRUE, TRUE );
00238 
00239 /* -------------------------------------------------------------------- */
00240 /*      We must now count the quotes in our working string, and as      */
00241 /*      long as it is odd, keep adding new lines.                       */
00242 /* -------------------------------------------------------------------- */
00243     pszWorkLine = CPLStrdup( pszLine );
00244 
00245     while( TRUE )
00246     {
00247         int             i, nCount = 0;
00248 
00249         for( i = 0; pszWorkLine[i] != '\0'; i++ )
00250         {
00251             if( pszWorkLine[i] == '\"'
00252                 && (i == 0 || pszWorkLine[i-1] != '\\') )
00253                 nCount++;
00254         }
00255 
00256         if( nCount % 2 == 0 )
00257             break;
00258 
00259         pszLine = CPLReadLine( fp );
00260         if( pszLine == NULL )
00261             break;
00262 
00263         pszWorkLine = (char *)
00264             CPLRealloc(pszWorkLine,
00265                        strlen(pszWorkLine) + strlen(pszLine) + 1);
00266         strcat( pszWorkLine, pszLine );
00267     }
00268     
00269     papszReturn = CSLTokenizeStringComplex( pszWorkLine, ",", TRUE, TRUE );
00270 
00271     CPLFree( pszWorkLine );
00272 
00273     return papszReturn;
00274 }
00275 
00276 /************************************************************************/
00277 /*                             CSVCompare()                             */
00278 /*                                                                      */
00279 /*      Compare a field to a search value using a particular            */
00280 /*      criteria.                                                       */
00281 /************************************************************************/
00282 
00283 static int CSVCompare( const char * pszFieldValue, const char * pszTarget,
00284                        CSVCompareCriteria eCriteria )
00285 
00286 {
00287     if( eCriteria == CC_ExactString )
00288     {
00289         return( strcmp( pszFieldValue, pszTarget ) == 0 );
00290     }
00291     else if( eCriteria == CC_ApproxString )
00292     {
00293         return( EQUAL( pszFieldValue, pszTarget ) );
00294     }
00295     else if( eCriteria == CC_Integer )
00296     {
00297         return( atoi(pszFieldValue) == atoi(pszTarget) );
00298     }
00299 
00300     return FALSE;
00301 }
00302 
00303 /************************************************************************/
00304 /*                            CSVScanLines()                            */
00305 /*                                                                      */
00306 /*      Scan the file line by line for records whose key field equals   */
00307 /*      the indicated value with the suggested comparison criteria.     */
00308 /*      Return the first matching line split into fields.               */
00309 /************************************************************************/
00310 
00311 char **CSVScanLines( FILE *fp, int iKeyField, const char * pszValue,
00312                      CSVCompareCriteria eCriteria )
00313 
00314 {
00315     char        **papszFields = NULL;
00316     int         bSelected = FALSE, nTestValue;
00317 
00318     CPLAssert( pszValue != NULL );
00319     CPLAssert( iKeyField >= 0 );
00320     CPLAssert( fp != NULL );
00321     
00322     nTestValue = atoi(pszValue);
00323     
00324     while( !bSelected ) {
00325         papszFields = CSVReadParseLine( fp );
00326         if( papszFields == NULL )
00327             return( NULL );
00328 
00329         if( CSLCount( papszFields ) < iKeyField+1 )
00330         {
00331             /* not selected */
00332         }
00333         else if( eCriteria == CC_Integer
00334                  && atoi(papszFields[iKeyField]) == nTestValue )
00335         {
00336             bSelected = TRUE;
00337         }
00338         else
00339         {
00340             bSelected = CSVCompare( papszFields[iKeyField], pszValue,
00341                                     eCriteria );
00342         }
00343 
00344         if( !bSelected )
00345         {
00346             CSLDestroy( papszFields );
00347             papszFields = NULL;
00348         }
00349     }
00350     
00351     return( papszFields );
00352 }
00353 
00354 /************************************************************************/
00355 /*                            CSVScanFile()                             */
00356 /*                                                                      */
00357 /*      Scan a whole file using criteria similar to above, but also     */
00358 /*      taking care of file opening and closing.                        */
00359 /************************************************************************/
00360 
00361 char **CSVScanFile( const char * pszFilename, int iKeyField,
00362                     const char * pszValue, CSVCompareCriteria eCriteria )
00363 
00364 {
00365     CSVTable    *psTable;
00366 
00367 /* -------------------------------------------------------------------- */
00368 /*      Get access to the table.                                        */
00369 /* -------------------------------------------------------------------- */
00370     CPLAssert( pszFilename != NULL );
00371 
00372     if( iKeyField < 0 )
00373         return NULL;
00374 
00375     psTable = CSVAccess( pszFilename );
00376     if( psTable == NULL )
00377         return NULL;
00378 
00379 /* -------------------------------------------------------------------- */
00380 /*      Does the current record match the criteria?  If so, just        */
00381 /*      return it again.                                                */
00382 /* -------------------------------------------------------------------- */
00383     if( iKeyField >= 0
00384         && iKeyField < CSLCount(psTable->papszRecFields)
00385         && CSVCompare(pszValue,psTable->papszRecFields[iKeyField],eCriteria) )
00386     {
00387         return psTable->papszRecFields;
00388     }
00389 
00390 /* -------------------------------------------------------------------- */
00391 /*      Scan the file from the beginning, replacing the ``current       */
00392 /*      record'' in our structure with the one that is found.           */
00393 /* -------------------------------------------------------------------- */
00394     VSIRewind( psTable->fp );
00395     CPLReadLine( psTable->fp );         /* throw away the header line */
00396     
00397     CSLDestroy( psTable->papszRecFields );
00398     psTable->papszRecFields =
00399         CSVScanLines( psTable->fp, iKeyField, pszValue, eCriteria );
00400 
00401     return( psTable->papszRecFields );
00402 }
00403 
00404 /************************************************************************/
00405 /*                           CSVGetFieldId()                            */
00406 /*                                                                      */
00407 /*      Read the first record of a CSV file (rewinding to be sure),     */
00408 /*      and find the field with the indicated name.  Returns -1 if      */
00409 /*      it fails to find the field name.  Comparison is case            */
00410 /*      insensitive, but otherwise exact.  After this function has      */
00411 /*      been called the file pointer will be positioned just after      */
00412 /*      the first record.                                               */
00413 /************************************************************************/
00414 
/*
 * Rewind fp, parse its first record and return the zero-based position of
 * the field named pszFieldName (EQUAL comparison), or -1 if it is absent
 * or no record could be read.
 */
int CSVGetFieldId( FILE * fp, const char * pszFieldName )
{
    char        **papszHeader;
    int         iField;
    int         iFound = -1;

    CPLAssert( fp != NULL && pszFieldName != NULL );

    VSIRewind( fp );

    papszHeader = CSVReadParseLine( fp );
    if( papszHeader != NULL )
    {
        for( iField = 0; papszHeader[iField] != NULL; iField++ )
        {
            if( EQUAL(papszHeader[iField],pszFieldName) )
            {
                iFound = iField;
                break;
            }
        }
    }

    /* The header list is only needed for the lookup. */
    CSLDestroy( papszHeader );

    return iFound;
}
00439 
00440 /************************************************************************/
00441 /*                         CSVGetFileFieldId()                          */
00442 /*                                                                      */
00443 /*      Same as CSVGetFieldId(), except that we get the file based      */
00444 /*      on filename, rather than having an existing handle.             */
00445 /************************************************************************/
00446 
00447 int CSVGetFileFieldId( const char * pszFilename, const char * pszFieldName )
00448 
00449 {
00450     CSVTable    *psTable;
00451     int         i;
00452     
00453 /* -------------------------------------------------------------------- */
00454 /*      Get access to the table.                                        */
00455 /* -------------------------------------------------------------------- */
00456     CPLAssert( pszFilename != NULL );
00457 
00458     psTable = CSVAccess( pszFilename );
00459     if( psTable == NULL )
00460         return -1;
00461 
00462 /* -------------------------------------------------------------------- */
00463 /*      Find the requested field.                                       */
00464 /* -------------------------------------------------------------------- */
00465     for( i = 0;
00466          psTable->papszFieldNames != NULL
00467              && psTable->papszFieldNames[i] != NULL;
00468          i++ )
00469     {
00470         if( EQUAL(psTable->papszFieldNames[i],pszFieldName) )
00471         {
00472             return i;
00473         }
00474     }
00475 
00476     return -1;
00477 }
00478 
00479 
00480 /************************************************************************/
00481 /*                         CSVScanFileByName()                          */
00482 /*                                                                      */
00483 /*      Same as CSVScanFile(), but using a field name instead of a      */
00484 /*      field number.                                                   */
00485 /************************************************************************/
00486 
00487 char **CSVScanFileByName( const char * pszFilename,
00488                           const char * pszKeyFieldName,
00489                           const char * pszValue, CSVCompareCriteria eCriteria )
00490 
00491 {
00492     int         iKeyField;
00493 
00494     iKeyField = CSVGetFileFieldId( pszFilename, pszKeyFieldName );
00495     if( iKeyField == -1 )
00496         return NULL;
00497 
00498     return( CSVScanFile( pszFilename, iKeyField, pszValue, eCriteria ) );
00499 }
00500 
00501 /************************************************************************/
00502 /*                            CSVGetField()                             */
00503 /*                                                                      */
00504 /*      The all-in-one function to fetch a particular field value       */
00505 /*      from a CSV file.  Note this function will return an empty       */
00506 /*      string, rather than NULL if it fails to find the desired        */
00507 /*      value for some reason.  The caller can't establish that the     */
00508 /*      fetch failed.                                                   */
00509 /************************************************************************/
00510 
00511 const char *CSVGetField( const char * pszFilename,
00512                          const char * pszKeyFieldName,
00513                          const char * pszKeyFieldValue,
00514                          CSVCompareCriteria eCriteria,
00515                          const char * pszTargetField )
00516 
00517 {
00518     CSVTable    *psTable;
00519     char        **papszRecord;
00520     int         iTargetField;
00521     
00522 /* -------------------------------------------------------------------- */
00523 /*      Find the table.                                                 */
00524 /* -------------------------------------------------------------------- */
00525     psTable = CSVAccess( pszFilename );
00526     if( psTable == NULL )
00527         return "";
00528 
00529 /* -------------------------------------------------------------------- */
00530 /*      Find the correct record.                                        */
00531 /* -------------------------------------------------------------------- */
00532     papszRecord = CSVScanFileByName( pszFilename, pszKeyFieldName,
00533                                      pszKeyFieldValue, eCriteria );
00534 
00535     if( papszRecord == NULL )
00536         return "";
00537 
00538 /* -------------------------------------------------------------------- */
00539 /*      Figure out which field we want out of this.                     */
00540 /* -------------------------------------------------------------------- */
00541     iTargetField = CSVGetFileFieldId( pszFilename, pszTargetField );
00542     if( iTargetField < 0 )
00543         return "";
00544 
00545     if( iTargetField >= CSLCount( papszRecord ) )
00546         return "";
00547 
00548     return( papszRecord[iTargetField] );
00549 }
00550 
00551 /************************************************************************/
00552 /*                            CSVFilename()                             */
00553 /*                                                                      */
00554 /*      Return the full path to a particular CSV file.                  */
00555 /************************************************************************/
00556 
/* Hook set through SetCSVFilenameHook(); NULL selects the built-in search. */
static const char *(*pfnCSVFilenameHook)(const char *) = NULL;

/*
 * Store "<pszDir>/<pszBasename>" in pszDst, a buffer of nDstSize bytes.
 * When the joined path plus its terminator does not fit, pszDst is set to
 * the empty string instead of being overrun; an empty path simply fails
 * to open later on.
 */
static void CSVJoinPath( char *pszDst, size_t nDstSize,
                         const char *pszDir, const char *pszBasename )
{
    size_t      nDirLen = strlen( pszDir );
    size_t      nBaseLen = strlen( pszBasename );

    if( nDstSize == 0 )
        return;

    /* +2: one byte for the '/' separator, one for the terminating NUL. */
    if( nDirLen + nBaseLen + 2 > nDstSize )
    {
        pszDst[0] = '\0';
        return;
    }

    memcpy( pszDst, pszDir, nDirLen );
    pszDst[nDirLen] = '/';
    memcpy( pszDst + nDirLen + 1, pszBasename, nBaseLen + 1 );
}

/*
 * Return the full path for the CSV file pszBasename.
 *
 * If a hook has been installed its result is returned unchanged.
 * Otherwise the directory is chosen in this order: the GEOTIFF_CSV
 * environment variable; CSV_DATA_DIR when defined at build time; else the
 * first of several well-known directories in which horiz_cs.csv can be
 * opened, falling back to /usr/local/share/epsg_csv.
 *
 * The built-in result lives in a static buffer that is overwritten by the
 * next call.  It is the empty string when the path would not fit in it.
 */
const char * CSVFilename( const char *pszBasename )
{
    static char         szPath[512];
    const char          *pszEnvDir;
    FILE                *fp = NULL;

    if( pfnCSVFilenameHook != NULL )
        return( pfnCSVFilenameHook( pszBasename ) );

    /* Look the variable up once; its length is not under our control. */
    pszEnvDir = getenv( "GEOTIFF_CSV" );

    if( pszEnvDir != NULL )
    {
        CSVJoinPath( szPath, sizeof(szPath), pszEnvDir, pszBasename );
    }
#ifdef CSV_DATA_DIR
    else
    {
        CSVJoinPath( szPath, sizeof(szPath), CSV_DATA_DIR, pszBasename );
    }
#else
    /* Probe each candidate directory for a file known to ship with the set. */
    else if( (fp = fopen( "/usr/local/share/epsg/csv/horiz_cs.csv", "rt" )) != NULL )
    {
        CSVJoinPath( szPath, sizeof(szPath),
                     "/usr/local/share/epsg/csv", pszBasename );
    }
    else if( (fp = fopen( "csv/horiz_cs.csv", "rt" )) != NULL )
    {
        CSVJoinPath( szPath, sizeof(szPath), "csv", pszBasename );
    }
    else if( (fp = fopen( "share/epsg_csv/horiz_cs.csv", "rt" )) != NULL )
    {
        CSVJoinPath( szPath, sizeof(szPath), "share/epsg_csv", pszBasename );
    }
    else if( (fp = fopen( "/usr/share/epsg_csv/horiz_cs.csv", "rt" )) != NULL )
    {
        CSVJoinPath( szPath, sizeof(szPath),
                     "/usr/share/epsg_csv", pszBasename );
    }
    else
    {
        CSVJoinPath( szPath, sizeof(szPath),
                     "/usr/local/share/epsg_csv", pszBasename );
    }
#endif

    /* The probe stream was only opened to test for existence. */
    if( fp != NULL )
        fclose( fp );

    return( szPath );
}
00608 
00609 /************************************************************************/
00610 /*                         SetCSVFilenameHook()                         */
00611 /*                                                                      */
00612 /*      Applications can use this to set a function that will           */
00613 /*      massage CSV filenames.                                          */
00614 /************************************************************************/
00615 
00660 void SetCSVFilenameHook( const char *(*pfnNewHook)( const char * ) )
00661 
00662 {
00663     pfnCSVFilenameHook = pfnNewHook;
00664 }

Generated at Sun Mar 4 23:32:44 2001 for libgeotiff by doxygen1.2.3-20001105 written by Dimitri van Heesch, © 1997-2000