Main Page   Class Hierarchy   Compound List   File List   Compound Members   File Members   Related Pages  

cpl_csv.cpp

00001 /******************************************************************************
00002  * $Id: cpl_csv_cpp-source.html,v 1.8 2002/04/16 13:11:47 warmerda Exp $
00003  *
00004  * Project:  CPL - Common Portability Library
00005  * Purpose:  CSV (comma separated value) file access.
00006  * Author:   Frank Warmerdam, warmerda@home.com
00007  *
00008  ******************************************************************************
00009  * Copyright (c) 1999, Frank Warmerdam
00010  *
00011  * Permission is hereby granted, free of charge, to any person obtaining a
00012  * copy of this software and associated documentation files (the "Software"),
00013  * to deal in the Software without restriction, including without limitation
00014  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
00015  * and/or sell copies of the Software, and to permit persons to whom the
00016  * Software is furnished to do so, subject to the following conditions:
00017  *
00018  * The above copyright notice and this permission notice shall be included
00019  * in all copies or substantial portions of the Software.
00020  *
00021  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
00022  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
00023  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
00024  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
00025  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
00026  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
00027  * DEALINGS IN THE SOFTWARE.
00028  ******************************************************************************
00029  *
00030  * $Log: cpl_csv_cpp-source.html,v $
00030  * Revision 1.8  2002/04/16 13:11:47  warmerda
00030  * updated
00030  *
00031  * Revision 1.3  2001/07/18 04:00:49  warmerda
00032  * added CPL_CVSID
00033  *
00034  * Revision 1.2  2001/01/19 21:16:41  warmerda
00035  * expanded tabs
00036  *
00037  * Revision 1.1  2000/10/06 15:20:45  warmerda
00038  * New
00039  *
00040  * Revision 1.2  2000/08/29 21:08:08  warmerda
00041  * fallback to use CPLFindFile()
00042  *
00043  * Revision 1.1  2000/04/05 21:55:59  warmerda
00044  * New
00045  *
00046  */
00047 
00048 #include "cpl_csv.h"
00049 #include "cpl_conv.h"
00050 
00051 CPL_CVSID("$Id: cpl_csv_cpp-source.html,v 1.8 2002/04/16 13:11:47 warmerda Exp $");
00052 
/* ==================================================================== */
/*      A CSVTable records what is kept about one open CSV file: the    */
/*      stream, the parsed header and the most recently matched         */
/*      record.  No record index or in-memory copy of the table is      */
/*      maintained, although that could be added in the future.         */
/* ==================================================================== */
typedef struct ctb {
    FILE        *fp;                /* stream the table is read from */
    struct ctb  *psNext;            /* next entry in the open table list */
    char        *pszFilename;       /* name the table was opened under */
    char        **papszFieldNames;  /* field names parsed from the header */
    char        **papszRecFields;   /* record found by the latest scan */
} CSVTable;

/* Head of the singly linked list of tables that are currently open. */
static CSVTable *psCSVTableList = NULL;
00072 
00073 /************************************************************************/
00074 /*                             CSVAccess()                              */
00075 /*                                                                      */
00076 /*      This function will fetch a handle to the requested table.       */
00077 /*      If not found in the ``open table list'' the table will be       */
00078 /*      opened and added to the list.  Eventually this function may     */
00079 /*      become public with an abstracted return type so that            */
00080 /*      applications can set options about the table.  For now this     */
00081 /*      isn't done.                                                     */
00082 /************************************************************************/
00083 
00084 static CSVTable *CSVAccess( const char * pszFilename )
00085 
00086 {
00087     CSVTable    *psTable;
00088     FILE        *fp;
00089 
00090 /* -------------------------------------------------------------------- */
00091 /*      Is the table already in the list.                               */
00092 /* -------------------------------------------------------------------- */
00093     for( psTable = psCSVTableList; psTable != NULL; psTable = psTable->psNext )
00094     {
00095         if( EQUAL(psTable->pszFilename,pszFilename) )
00096         {
00097             /*
00098              * Eventually we should consider promoting to the front of
00099              * the list to accelerate frequently accessed tables.
00100              */
00101             
00102             return( psTable );
00103         }
00104     }
00105 
00106 /* -------------------------------------------------------------------- */
00107 /*      If not, try to open it.                                         */
00108 /* -------------------------------------------------------------------- */
00109     fp = VSIFOpen( pszFilename, "r" );
00110     if( fp == NULL )
00111         return NULL;
00112 
00113 /* -------------------------------------------------------------------- */
00114 /*      Create an information structure about this table, and add to    */
00115 /*      the front of the list.                                          */
00116 /* -------------------------------------------------------------------- */
00117     psTable = (CSVTable *) CPLCalloc(sizeof(CSVTable),1);
00118 
00119     psTable->fp = fp;
00120     psTable->pszFilename = CPLStrdup( pszFilename );
00121     psTable->psNext = psCSVTableList;
00122     
00123     psCSVTableList = psTable;
00124 
00125 /* -------------------------------------------------------------------- */
00126 /*      Read the table header record containing the field names.        */
00127 /* -------------------------------------------------------------------- */
00128     psTable->papszFieldNames = CSVReadParseLine( fp );
00129 
00130     return( psTable );
00131 }
00132 
00133 /************************************************************************/
00134 /*                            CSVDeaccess()                             */
00135 /************************************************************************/
00136 
00137 void CSVDeaccess( const char * pszFilename )
00138 
00139 {
00140     CSVTable    *psLast, *psTable;
00141     
00142 /* -------------------------------------------------------------------- */
00143 /*      A NULL means deaccess all tables.                               */
00144 /* -------------------------------------------------------------------- */
00145     if( pszFilename == NULL )
00146     {
00147         while( psCSVTableList != NULL )
00148             CSVDeaccess( psCSVTableList->pszFilename );
00149         
00150         return;
00151     }
00152 
00153 /* -------------------------------------------------------------------- */
00154 /*      Find this table.                                                */
00155 /* -------------------------------------------------------------------- */
00156     psLast = NULL;
00157     for( psTable = psCSVTableList;
00158          psTable != NULL && !EQUAL(psTable->pszFilename,pszFilename);
00159          psTable = psTable->psNext )
00160     {
00161         psLast = psTable;
00162     }
00163 
00164     if( psTable == NULL )
00165         return;
00166 
00167 /* -------------------------------------------------------------------- */
00168 /*      Remove the link from the list.                                  */
00169 /* -------------------------------------------------------------------- */
00170     if( psLast != NULL )
00171         psLast->psNext = psTable->psNext;
00172     else
00173         psCSVTableList = psTable->psNext;
00174 
00175 /* -------------------------------------------------------------------- */
00176 /*      Free the table.                                                 */
00177 /* -------------------------------------------------------------------- */
00178     VSIFClose( psTable->fp );
00179 
00180     CSLDestroy( psTable->papszFieldNames );
00181     CSLDestroy( psTable->papszRecFields );
00182     CPLFree( psTable->pszFilename );
00183 
00184     CPLFree( psTable );
00185 }
00186 
00187 /************************************************************************/
00188 /*                          CSVReadParseLine()                          */
00189 /*                                                                      */
00190 /*      Read one line, and return split into fields.  The return        */
00191 /*      result is a stringlist, in the sense of the CSL functions.      */
00192 /************************************************************************/
00193 
00194 char **CSVReadParseLine( FILE * fp )
00195 
00196 {
00197     const char  *pszLine;
00198     char        *pszWorkLine;
00199     char        **papszReturn;
00200 
00201     CPLAssert( fp != NULL );
00202     if( fp == NULL )
00203         return( NULL );
00204     
00205     pszLine = CPLReadLine( fp );
00206     if( pszLine == NULL )
00207         return( NULL );
00208 
00209 /* -------------------------------------------------------------------- */
00210 /*      If there are no quotes, then this is the simple case.           */
00211 /*      Parse, and return tokens.                                       */
00212 /* -------------------------------------------------------------------- */
00213     if( strchr(pszLine,'\"') == NULL )
00214         return CSLTokenizeStringComplex( pszLine, ",", TRUE, TRUE );
00215 
00216 /* -------------------------------------------------------------------- */
00217 /*      We must now count the quotes in our working string, and as      */
00218 /*      long as it is odd, keep adding new lines.                       */
00219 /* -------------------------------------------------------------------- */
00220     pszWorkLine = CPLStrdup( pszLine );
00221 
00222     while( TRUE )
00223     {
00224         int             i, nCount = 0;
00225 
00226         for( i = 0; pszWorkLine[i] != '\0'; i++ )
00227         {
00228             if( pszWorkLine[i] == '\"'
00229                 && (i == 0 || pszWorkLine[i-1] != '\\') )
00230                 nCount++;
00231         }
00232 
00233         if( nCount % 2 == 0 )
00234             break;
00235 
00236         pszLine = CPLReadLine( fp );
00237         if( pszLine == NULL )
00238             break;
00239 
00240         pszWorkLine = (char *)
00241             CPLRealloc(pszWorkLine,
00242                        strlen(pszWorkLine) + strlen(pszLine) + 1);
00243         strcat( pszWorkLine, pszLine );
00244     }
00245     
00246     papszReturn = CSLTokenizeStringComplex( pszWorkLine, ",", TRUE, TRUE );
00247 
00248     CPLFree( pszWorkLine );
00249 
00250     return papszReturn;
00251 }
00252 
00253 /************************************************************************/
00254 /*                             CSVCompare()                             */
00255 /*                                                                      */
00256 /*      Compare a field to a search value using a particular            */
00257 /*      criteria.                                                       */
00258 /************************************************************************/
00259 
00260 static int CSVCompare( const char * pszFieldValue, const char * pszTarget,
00261                        CSVCompareCriteria eCriteria )
00262 
00263 {
00264     if( eCriteria == CC_ExactString )
00265     {
00266         return( strcmp( pszFieldValue, pszTarget ) == 0 );
00267     }
00268     else if( eCriteria == CC_ApproxString )
00269     {
00270         return( EQUAL( pszFieldValue, pszTarget ) );
00271     }
00272     else if( eCriteria == CC_Integer )
00273     {
00274         return( atoi(pszFieldValue) == atoi(pszTarget) );
00275     }
00276 
00277     return FALSE;
00278 }
00279 
00280 /************************************************************************/
00281 /*                            CSVScanLines()                            */
00282 /*                                                                      */
00283 /*      Read the file scanline for lines where the key field equals     */
00284 /*      the indicated value with the suggested comparison criteria.     */
00285 /*      Return the first matching line split into fields.               */
00286 /************************************************************************/
00287 
00288 char **CSVScanLines( FILE *fp, int iKeyField, const char * pszValue,
00289                      CSVCompareCriteria eCriteria )
00290 
00291 {
00292     char        **papszFields = NULL;
00293     int         bSelected = FALSE, nTestValue;
00294 
00295     CPLAssert( pszValue != NULL );
00296     CPLAssert( iKeyField >= 0 );
00297     CPLAssert( fp != NULL );
00298     
00299     nTestValue = atoi(pszValue);
00300     
00301     while( !bSelected ) {
00302         papszFields = CSVReadParseLine( fp );
00303         if( papszFields == NULL )
00304             return( NULL );
00305 
00306         if( CSLCount( papszFields ) < iKeyField+1 )
00307         {
00308             /* not selected */
00309         }
00310         else if( eCriteria == CC_Integer
00311                  && atoi(papszFields[iKeyField]) == nTestValue )
00312         {
00313             bSelected = TRUE;
00314         }
00315         else
00316         {
00317             bSelected = CSVCompare( papszFields[iKeyField], pszValue,
00318                                     eCriteria );
00319         }
00320 
00321         if( !bSelected )
00322         {
00323             CSLDestroy( papszFields );
00324             papszFields = NULL;
00325         }
00326     }
00327     
00328     return( papszFields );
00329 }
00330 
00331 /************************************************************************/
00332 /*                            CSVScanFile()                             */
00333 /*                                                                      */
00334 /*      Scan a whole file using criteria similar to above, but also     */
00335 /*      taking care of file opening and closing.                        */
00336 /************************************************************************/
00337 
00338 char **CSVScanFile( const char * pszFilename, int iKeyField,
00339                     const char * pszValue, CSVCompareCriteria eCriteria )
00340 
00341 {
00342     CSVTable    *psTable;
00343 
00344 /* -------------------------------------------------------------------- */
00345 /*      Get access to the table.                                        */
00346 /* -------------------------------------------------------------------- */
00347     CPLAssert( pszFilename != NULL );
00348 
00349     if( iKeyField < 0 )
00350         return NULL;
00351 
00352     psTable = CSVAccess( pszFilename );
00353     if( psTable == NULL )
00354         return NULL;
00355 
00356 /* -------------------------------------------------------------------- */
00357 /*      Does the current record match the criteria?  If so, just        */
00358 /*      return it again.                                                */
00359 /* -------------------------------------------------------------------- */
00360     if( iKeyField >= 0
00361         && iKeyField < CSLCount(psTable->papszRecFields)
00362         && CSVCompare(pszValue,psTable->papszRecFields[iKeyField],eCriteria) )
00363     {
00364         return psTable->papszRecFields;
00365     }
00366 
00367 /* -------------------------------------------------------------------- */
00368 /*      Scan the file from the beginning, replacing the ``current       */
00369 /*      record'' in our structure with the one that is found.           */
00370 /* -------------------------------------------------------------------- */
00371     VSIRewind( psTable->fp );
00372     CPLReadLine( psTable->fp );         /* throw away the header line */
00373     
00374     CSLDestroy( psTable->papszRecFields );
00375     psTable->papszRecFields =
00376         CSVScanLines( psTable->fp, iKeyField, pszValue, eCriteria );
00377 
00378     return( psTable->papszRecFields );
00379 }
00380 
00381 /************************************************************************/
00382 /*                           CPLGetFieldId()                            */
00383 /*                                                                      */
00384 /*      Read the first record of a CSV file (rewinding to be sure),     */
00385 /*      and find the field with the indicated name.  Returns -1 if      */
00386 /*      it fails to find the field name.  Comparison is case            */
00387 /*      insensitive, but otherwise exact.  After this function has      */
00388 /*      been called the file pointer will be positioned just after      */
00389 /*      the first record.                                               */
00390 /************************************************************************/
00391 
00392 int CSVGetFieldId( FILE * fp, const char * pszFieldName )
00393 
00394 {
00395     char        **papszFields;
00396     int         i;
00397     
00398     CPLAssert( fp != NULL && pszFieldName != NULL );
00399 
00400     VSIRewind( fp );
00401 
00402     papszFields = CSVReadParseLine( fp );
00403     for( i = 0; papszFields != NULL && papszFields[i] != NULL; i++ )
00404     {
00405         if( EQUAL(papszFields[i],pszFieldName) )
00406         {
00407             CSLDestroy( papszFields );
00408             return i;
00409         }
00410     }
00411 
00412     CSLDestroy( papszFields );
00413 
00414     return -1;
00415 }
00416 
00417 /************************************************************************/
00418 /*                         CSVGetFileFieldId()                          */
00419 /*                                                                      */
00420 /*      Same as CPLGetFieldId(), except that we get the file based      */
00421 /*      on filename, rather than having an existing handle.             */
00422 /************************************************************************/
00423 
00424 int CSVGetFileFieldId( const char * pszFilename, const char * pszFieldName )
00425 
00426 {
00427     CSVTable    *psTable;
00428     int         i;
00429     
00430 /* -------------------------------------------------------------------- */
00431 /*      Get access to the table.                                        */
00432 /* -------------------------------------------------------------------- */
00433     CPLAssert( pszFilename != NULL );
00434 
00435     psTable = CSVAccess( pszFilename );
00436     if( psTable == NULL )
00437         return -1;
00438 
00439 /* -------------------------------------------------------------------- */
00440 /*      Find the requested field.                                       */
00441 /* -------------------------------------------------------------------- */
00442     for( i = 0;
00443          psTable->papszFieldNames != NULL
00444              && psTable->papszFieldNames[i] != NULL;
00445          i++ )
00446     {
00447         if( EQUAL(psTable->papszFieldNames[i],pszFieldName) )
00448         {
00449             return i;
00450         }
00451     }
00452 
00453     return -1;
00454 }
00455 
00456 
00457 /************************************************************************/
00458 /*                         CSVScanFileByName()                          */
00459 /*                                                                      */
00460 /*      Same as CSVScanFile(), but using a field name instead of a      */
00461 /*      field number.                                                   */
00462 /************************************************************************/
00463 
00464 char **CSVScanFileByName( const char * pszFilename,
00465                           const char * pszKeyFieldName,
00466                           const char * pszValue, CSVCompareCriteria eCriteria )
00467 
00468 {
00469     int         iKeyField;
00470 
00471     iKeyField = CSVGetFileFieldId( pszFilename, pszKeyFieldName );
00472     if( iKeyField == -1 )
00473         return NULL;
00474 
00475     return( CSVScanFile( pszFilename, iKeyField, pszValue, eCriteria ) );
00476 }
00477 
00478 /************************************************************************/
00479 /*                            CSVGetField()                             */
00480 /*                                                                      */
00481 /*      The all-in-one function to fetch a particular field value       */
00482 /*      from a CSV file.  Note this function will return an empty       */
00483 /*      string, rather than NULL if it fails to find the desired        */
00484 /*      value for some reason.  The caller can't establish that the     */
00485 /*      fetch failed.                                                   */
00486 /************************************************************************/
00487 
00488 const char *CSVGetField( const char * pszFilename,
00489                          const char * pszKeyFieldName,
00490                          const char * pszKeyFieldValue,
00491                          CSVCompareCriteria eCriteria,
00492                          const char * pszTargetField )
00493 
00494 {
00495     CSVTable    *psTable;
00496     char        **papszRecord;
00497     int         iTargetField;
00498     
00499 /* -------------------------------------------------------------------- */
00500 /*      Find the table.                                                 */
00501 /* -------------------------------------------------------------------- */
00502     psTable = CSVAccess( pszFilename );
00503     if( psTable == NULL )
00504         return "";
00505 
00506 /* -------------------------------------------------------------------- */
00507 /*      Find the correct record.                                        */
00508 /* -------------------------------------------------------------------- */
00509     papszRecord = CSVScanFileByName( pszFilename, pszKeyFieldName,
00510                                      pszKeyFieldValue, eCriteria );
00511 
00512     if( papszRecord == NULL )
00513         return "";
00514 
00515 /* -------------------------------------------------------------------- */
00516 /*      Figure out which field we want out of this.                     */
00517 /* -------------------------------------------------------------------- */
00518     iTargetField = CSVGetFileFieldId( pszFilename, pszTargetField );
00519     if( iTargetField < 0 )
00520         return "";
00521 
00522     if( iTargetField >= CSLCount( papszRecord ) )
00523         return "";
00524 
00525     return( papszRecord[iTargetField] );
00526 }
00527 
00528 /************************************************************************/
00529 /*                            CSVFilename()                             */
00530 /*                                                                      */
00531 /*      Return the full path to a particular CSV file.  This will       */
00532 /*      eventually be something the application can override.           */
00533 /************************************************************************/
00534 
00535 static const char *(*pfnCSVFilenameHook)(const char *) = NULL;
00536 
00537 const char * CSVFilename( const char *pszBasename )
00538 
00539 {
00540     static char         szPath[512];
00541 
00542     if( pfnCSVFilenameHook == NULL )
00543     {
00544         FILE    *fp = NULL;
00545         const char *pszResult = CPLFindFile( "epsg_csv", pszBasename );
00546 
00547         if( pszResult != NULL )
00548             return pszResult;
00549 
00550         if( getenv("GEOTIFF_CSV") != NULL )
00551         {
00552             sprintf( szPath, "%s/%s", getenv("GEOTIFF_CSV"), pszBasename );
00553         }
00554         else if( (fp = fopen( "csv/horiz_cs.csv", "rt" )) != NULL )
00555         {
00556             sprintf( szPath, "csv/%s", pszBasename );
00557         }
00558         else
00559         {
00560             sprintf( szPath, "/usr/local/share/epsg_csv/%s", pszBasename );
00561         }
00562 
00563         if( fp != NULL )
00564             fclose( fp );
00565         
00566         return( szPath );
00567     }
00568     else
00569         return( pfnCSVFilenameHook( pszBasename ) );
00570 }
00571 
00572 /************************************************************************/
00573 /*                         SetCSVFilenameHook()                         */
00574 /*                                                                      */
00575 /*      Applications can use this to set a function that will           */
00576 /*      massage CSV filenames.                                          */
00577 /************************************************************************/
00578 
00623 void SetCSVFilenameHook( const char *(*pfnNewHook)( const char * ) )
00624 
00625 {
00626     pfnCSVFilenameHook = pfnNewHook;
00627 }

Generated at Thu Mar 28 09:47:27 2002 for GDAL by doxygen1.2.3-20001105 written by Dimitri van Heesch, © 1997-2000