00001 /****************************************************************************** 00002 * $Id: cpl_csv_cpp-source.html,v 1.8 2002/04/16 13:11:47 warmerda Exp $ 00003 * 00004 * Project: CPL - Common Portability Library 00005 * Purpose: CSV (comma separated value) file access. 00006 * Author: Frank Warmerdam, warmerda@home.com 00007 * 00008 ****************************************************************************** 00009 * Copyright (c) 1999, Frank Warmerdam 00010 * 00011 * Permission is hereby granted, free of charge, to any person obtaining a 00012 * copy of this software and associated documentation files (the "Software"), 00013 * to deal in the Software without restriction, including without limitation 00014 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 00015 * and/or sell copies of the Software, and to permit persons to whom the 00016 * Software is furnished to do so, subject to the following conditions: 00017 * 00018 * The above copyright notice and this permission notice shall be included 00019 * in all copies or substantial portions of the Software. 00020 * 00021 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 00022 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 00023 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 00024 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 00025 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 00026 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 00027 * DEALINGS IN THE SOFTWARE. 00028 ****************************************************************************** 00029 * 00030 * $Log: cpl_csv_cpp-source.html,v $ 00030 * Revision 1.8 2002/04/16 13:11:47 warmerda 00030 * updated 00030 * 00031 * Revision 1.3 2001/07/18 04:00:49 warmerda 00032 * added CPL_CVSID 00033 * 00034 * Revision 1.2 2001/01/19 21:16:41 warmerda 00035 * expanded tabs 00036 * 00037 * Revision 1.1 2000/10/06 15:20:45 warmerda 00038 * New 00039 * 00040 * Revision 1.2 2000/08/29 21:08:08 warmerda 00041 * fallback to use CPLFindFile() 00042 * 00043 * Revision 1.1 2000/04/05 21:55:59 warmerda 00044 * New 00045 * 00046 */ 00047 00048 #include "cpl_csv.h" 00049 #include "cpl_conv.h" 00050 00051 CPL_CVSID("$Id: cpl_csv_cpp-source.html,v 1.8 2002/04/16 13:11:47 warmerda Exp $"); 00052 00053 /* ==================================================================== */ 00054 /* The CSVTable is a persistant set of info about an open CSV */ 00055 /* table. While it doesn't currently maintain a record index, */ 00056 /* or in-memory copy of the table, it could be changed to do so */ 00057 /* in the future. */ 00058 /* ==================================================================== */ 00059 typedef struct ctb { 00060 FILE *fp; 00061 00062 struct ctb *psNext; 00063 00064 char *pszFilename; 00065 00066 char **papszFieldNames; 00067 00068 char **papszRecFields; 00069 } CSVTable; 00070 00071 static CSVTable *psCSVTableList = NULL; 00072 00073 /************************************************************************/ 00074 /* CSVAccess() */ 00075 /* */ 00076 /* This function will fetch a handle to the requested table. */ 00077 /* If not found in the ``open table list'' the table will be */ 00078 /* opened and added to the list. Eventually this function may */ 00079 /* become public with an abstracted return type so that */ 00080 /* applications can set options about the table. For now this */ 00081 /* isn't done. */ 00082 /************************************************************************/ 00083 00084 static CSVTable *CSVAccess( const char * pszFilename ) 00085 00086 { 00087 CSVTable *psTable; 00088 FILE *fp; 00089 00090 /* -------------------------------------------------------------------- */ 00091 /* Is the table already in the list. */ 00092 /* -------------------------------------------------------------------- */ 00093 for( psTable = psCSVTableList; psTable != NULL; psTable = psTable->psNext ) 00094 { 00095 if( EQUAL(psTable->pszFilename,pszFilename) ) 00096 { 00097 /* 00098 * Eventually we should consider promoting to the front of 00099 * the list to accelerate frequently accessed tables. 00100 */ 00101 00102 return( psTable ); 00103 } 00104 } 00105 00106 /* -------------------------------------------------------------------- */ 00107 /* If not, try to open it. */ 00108 /* -------------------------------------------------------------------- */ 00109 fp = VSIFOpen( pszFilename, "r" ); 00110 if( fp == NULL ) 00111 return NULL; 00112 00113 /* -------------------------------------------------------------------- */ 00114 /* Create an information structure about this table, and add to */ 00115 /* the front of the list. */ 00116 /* -------------------------------------------------------------------- */ 00117 psTable = (CSVTable *) CPLCalloc(sizeof(CSVTable),1); 00118 00119 psTable->fp = fp; 00120 psTable->pszFilename = CPLStrdup( pszFilename ); 00121 psTable->psNext = psCSVTableList; 00122 00123 psCSVTableList = psTable; 00124 00125 /* -------------------------------------------------------------------- */ 00126 /* Read the table header record containing the field names. */ 00127 /* -------------------------------------------------------------------- */ 00128 psTable->papszFieldNames = CSVReadParseLine( fp ); 00129 00130 return( psTable ); 00131 } 00132 00133 /************************************************************************/ 00134 /* CSVDeaccess() */ 00135 /************************************************************************/ 00136 00137 void CSVDeaccess( const char * pszFilename ) 00138 00139 { 00140 CSVTable *psLast, *psTable; 00141 00142 /* -------------------------------------------------------------------- */ 00143 /* A NULL means deaccess all tables. */ 00144 /* -------------------------------------------------------------------- */ 00145 if( pszFilename == NULL ) 00146 { 00147 while( psCSVTableList != NULL ) 00148 CSVDeaccess( psCSVTableList->pszFilename ); 00149 00150 return; 00151 } 00152 00153 /* -------------------------------------------------------------------- */ 00154 /* Find this table. */ 00155 /* -------------------------------------------------------------------- */ 00156 psLast = NULL; 00157 for( psTable = psCSVTableList; 00158 psTable != NULL && !EQUAL(psTable->pszFilename,pszFilename); 00159 psTable = psTable->psNext ) 00160 { 00161 psLast = psTable; 00162 } 00163 00164 if( psTable == NULL ) 00165 return; 00166 00167 /* -------------------------------------------------------------------- */ 00168 /* Remove the link from the list. */ 00169 /* -------------------------------------------------------------------- */ 00170 if( psLast != NULL ) 00171 psLast->psNext = psTable->psNext; 00172 else 00173 psCSVTableList = psTable->psNext; 00174 00175 /* -------------------------------------------------------------------- */ 00176 /* Free the table. */ 00177 /* -------------------------------------------------------------------- */ 00178 VSIFClose( psTable->fp ); 00179 00180 CSLDestroy( psTable->papszFieldNames ); 00181 CSLDestroy( psTable->papszRecFields ); 00182 CPLFree( psTable->pszFilename ); 00183 00184 CPLFree( psTable ); 00185 } 00186 00187 /************************************************************************/ 00188 /* CSVReadParseLine() */ 00189 /* */ 00190 /* Read one line, and return split into fields. The return */ 00191 /* result is a stringlist, in the sense of the CSL functions. */ 00192 /************************************************************************/ 00193 00194 char **CSVReadParseLine( FILE * fp ) 00195 00196 { 00197 const char *pszLine; 00198 char *pszWorkLine; 00199 char **papszReturn; 00200 00201 CPLAssert( fp != NULL ); 00202 if( fp == NULL ) 00203 return( NULL ); 00204 00205 pszLine = CPLReadLine( fp ); 00206 if( pszLine == NULL ) 00207 return( NULL ); 00208 00209 /* -------------------------------------------------------------------- */ 00210 /* If there are no quotes, then this is the simple case. */ 00211 /* Parse, and return tokens. */ 00212 /* -------------------------------------------------------------------- */ 00213 if( strchr(pszLine,'\"') == NULL ) 00214 return CSLTokenizeStringComplex( pszLine, ",", TRUE, TRUE ); 00215 00216 /* -------------------------------------------------------------------- */ 00217 /* We must now count the quotes in our working string, and as */ 00218 /* long as it is odd, keep adding new lines. */ 00219 /* -------------------------------------------------------------------- */ 00220 pszWorkLine = CPLStrdup( pszLine ); 00221 00222 while( TRUE ) 00223 { 00224 int i, nCount = 0; 00225 00226 for( i = 0; pszWorkLine[i] != '\0'; i++ ) 00227 { 00228 if( pszWorkLine[i] == '\"' 00229 && (i == 0 || pszWorkLine[i-1] != '\\') ) 00230 nCount++; 00231 } 00232 00233 if( nCount % 2 == 0 ) 00234 break; 00235 00236 pszLine = CPLReadLine( fp ); 00237 if( pszLine == NULL ) 00238 break; 00239 00240 pszWorkLine = (char *) 00241 CPLRealloc(pszWorkLine, 00242 strlen(pszWorkLine) + strlen(pszLine) + 1); 00243 strcat( pszWorkLine, pszLine ); 00244 } 00245 00246 papszReturn = CSLTokenizeStringComplex( pszWorkLine, ",", TRUE, TRUE ); 00247 00248 CPLFree( pszWorkLine ); 00249 00250 return papszReturn; 00251 } 00252 00253 /************************************************************************/ 00254 /* CSVCompare() */ 00255 /* */ 00256 /* Compare a field to a search value using a particular */ 00257 /* criteria. */ 00258 /************************************************************************/ 00259 00260 static int CSVCompare( const char * pszFieldValue, const char * pszTarget, 00261 CSVCompareCriteria eCriteria ) 00262 00263 { 00264 if( eCriteria == CC_ExactString ) 00265 { 00266 return( strcmp( pszFieldValue, pszTarget ) == 0 ); 00267 } 00268 else if( eCriteria == CC_ApproxString ) 00269 { 00270 return( EQUAL( pszFieldValue, pszTarget ) ); 00271 } 00272 else if( eCriteria == CC_Integer ) 00273 { 00274 return( atoi(pszFieldValue) == atoi(pszTarget) ); 00275 } 00276 00277 return FALSE; 00278 } 00279 00280 /************************************************************************/ 00281 /* CSVScanLines() */ 00282 /* */ 00283 /* Read the file scanline for lines where the key field equals */ 00284 /* the indicated value with the suggested comparison criteria. */ 00285 /* Return the first matching line split into fields. */ 00286 /************************************************************************/ 00287 00288 char **CSVScanLines( FILE *fp, int iKeyField, const char * pszValue, 00289 CSVCompareCriteria eCriteria ) 00290 00291 { 00292 char **papszFields = NULL; 00293 int bSelected = FALSE, nTestValue; 00294 00295 CPLAssert( pszValue != NULL ); 00296 CPLAssert( iKeyField >= 0 ); 00297 CPLAssert( fp != NULL ); 00298 00299 nTestValue = atoi(pszValue); 00300 00301 while( !bSelected ) { 00302 papszFields = CSVReadParseLine( fp ); 00303 if( papszFields == NULL ) 00304 return( NULL ); 00305 00306 if( CSLCount( papszFields ) < iKeyField+1 ) 00307 { 00308 /* not selected */ 00309 } 00310 else if( eCriteria == CC_Integer 00311 && atoi(papszFields[iKeyField]) == nTestValue ) 00312 { 00313 bSelected = TRUE; 00314 } 00315 else 00316 { 00317 bSelected = CSVCompare( papszFields[iKeyField], pszValue, 00318 eCriteria ); 00319 } 00320 00321 if( !bSelected ) 00322 { 00323 CSLDestroy( papszFields ); 00324 papszFields = NULL; 00325 } 00326 } 00327 00328 return( papszFields ); 00329 } 00330 00331 /************************************************************************/ 00332 /* CSVScanFile() */ 00333 /* */ 00334 /* Scan a whole file using criteria similar to above, but also */ 00335 /* taking care of file opening and closing. */ 00336 /************************************************************************/ 00337 00338 char **CSVScanFile( const char * pszFilename, int iKeyField, 00339 const char * pszValue, CSVCompareCriteria eCriteria ) 00340 00341 { 00342 CSVTable *psTable; 00343 00344 /* -------------------------------------------------------------------- */ 00345 /* Get access to the table. */ 00346 /* -------------------------------------------------------------------- */ 00347 CPLAssert( pszFilename != NULL ); 00348 00349 if( iKeyField < 0 ) 00350 return NULL; 00351 00352 psTable = CSVAccess( pszFilename ); 00353 if( psTable == NULL ) 00354 return NULL; 00355 00356 /* -------------------------------------------------------------------- */ 00357 /* Does the current record match the criteria? If so, just */ 00358 /* return it again. */ 00359 /* -------------------------------------------------------------------- */ 00360 if( iKeyField >= 0 00361 && iKeyField < CSLCount(psTable->papszRecFields) 00362 && CSVCompare(pszValue,psTable->papszRecFields[iKeyField],eCriteria) ) 00363 { 00364 return psTable->papszRecFields; 00365 } 00366 00367 /* -------------------------------------------------------------------- */ 00368 /* Scan the file from the beginning, replacing the ``current */ 00369 /* record'' in our structure with the one that is found. */ 00370 /* -------------------------------------------------------------------- */ 00371 VSIRewind( psTable->fp ); 00372 CPLReadLine( psTable->fp ); /* throw away the header line */ 00373 00374 CSLDestroy( psTable->papszRecFields ); 00375 psTable->papszRecFields = 00376 CSVScanLines( psTable->fp, iKeyField, pszValue, eCriteria ); 00377 00378 return( psTable->papszRecFields ); 00379 } 00380 00381 /************************************************************************/ 00382 /* CPLGetFieldId() */ 00383 /* */ 00384 /* Read the first record of a CSV file (rewinding to be sure), */ 00385 /* and find the field with the indicated name. Returns -1 if */ 00386 /* it fails to find the field name. Comparison is case */ 00387 /* insensitive, but otherwise exact. After this function has */ 00388 /* been called the file pointer will be positioned just after */ 00389 /* the first record. */ 00390 /************************************************************************/ 00391 00392 int CSVGetFieldId( FILE * fp, const char * pszFieldName ) 00393 00394 { 00395 char **papszFields; 00396 int i; 00397 00398 CPLAssert( fp != NULL && pszFieldName != NULL ); 00399 00400 VSIRewind( fp ); 00401 00402 papszFields = CSVReadParseLine( fp ); 00403 for( i = 0; papszFields != NULL && papszFields[i] != NULL; i++ ) 00404 { 00405 if( EQUAL(papszFields[i],pszFieldName) ) 00406 { 00407 CSLDestroy( papszFields ); 00408 return i; 00409 } 00410 } 00411 00412 CSLDestroy( papszFields ); 00413 00414 return -1; 00415 } 00416 00417 /************************************************************************/ 00418 /* CSVGetFileFieldId() */ 00419 /* */ 00420 /* Same as CPLGetFieldId(), except that we get the file based */ 00421 /* on filename, rather than having an existing handle. */ 00422 /************************************************************************/ 00423 00424 int CSVGetFileFieldId( const char * pszFilename, const char * pszFieldName ) 00425 00426 { 00427 CSVTable *psTable; 00428 int i; 00429 00430 /* -------------------------------------------------------------------- */ 00431 /* Get access to the table. */ 00432 /* -------------------------------------------------------------------- */ 00433 CPLAssert( pszFilename != NULL ); 00434 00435 psTable = CSVAccess( pszFilename ); 00436 if( psTable == NULL ) 00437 return -1; 00438 00439 /* -------------------------------------------------------------------- */ 00440 /* Find the requested field. */ 00441 /* -------------------------------------------------------------------- */ 00442 for( i = 0; 00443 psTable->papszFieldNames != NULL 00444 && psTable->papszFieldNames[i] != NULL; 00445 i++ ) 00446 { 00447 if( EQUAL(psTable->papszFieldNames[i],pszFieldName) ) 00448 { 00449 return i; 00450 } 00451 } 00452 00453 return -1; 00454 } 00455 00456 00457 /************************************************************************/ 00458 /* CSVScanFileByName() */ 00459 /* */ 00460 /* Same as CSVScanFile(), but using a field name instead of a */ 00461 /* field number. */ 00462 /************************************************************************/ 00463 00464 char **CSVScanFileByName( const char * pszFilename, 00465 const char * pszKeyFieldName, 00466 const char * pszValue, CSVCompareCriteria eCriteria ) 00467 00468 { 00469 int iKeyField; 00470 00471 iKeyField = CSVGetFileFieldId( pszFilename, pszKeyFieldName ); 00472 if( iKeyField == -1 ) 00473 return NULL; 00474 00475 return( CSVScanFile( pszFilename, iKeyField, pszValue, eCriteria ) ); 00476 } 00477 00478 /************************************************************************/ 00479 /* CSVGetField() */ 00480 /* */ 00481 /* The all-in-one function to fetch a particular field value */ 00482 /* from a CSV file. Note this function will return an empty */ 00483 /* string, rather than NULL if it fails to find the desired */ 00484 /* value for some reason. The caller can't establish that the */ 00485 /* fetch failed. */ 00486 /************************************************************************/ 00487 00488 const char *CSVGetField( const char * pszFilename, 00489 const char * pszKeyFieldName, 00490 const char * pszKeyFieldValue, 00491 CSVCompareCriteria eCriteria, 00492 const char * pszTargetField ) 00493 00494 { 00495 CSVTable *psTable; 00496 char **papszRecord; 00497 int iTargetField; 00498 00499 /* -------------------------------------------------------------------- */ 00500 /* Find the table. */ 00501 /* -------------------------------------------------------------------- */ 00502 psTable = CSVAccess( pszFilename ); 00503 if( psTable == NULL ) 00504 return ""; 00505 00506 /* -------------------------------------------------------------------- */ 00507 /* Find the correct record. */ 00508 /* -------------------------------------------------------------------- */ 00509 papszRecord = CSVScanFileByName( pszFilename, pszKeyFieldName, 00510 pszKeyFieldValue, eCriteria ); 00511 00512 if( papszRecord == NULL ) 00513 return ""; 00514 00515 /* -------------------------------------------------------------------- */ 00516 /* Figure out which field we want out of this. */ 00517 /* -------------------------------------------------------------------- */ 00518 iTargetField = CSVGetFileFieldId( pszFilename, pszTargetField ); 00519 if( iTargetField < 0 ) 00520 return ""; 00521 00522 if( iTargetField >= CSLCount( papszRecord ) ) 00523 return ""; 00524 00525 return( papszRecord[iTargetField] ); 00526 } 00527 00528 /************************************************************************/ 00529 /* CSVFilename() */ 00530 /* */ 00531 /* Return the full path to a particular CSV file. This will */ 00532 /* eventually be something the application can override. */ 00533 /************************************************************************/ 00534 00535 static const char *(*pfnCSVFilenameHook)(const char *) = NULL; 00536 00537 const char * CSVFilename( const char *pszBasename ) 00538 00539 { 00540 static char szPath[512]; 00541 00542 if( pfnCSVFilenameHook == NULL ) 00543 { 00544 FILE *fp = NULL; 00545 const char *pszResult = CPLFindFile( "epsg_csv", pszBasename ); 00546 00547 if( pszResult != NULL ) 00548 return pszResult; 00549 00550 if( getenv("GEOTIFF_CSV") != NULL ) 00551 { 00552 sprintf( szPath, "%s/%s", getenv("GEOTIFF_CSV"), pszBasename ); 00553 } 00554 else if( (fp = fopen( "csv/horiz_cs.csv", "rt" )) != NULL ) 00555 { 00556 sprintf( szPath, "csv/%s", pszBasename ); 00557 } 00558 else 00559 { 00560 sprintf( szPath, "/usr/local/share/epsg_csv/%s", pszBasename ); 00561 } 00562 00563 if( fp != NULL ) 00564 fclose( fp ); 00565 00566 return( szPath ); 00567 } 00568 else 00569 return( pfnCSVFilenameHook( pszBasename ) ); 00570 } 00571 00572 /************************************************************************/ 00573 /* SetCSVFilenameHook() */ 00574 /* */ 00575 /* Applications can use this to set a function that will */ 00576 /* massage CSV filenames. */ 00577 /************************************************************************/ 00578 00623 void SetCSVFilenameHook( const char *(*pfnNewHook)( const char * ) ) 00624 00625 { 00626 pfnCSVFilenameHook = pfnNewHook; 00627 }