stringUtils.cpp

Go to the documentation of this file.
00001 /**
00002  *  @file stringUtils.cpp
00003  * Contains definitions for string manipulation functions
00004  *       within Cantera.
00005  */
00006 
00007 /*
00008  * $Id: stringUtils.cpp 368 2010-01-04 00:46:26Z hkmoffa $
00009  */
00010 
00011 // Copyright 2001  California Institute of Technology
00012 
00013 //@{
00014 #ifdef WIN32
00015 #pragma warning(disable:4786)
00016 #pragma warning(disable:4503)
00017 #pragma warning(disable:4996)
00018 #define SNPRINTF _snprintf
00019 #else
00020 #define SNPRINTF snprintf
00021 #endif
00022 //@}
00023 #include "ct_defs.h"
00024 #include "stringUtils.h"
00025 #include "ctexceptions.h"
00026 #include "global.h"
00027 #include "ctml.h"
00028 
00029 #include <string>
00030 
00031 #include <cstdio>
00032 #include <cstring>
00033 #include <cstdlib>
00034 #include <cctype>
00035 
00036 namespace Cantera {
00037 
00038 
00039   // Convert a double into a c++ string
00040   /*
00041    *  This routine doesn't assume a formatting. You
00042    *  must supply the formatting
00043    *
00044    * @param x double to be converted
00045    * @param fmt   Format to be used (printf style)
00046    */
00047   std::string fp2str(const double x, const std::string &fmt) {
00048     char buf[64];
00049     int n = SNPRINTF(buf, 63, fmt.c_str(), x);
00050     if (n > 0) {
00051       buf[63] = '\0';
00052       return std::string(buf);
00053     } 
00054     return std::string(" ");
00055   }
00056   std::string fp2str(const double x) {
00057     char buf[64];
00058     int n = SNPRINTF(buf, 64, "%g" , x);
00059     if (n > 0) {
00060       buf[29] = '\0';
00061       return std::string(buf);
00062     } 
00063     return std::string(" ");
00064   }
00065 
00066   /*
00067    * Convert an integer number to a std::string using sprintf.
00068    */
00069   std::string int2str(const int n, const std::string &fmt) {
00070     char buf[30];
00071     int m = SNPRINTF(buf, 30, fmt.c_str(), n);
00072     if (m > 0) {
00073       buf[29] = '\0';
00074       return std::string(buf);
00075     }
00076     return std::string(" ");
00077   }
00078 
00079   //  Convert an int to a string 
00080   /*
00081    *  @param n          int to be converted
00082    */
00083   std::string int2str(const int n) {
00084     char buf[30];
00085     int m = SNPRINTF(buf, 30, "%d", n);
00086     if (m > 0) {
00087       buf[29] = '\0';
00088       return std::string(buf);
00089     }
00090     return std::string(" ");
00091   }
00092 
00093   std::string lowercase(const std::string &s) {
00094     int n = static_cast<int>(s.size());
00095     std::string lc(s);
00096     for (int i = 0; i < n; i++) lc[i] = tolower(s[i]);
00097     return lc;
00098   }
00099 
00100   //! Return the position of the first printable
00101   //! character in the string
00102   /*!
00103    *    @param  s    input string
00104    *    @return      Returns an int representing the first 
00105    *                 printable string. If none returns the
00106    *                 size of the string.
00107    */
00108   static int firstChar(const std::string &s) {
00109     int i;
00110     int n = static_cast<int>(s.size());
00111     for (i = 0; i < n; i++) {
00112       if (s[i] != ' ' && isprint(s[i])) break;
00113     }
00114     return i;
00115   }
00116 
00117   //! Return the position of the last printable
00118   //! character in the string
00119   /*!
00120    *    @param  s    input string
00121    *    @return      Returns an int representing the first 
00122    *                 printable string. If none returns 
00123    *                 -1.
00124    */
00125   static int lastChar(const std::string &s) {
00126     int i;
00127     int n = static_cast<int>(s.size());
00128     for (i = n-1; i >= 0; i--) 
00129       if (s[i] != ' ' && isprint(s[i])) break;
00130     return i;
00131   }
00132 
00133   // Strip the leading and trailing white space
00134   // from a string
00135   /*
00136    *  The command isprint() is used to determine printable
00137    *  characters.
00138    *
00139    *    @param   s       Input string
00140    *    @return  Returns a copy of the string, stripped
00141    *             of leading and trailing white space
00142    */
00143   std::string stripws(const std::string &s) {
00144     int ifirst = firstChar(s);
00145     int ilast = lastChar(s);
00146     return s.substr(ifirst, ilast - ifirst + 1); 
00147   }
00148 
00149   // Strip non-printing characters wherever they are
00150   /*
00151    *   @param s        Input string
00152    *   @return         Returns a copy of the string,
00153    *                   stripped of all non-printing characters.
00154    */
00155   std::string stripnonprint(const std::string &s) {
00156     int i;
00157     int n = static_cast<int>(s.size());
00158     std::string ss = "";
00159     for (i = 0; i < n; i++) {
00160       if (isprint(s[i])) {
00161         ss += s[i];
00162       }
00163     }
00164     return ss;
00165   }
00166 
00167             
00168   // Parse a composition string into a map consisting of individual key:composition
00169   // pairs.
00170   /*
00171    *  The composition is a double.
00172    * Example
00173    *
00174    *  Input is
00175    *
00176    *    "fire:0   ice:1   snow:2"
00177    *
00178    *  Output is
00179    *             x["fire"] = 0
00180    *             x["ice"]  = 1
00181    *             x["snow"] = 2
00182    *
00183    *     @param ss   original string consisting of multiple key:composition
00184    *                 pairs on multiple lines
00185    *     @param x    Output map consisting of a composition
00186    *                 map, which is a string to double map
00187    */
00188   void parseCompString(const std::string &ss, Cantera::compositionMap& x) {
00189     std::string s = ss;
00190     std::string::size_type icolon, ibegin, iend;
00191     std::string name, num, nm;
00192     do {
00193       ibegin = s.find_first_not_of(", ;\n\t");
00194       if (ibegin != std::string::npos) {
00195         s = s.substr(ibegin,s.size());
00196         icolon = s.find(':');
00197         iend = s.find_first_of(", ;\n\t");
00198         //icomma = s.find(',');
00199         if (icolon != std::string::npos) {
00200           name = s.substr(0, icolon);
00201           if (iend != std::string::npos) {
00202             num = s.substr(icolon+1, iend-icolon);
00203             s = s.substr(iend+1, s.size());
00204           }
00205           else {
00206             num = s.substr(icolon+1, s.size());
00207             s = "";
00208           }
00209           nm = stripws(name);
00210           if (x.find(nm) == x.end()) {
00211             throw CanteraError("parseCompString",
00212                                "unknown species " + nm);
00213           }
00214           x[nm] = atof(num.c_str());
00215         }
00216         else s = "";
00217       }
00218     }
00219     while (s != "");
00220   }
00221             
00222   //   Parse a composition string into individual key:composition
00223   //   pairs
00224   /*
00225    *
00226    *     @param ss   original string consisting of multiple key:composition
00227    *                 pairs on multiple lines
00228    *     @param w    Output vector consisting of single key:composition
00229    *                 items in each index.
00230    */
00231   void split(const std::string &ss, std::vector<std::string>& w) {
00232     std::string s = ss;
00233     std::string::size_type ibegin, iend;
00234     std::string name, num, nm;
00235     do {
00236       ibegin = s.find_first_not_of(", ;\n\t");
00237       if (ibegin != std::string::npos) {
00238         s = s.substr(ibegin,s.size());
00239         iend = s.find_first_of(", ;\n\t");
00240         if (iend != std::string::npos) {
00241           w.push_back(s.substr(0, iend));
00242           s = s.substr(iend+1, s.size());
00243         }
00244         else {
00245           w.push_back(s.substr(0, s.size()));
00246           return;
00247         }
00248       }
00249     }
00250     while (s != "");
00251   }
00252 
00253   int fillArrayFromString(const std::string& str,
00254                           doublereal* const a, const char delim) {
00255     std::string::size_type iloc;
00256     int count = 0;
00257     std::string num;
00258     std::string s = str;
00259     while (s.size() > 0) {
00260       iloc = s.find(delim);
00261       if (iloc > 0) {
00262         num = s.substr(0, iloc);
00263         s = s.substr(iloc+1,s.size());
00264       }
00265       else {
00266         num = s;
00267         s = "";
00268       }
00269       a[count] = atofCheck(num.c_str());
00270       count++;
00271     }
00272     return count;
00273   }
00274 
00275   // Get the file name without the path or extension
00276   /*
00277    *   @param fullPath   Input file name consisting
00278    *                     of the full file name
00279    *
00280    *  @return Returns the basename
00281    */
00282   std::string getBaseName(const std::string& path) {
00283     std::string file;
00284     size_t idot = path.find_last_of('.');
00285     size_t islash = path.find_last_of('/');
00286     if (idot > 0 && idot < path.size()) {
00287       if (islash > 0 && islash < idot) {
00288         file = path.substr(islash+1, idot-islash-1);
00289       }
00290       else {
00291         file = path.substr(0,idot);
00292       }
00293     }
00294     else {
00295       file = path;
00296     }       
00297     return file;
00298   }
00299 
00300   
00301   int intValue(std::string val) {
00302     return std::atoi(stripws(val).c_str());
00303   }
00304 
00305   doublereal fpValue(std::string val) {
00306     return std::atof(stripws(val).c_str());
00307   }
00308 
00309   doublereal fpValueCheck(std::string val) {
00310     return atofCheck(stripws(val).c_str());
00311   }
00312 
00313   //  Generate a logfile name based on an input file name
00314   /*
00315    *   It tries to find the basename. Then, it appends a .log
00316    *   to it.
00317    *
00318    *   @param infile      Input file name
00319    *
00320    *  @return Returns a logfile name
00321    */
00322   std::string logfileName(const std::string& infile) {
00323     std::string logfile = getBaseName(infile);
00324     logfile += ".log";
00325     return logfile;
00326   }
00327 
00328   //    Line wrap a string via a copy operation
00329   /*
00330    *   @param s   Input string to be line wrapped
00331    *   @paramlen  Length at which to wrap. The 
00332    *              default is 70.
00333    */
00334   std::string wrapString(const std::string& s, const int len) {
00335     int nc = s.size();
00336     int n, count=0;
00337     std::string r;
00338     for (n = 0; n < nc; n++) {
00339       if (s[n] == '\n') count = 0;
00340       else count++;
00341       if (count > len && s[n] == ' ') {
00342         r += "\n     ";
00343         count = 0;
00344       }
00345       r += s[n];
00346     }
00347     return r;
00348   }
00349 
00350 
00351   // Routine strips off white space from a c character string
00352   /*
00353    *     This routine strips off blanks and tabs (only leading and trailing
00354    *     characters) in 'str'.  On return, it returns the number of
00355    *     characters still included in the string (excluding the null character).
00356    *
00357    *      Comments are excluded -> All instances of the comment character, '!',
00358    *                               are replaced by '\0' thereby terminating
00359    *                               the string
00360    *
00361    *     Parameter list:
00362    *
00363    * @param  str   On output 'str' contains the same characters as on
00364    *               input except the leading and trailing white space and
00365    *               comments have been removed.
00366    */
00367   int stripLTWScstring(char str[]) {
00368     int  i = 0, j = 0;
00369     char ch;
00370     const char COM_CHAR='\0';
00371     /*
00372      *    Quick Returns
00373      */
00374     if ((str == 0) || (str[0] == '\0')) return (0);
00375 
00376     /* Find first non-space character character */
00377     while(((ch = str[i]) != '\0') && isspace(ch)) i++;
00378         
00379     /*
00380      * Move real part of str to the front by copying the string
00381      *   - Comments are handled here, by terminating the copy at the
00382      *     first comment indicator, and inserting the null character at
00383      *     that point.
00384      */
00385  
00386     while ( (ch = str[j+i]) != '\0' &&
00387             (ch != COM_CHAR)) {
00388       str[j] = ch;
00389       j++;
00390     }
00391     str[j] = '\0';
00392     j--;
00393     /* Remove trailing white space by inserting a null character */    
00394     while( (j != -1 ) && isspace(str[j])) j--;
00395     j++;
00396     str[j] = '\0';
00397     return (j);
00398   }
00399 
00400   // Translate a char string into a single double
00401   /*
00402    * atofCheck is a wrapper around the C stdlib routine atof().
00403    * It does quite a bit more error checking than atof() or
00404    * strtod(), and is quite a bit more restrictive.
00405    *
00406    *   First it interprets both E, e, d, and D as exponents.
00407    *   atof() only interprets e or E as an exponent character.
00408    *
00409    *   It only accepts a string as well formed if it consists as a 
00410    *   single token. Multiple words will produce an error message
00411    *
00412    *   It will produce an error for NAN and inf entries as well,
00413    *   in contrast to atof() or strtod().
00414    *   The user needs to know that a serious numerical issue
00415    *   has occurred.
00416    *
00417    *   It does not accept hexadecimal numbers.
00418    *
00419    *  @param dptr  pointer to the input c string
00420    *  @return      Returns the double
00421    *
00422    * On any error, it will throw a CanteraError signal.
00423    */
00424   doublereal atofCheck(const char * const dptr) {
00425     if (!dptr) {
00426       throw CanteraError("atofCheck", "null pointer to string");
00427     }
00428     char *eptr = (char *) malloc(strlen(dptr)+1);
00429     strcpy(eptr, dptr);
00430     int ll = stripLTWScstring(eptr);
00431     if (ll == 0) {
00432       throw CanteraError("atofCheck", "string has zero length");
00433     }
00434     int numDot = 0;
00435     int numExp = 0;
00436     char ch;
00437     int istart = 0;
00438     ch = eptr[0];
00439     if (ch == '+' || ch == '-') {
00440       istart = 1;
00441     }
00442     for (int i = istart; i < ll; i++) {
00443       ch = eptr[i];
00444       if (isdigit(ch)) {
00445       } else if (ch == '.') {
00446         numDot++;
00447         if (numDot > 1) {
00448           free(eptr);
00449           throw CanteraError("atofCheck",
00450                              "string has more than one .");
00451         }
00452       } else if (ch == 'e' || ch == 'E' || ch == 'd' || ch == 'D') {
00453         numExp++;
00454         eptr[i] = 'E';
00455         if (numExp > 1) {
00456           free(eptr);
00457           throw CanteraError("atofCheck", 
00458                              "string has more than one exp char");
00459         }
00460         ch = eptr[i+1];
00461         if (ch == '+' || ch == '-') {
00462           i++;
00463         }
00464       } else {
00465         std::string hh(dptr);
00466         free(eptr);
00467         throw CanteraError("atofCheck",
00468                            "Trouble processing string, " + hh);
00469       }
00470     }
00471     doublereal rval = atof(eptr);
00472     free(eptr);
00473     return rval;
00474   }
00475 
00476   // Interpret one or two token string as a single double
00477   /*
00478    *   This is similar to atof(). However, the second token
00479    *   is interpreted as an MKS units string and a conversion
00480    *   factor to MKS is applied.
00481    *
00482    *   Example
00483    *  " 1.0 atm"
00484    *
00485    *   results in the number 1.01325e5 
00486    *
00487    *   @param strSI string to be converted. One or two tokens
00488    *
00489    *   @return returns a converted double  
00490    */
00491   doublereal strSItoDbl(const std::string& strSI) {
00492     std::vector<std::string> v;
00493     tokenizeString(strSI, v);
00494     doublereal fp = 1.0;
00495     int n = v.size();
00496     if (n > 2 || n < 1) {
00497       throw CanteraError("strSItoDbl",
00498                          "number of tokens is too high");
00499     } else if (n == 2) {
00500       fp = toSI(v[1]);
00501     }
00502     doublereal val = atofCheck(v[0].c_str());
00503     return (val * fp);
00504   }
00505 
00506   static std::string::size_type findFirstWS(const std::string& val) {
00507     std::string::size_type ibegin = std::string::npos;
00508     int j = 0;
00509     std::string::const_iterator i = val.begin();
00510     for ( ; i != val.end(); i++) {
00511       char ch = *i;
00512       int ll = (int) ch;
00513       if (isspace(ll)) {
00514         ibegin = (std::string::size_type) j;
00515         break;
00516       }
00517       j++;
00518     }
00519     return ibegin;  
00520   }
00521 
00522   static std::string::size_type findFirstNotOfWS(const std::string& val) {
00523     std::string::size_type ibegin = std::string::npos;
00524     int j = 0;
00525     std::string::const_iterator i = val.begin();
00526     for ( ; i != val.end(); i++) {
00527       char ch = *i;
00528       int ll = (int) ch;
00529       if (!isspace(ll)) {
00530         ibegin = (std::string::size_type) j;
00531         break;
00532       }
00533       j++;
00534     }
00535     return ibegin;  
00536   }
00537 
00538   // This function  separates a string up into tokens
00539   // according to the location of white space.
00540   /*
00541    *    The separate tokens are returned in a string vector, v.
00542    *
00543    *  @param oval   String to be broken up
00544    *  @param v     Output vector of tokens.
00545    */
00546   void tokenizeString(const std::string& oval,
00547                       std::vector<std::string>& v) {
00548     std::string val(oval);
00549     std::string::size_type ibegin, iend;
00550     v.clear();
00551     while (1 > 0) {
00552       ibegin = findFirstNotOfWS(val);
00553       if (ibegin != std::string::npos) {
00554         val = val.substr(ibegin,val.size());
00555         iend = findFirstWS(val);
00556         if (iend == std::string::npos) {
00557           v.push_back(val);
00558           break;
00559         } else {
00560           v.push_back(val.substr(0,iend));
00561           val = val.substr(iend+1,val.size());
00562         }
00563       }
00564       else {
00565         break;
00566       }
00567     }
00568   }
00569 
00570 
00571 }