User Manual, Developers Guide and API Documentation

readerInl.hpp

Go to the documentation of this file.
00001 /**********************************************
00002 
00003 License: BSD
00004 Project Webpage: http://cajun-jsonapi.sourceforge.net/
00005 Author: Terry Caton
00006 
00007 ***********************************************/
00008 
#include <cassert>
#include <cctype>
#include <set>
#include <sstream>
#include <string>
00012 
00013 /*  
00014 
00015 TODO:
00016 * better documentation
00017 * unicode character decoding
00018 
00019 */
00020 
00021 namespace wns{ namespace probe  { namespace bus { namespace json{
00022 
00023 
00024    inline std::istream& operator >> (std::istream& istr, UnknownElement& elementRoot) {
00025    Reader::Read(elementRoot, istr);
00026    return istr;
00027 }
00028 
00029 inline Reader::Location::Location() :
00030    m_nLine(0),
00031    m_nLineOffset(0),
00032    m_nDocOffset(0)
00033 {}
00034 
00035 
00037 // Reader::InputStream
00038 
00039 class Reader::InputStream // would be cool if we could inherit from std::istream & override "get"
00040 {
00041 public:
00042    InputStream(std::istream& iStr) :
00043       m_iStr(iStr) {}
00044 
00045    // protect access to the input stream, so we can keeep track of document/line offsets
00046    char Get(); // big, define outside
00047    char Peek() {
00048       assert(m_iStr.eof() == false); // enforce reading of only valid stream data 
00049       return m_iStr.peek();
00050    }
00051 
00052    bool EOS() {
00053       m_iStr.peek(); // apparently eof flag isn't set until a character read is attempted. whatever.
00054       return m_iStr.eof();
00055    }
00056 
00057    const Location& GetLocation() const { return m_Location; }
00058 
00059 private:
00060    std::istream& m_iStr;
00061    Location m_Location;
00062 };
00063 
00064 
00065 inline char Reader::InputStream::Get()
00066 {
00067    assert(m_iStr.eof() == false); // enforce reading of only valid stream data 
00068    char c = m_iStr.get();
00069    
00070    ++m_Location.m_nDocOffset;
00071    if (c == '\n') {
00072       ++m_Location.m_nLine;
00073       m_Location.m_nLineOffset = 0;
00074    }
00075    else {
00076       ++m_Location.m_nLineOffset;
00077    }
00078 
00079    return c;
00080 }
00081 
00082 
00083 
00085 // Reader::TokenStream
00086 
00087 class Reader::TokenStream
00088 {
00089 public:
00090    TokenStream(const Tokens& tokens);
00091 
00092    const Token& Peek();
00093    const Token& Get();
00094 
00095    bool EOS() const;
00096 
00097 private:
00098    const Tokens& m_Tokens;
00099    Tokens::const_iterator m_itCurrent;
00100 };
00101 
00102 
00103 inline Reader::TokenStream::TokenStream(const Tokens& tokens) :
00104    m_Tokens(tokens),
00105    m_itCurrent(tokens.begin())
00106 {}
00107 
00108 inline const Reader::Token& Reader::TokenStream::Peek() {
00109    assert(m_itCurrent != m_Tokens.end());
00110    return *(m_itCurrent); 
00111 }
00112 
00113 inline const Reader::Token& Reader::TokenStream::Get() {
00114    assert(m_itCurrent != m_Tokens.end());
00115    return *(m_itCurrent++); 
00116 }
00117 
00118 inline bool Reader::TokenStream::EOS() const {
00119    return m_itCurrent == m_Tokens.end(); 
00120 }
00121 
00123 // Reader (finally)
00124 
00125 
00126 inline void Reader::Read(Object& object, std::istream& istr)                { Read_i(object, istr); }
00127 inline void Reader::Read(Array& array, std::istream& istr)                  { Read_i(array, istr); }
00128 inline void Reader::Read(String& string, std::istream& istr)                { Read_i(string, istr); }
00129 inline void Reader::Read(Number& number, std::istream& istr)                { Read_i(number, istr); }
00130 inline void Reader::Read(Boolean& boolean, std::istream& istr)              { Read_i(boolean, istr); }
00131 inline void Reader::Read(Null& null, std::istream& istr)                    { Read_i(null, istr); }
00132 inline void Reader::Read(UnknownElement& unknown, std::istream& istr)       { Read_i(unknown, istr); }
00133 
00134 
00135 template <typename ElementTypeT>   
00136 void Reader::Read_i(ElementTypeT& element, std::istream& istr)
00137 {
00138    Reader reader;
00139 
00140    Tokens tokens;
00141    InputStream inputStream(istr);
00142    reader.Scan(tokens, inputStream);
00143 
00144    TokenStream tokenStream(tokens);
00145    reader.Parse(element, tokenStream);
00146 
00147    if (tokenStream.EOS() == false)
00148    {
00149       const Token& token = tokenStream.Peek();
00150       std::string sMessage = "Expected End of token stream; found " + token.sValue;
00151       throw ParseException(sMessage, token.locBegin, token.locEnd);
00152    }
00153 }
00154 
00155 
00156 inline void Reader::Scan(Tokens& tokens, InputStream& inputStream)
00157 {
00158    while (EatWhiteSpace(inputStream),              // ignore any leading white space...
00159           inputStream.EOS() == false) // ...before checking for EOS
00160    {
00161       // if all goes well, we'll create a token each pass
00162       Token token;
00163       token.locBegin = inputStream.GetLocation();
00164 
00165       // gives us null-terminated string
00166       std::string sChar;
00167       sChar.push_back(inputStream.Peek());
00168 
00169       switch (sChar[0])
00170       {
00171          case '{':
00172             token.sValue = sChar[0];
00173             MatchExpectedString(sChar, inputStream);
00174             token.nType = Token::TOKEN_OBJECT_BEGIN;
00175             break;
00176 
00177          case '}':
00178             token.sValue = sChar[0];
00179             MatchExpectedString(sChar, inputStream);
00180             token.nType = Token::TOKEN_OBJECT_END;
00181             break;
00182 
00183          case '[':
00184             token.sValue = sChar[0];
00185             MatchExpectedString(sChar, inputStream);
00186             token.nType = Token::TOKEN_ARRAY_BEGIN;
00187             break;
00188 
00189          case ']':
00190             token.sValue = sChar[0];
00191             MatchExpectedString(sChar, inputStream);
00192             token.nType = Token::TOKEN_ARRAY_END;
00193             break;
00194 
00195          case ',':
00196             token.sValue = sChar[0];
00197             MatchExpectedString(sChar, inputStream);
00198             token.nType = Token::TOKEN_NEXT_ELEMENT;
00199             break;
00200 
00201          case ':':
00202             token.sValue = sChar[0];
00203             MatchExpectedString(sChar, inputStream);
00204             token.nType = Token::TOKEN_MEMBER_ASSIGN;
00205             break;
00206 
00207          case '"':
00208             MatchString(token.sValue, inputStream);
00209             token.nType = Token::TOKEN_STRING;
00210             break;
00211 
00212          case '-':
00213          case '0':
00214          case '1':
00215          case '2':
00216          case '3':
00217          case '4':
00218          case '5':
00219          case '6':
00220          case '7':
00221          case '8':
00222          case '9':
00223             MatchNumber(token.sValue, inputStream);
00224             token.nType = Token::TOKEN_NUMBER;
00225             break;
00226 
00227          case 't':
00228             token.sValue = "true";
00229             MatchExpectedString(token.sValue, inputStream);
00230             token.nType = Token::TOKEN_BOOLEAN;
00231             break;
00232 
00233          case 'f':
00234             token.sValue = "false";
00235             MatchExpectedString(token.sValue, inputStream);
00236             token.nType = Token::TOKEN_BOOLEAN;
00237             break;
00238 
00239          case 'n':
00240             token.sValue = "null";
00241             MatchExpectedString(token.sValue, inputStream);
00242             token.nType = Token::TOKEN_NULL;
00243             break;
00244 
00245          default: {
00246             std::string sErrorMessage = "Unexpected character in stream: " + sChar;
00247             throw ScanException(sErrorMessage, inputStream.GetLocation());
00248          }
00249       }
00250 
00251       token.locEnd = inputStream.GetLocation();
00252       tokens.push_back(token);
00253    }
00254 }
00255 
00256 
00257 inline void Reader::EatWhiteSpace(InputStream& inputStream)
00258 {
00259    while (inputStream.EOS() == false && 
00260           ::isspace(inputStream.Peek()))
00261       inputStream.Get();
00262 }
00263 
00264 inline void Reader::MatchExpectedString(const std::string& sExpected, InputStream& inputStream)
00265 {
00266    std::string::const_iterator it(sExpected.begin()),
00267                                itEnd(sExpected.end());
00268    for ( ; it != itEnd; ++it) {
00269       if (inputStream.EOS() ||      // did we reach the end before finding what we're looking for...
00270           inputStream.Get() != *it) // ...or did we find something different?
00271       {
00272          std::string sMessage = "Expected string: " + sExpected;
00273          throw ScanException(sMessage, inputStream.GetLocation());
00274       }
00275    }
00276 
00277    // all's well if we made it here, return quietly
00278 }
00279 
00280 
00281 inline void Reader::MatchString(std::string& string, InputStream& inputStream)
00282 {
00283    MatchExpectedString("\"", inputStream);
00284    
00285    while (inputStream.EOS() == false &&
00286           inputStream.Peek() != '"')
00287    {
00288       char c = inputStream.Get();
00289 
00290       // escape?
00291       if (c == '\\' &&
00292           inputStream.EOS() == false) // shouldn't have reached the end yet
00293       {
00294          c = inputStream.Get();
00295          switch (c) {
00296             case '/':      string.push_back('/');     break;
00297             case '"':      string.push_back('"');     break;
00298             case '\\':     string.push_back('\\');    break;
00299             case 'b':      string.push_back('\b');    break;
00300             case 'f':      string.push_back('\f');    break;
00301             case 'n':      string.push_back('\n');    break;
00302             case 'r':      string.push_back('\r');    break;
00303             case 't':      string.push_back('\t');    break;
00304             case 'u':      // TODO: what do we do with this?
00305             default: {
00306                std::string sMessage = "Unrecognized escape sequence found in string: \\" + c;
00307                throw ScanException(sMessage, inputStream.GetLocation());
00308             }
00309          }
00310       }
00311       else {
00312          string.push_back(c);
00313       }
00314    }
00315 
00316    // eat the last '"' that we just peeked
00317    MatchExpectedString("\"", inputStream);
00318 }
00319 
00320 
00321 inline void Reader::MatchNumber(std::string& sNumber, InputStream& inputStream)
00322 {
00323    const char sNumericChars[] = "0123456789.eE-+";
00324    std::set<char> numericChars;
00325    numericChars.insert(sNumericChars, sNumericChars + sizeof(sNumericChars));
00326 
00327    while (inputStream.EOS() == false &&
00328           numericChars.find(inputStream.Peek()) != numericChars.end())
00329    {
00330       sNumber.push_back(inputStream.Get());   
00331    }
00332 }
00333 
00334 
00335 inline void Reader::Parse(UnknownElement& element, Reader::TokenStream& tokenStream) 
00336 {
00337    if (tokenStream.EOS()) {
00338       std::string sMessage = "Unexpected end of token stream";
00339       throw ParseException(sMessage, Location(), Location()); // nowhere to point to
00340    }
00341 
00342    const Token& token = tokenStream.Peek();
00343    switch (token.nType) {
00344       case Token::TOKEN_OBJECT_BEGIN:
00345       {
00346          // implicit non-const cast will perform conversion for us (if necessary)
00347          Object& object = element;
00348          Parse(object, tokenStream);
00349          break;
00350       }
00351 
00352       case Token::TOKEN_ARRAY_BEGIN:
00353       {
00354          Array& array = element;
00355          Parse(array, tokenStream);
00356          break;
00357       }
00358 
00359       case Token::TOKEN_STRING:
00360       {
00361          String& string = element;
00362          Parse(string, tokenStream);
00363          break;
00364       }
00365 
00366       case Token::TOKEN_NUMBER:
00367       {
00368          Number& number = element;
00369          Parse(number, tokenStream);
00370          break;
00371       }
00372 
00373       case Token::TOKEN_BOOLEAN:
00374       {
00375          Boolean& boolean = element;
00376          Parse(boolean, tokenStream);
00377          break;
00378       }
00379 
00380       case Token::TOKEN_NULL:
00381       {
00382          Null& null = element;
00383          Parse(null, tokenStream);
00384          break;
00385       }
00386 
00387       default:
00388       {
00389          std::string sMessage = "Unexpected token: " + token.sValue;
00390          throw ParseException(sMessage, token.locBegin, token.locEnd);
00391       }
00392    }
00393 }
00394 
00395 
00396 inline void Reader::Parse(Object& object, Reader::TokenStream& tokenStream)
00397 {
00398    MatchExpectedToken(Token::TOKEN_OBJECT_BEGIN, tokenStream);
00399 
00400    bool bContinue = (tokenStream.EOS() == false &&
00401                      tokenStream.Peek().nType != Token::TOKEN_OBJECT_END);
00402    while (bContinue)
00403    {
00404       Object::Member member;
00405 
00406       // first the member name. save the token in case we have to throw an exception
00407       const Token& tokenName = tokenStream.Peek();
00408       member.name = MatchExpectedToken(Token::TOKEN_STRING, tokenStream);
00409 
00410       // ...then the key/value separator...
00411       MatchExpectedToken(Token::TOKEN_MEMBER_ASSIGN, tokenStream);
00412 
00413       // ...then the value itself (can be anything).
00414       Parse(member.element, tokenStream);
00415 
00416       // try adding it to the object (this could throw)
00417       try
00418       {
00419          object.Insert(member);
00420       }
00421       catch (Exception&)
00422       {
00423          // must be a duplicate name
00424          std::string sMessage = "Duplicate object member token: " + member.name; 
00425          throw ParseException(sMessage, tokenName.locBegin, tokenName.locEnd);
00426       }
00427 
00428       bContinue = (tokenStream.EOS() == false &&
00429                    tokenStream.Peek().nType == Token::TOKEN_NEXT_ELEMENT);
00430       if (bContinue)
00431          MatchExpectedToken(Token::TOKEN_NEXT_ELEMENT, tokenStream);
00432    }
00433 
00434    MatchExpectedToken(Token::TOKEN_OBJECT_END, tokenStream);
00435 }
00436 
00437 
00438 inline void Reader::Parse(Array& array, Reader::TokenStream& tokenStream)
00439 {
00440    MatchExpectedToken(Token::TOKEN_ARRAY_BEGIN, tokenStream);
00441 
00442    bool bContinue = (tokenStream.EOS() == false &&
00443                      tokenStream.Peek().nType != Token::TOKEN_ARRAY_END);
00444    while (bContinue)
00445    {
00446       // ...what's next? could be anything
00447       Array::iterator itElement = array.Insert(UnknownElement());
00448       UnknownElement& element = *itElement;
00449       Parse(element, tokenStream);
00450 
00451       bContinue = (tokenStream.EOS() == false &&
00452                    tokenStream.Peek().nType == Token::TOKEN_NEXT_ELEMENT);
00453       if (bContinue)
00454          MatchExpectedToken(Token::TOKEN_NEXT_ELEMENT, tokenStream);
00455    }
00456 
00457    MatchExpectedToken(Token::TOKEN_ARRAY_END, tokenStream);
00458 }
00459 
00460 
00461 inline void Reader::Parse(String& string, Reader::TokenStream& tokenStream)
00462 {
00463    string = MatchExpectedToken(Token::TOKEN_STRING, tokenStream);
00464 }
00465 
00466 
00467 inline void Reader::Parse(Number& number, Reader::TokenStream& tokenStream)
00468 {
00469    const Token& currentToken = tokenStream.Peek(); // might need this later for throwing exception
00470    const std::string& sValue = MatchExpectedToken(Token::TOKEN_NUMBER, tokenStream);
00471 
00472    std::istringstream iStr(sValue);
00473    double dValue;
00474    iStr >> dValue;
00475 
00476    // did we consume all characters in the token?
00477    if (iStr.eof() == false)
00478    {
00479       std::string sMessage = "Unexpected character in NUMBER token: " + iStr.peek();
00480       throw ParseException(sMessage, currentToken.locBegin, currentToken.locEnd);
00481    }
00482 
00483    number = dValue;
00484 }
00485 
00486 
00487 inline void Reader::Parse(Boolean& boolean, Reader::TokenStream& tokenStream)
00488 {
00489    const std::string& sValue = MatchExpectedToken(Token::TOKEN_BOOLEAN, tokenStream);
00490    boolean = (sValue == "true" ? true : false);
00491 }
00492 
00493 
00494 inline void Reader::Parse(Null&, Reader::TokenStream& tokenStream)
00495 {
00496    MatchExpectedToken(Token::TOKEN_NULL, tokenStream);
00497 }
00498 
00499 
00500 inline const std::string& Reader::MatchExpectedToken(Token::Type nExpected, Reader::TokenStream& tokenStream)
00501 {
00502    if (tokenStream.EOS())
00503    {
00504       std::string sMessage = "Unexpected End of token stream";
00505       throw ParseException(sMessage, Location(), Location()); // nowhere to point to
00506    }
00507 
00508    const Token& token = tokenStream.Get();
00509    if (token.nType != nExpected)
00510    {
00511       std::string sMessage = "Unexpected token: " + token.sValue;
00512       throw ParseException(sMessage, token.locBegin, token.locEnd);
00513    }
00514 
00515    return token.sValue;
00516 }
00517 
00518 } // json
00519 } // bus
00520 } // probe
00521 } // wns
00522 

Generated on Sun May 27 03:31:50 2012 for openWNS by  doxygen 1.5.5