diff options
Diffstat (limited to 'usr.bin/dtc/input_buffer.hh')
-rw-r--r-- | usr.bin/dtc/input_buffer.hh | 574 |
1 files changed, 574 insertions, 0 deletions
diff --git a/usr.bin/dtc/input_buffer.hh b/usr.bin/dtc/input_buffer.hh new file mode 100644 index 000000000000..395c7b044df3 --- /dev/null +++ b/usr.bin/dtc/input_buffer.hh @@ -0,0 +1,574 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2013 David Chisnall + * All rights reserved. + * + * This software was developed by SRI International and the University of + * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237) + * ("CTSRD"), as part of the DARPA CRASH research programme. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _INPUT_BUFFER_HH_ +#define _INPUT_BUFFER_HH_ +#include "util.hh" +#include <assert.h> +#include <stack> +#include <string> +#include <unordered_set> + +namespace dtc +{ + +namespace { +struct expression; +typedef std::unique_ptr<expression> expression_ptr; +} + +/** + * Class encapsulating the input file. Can be used as a const char*, but has + * range checking. Attempting to access anything out of range will return a 0 + * byte. The input buffer can be cheaply copied, without copying the + * underlying memory, however it is the user's responsibility to ensure that + * such copies do not persist beyond the lifetime of the underlying memory. + * + * This also contains methods for reporting errors and for consuming the token + * stream. + */ +class input_buffer +{ + friend class text_input_buffer; + protected: + /** + * The buffer. This class doesn't own the buffer, but the + * mmap_input_buffer subclass does. + */ + const char* buffer; + /** + * The size of the buffer. + */ + int size; + private: + /** + * The current place in the buffer where we are reading. This class + * keeps a separate size, pointer, and cursor so that we can move + * forwards and backwards and still have checks that we haven't fallen + * off either end. + */ + int cursor; + /** + * Private constructor. This is used to create input buffers that + * refer to the same memory, but have different cursors. + */ + input_buffer(const char* b, int s, int c) : buffer(b), size(s), + cursor(c) {} + public: + /** + * Returns the file name associated with this buffer. + */ + virtual const std::string &filename() const + { + static std::string s; + return s; + } + static std::unique_ptr<input_buffer> buffer_for_file(const std::string &path, + bool warn=true); + /** + * Skips all characters in the input until the specified character is + * encountered. + */ + void skip_to(char); + /** + * Parses up to a specified character and returns the intervening + * characters as a string. + */ + std::string parse_to(char); + /** + * Return whether all input has been consumed. + */ + bool finished() { return cursor >= size; } + /** + * Virtual destructor. Does nothing, but exists so that subclasses + * that own the memory can run cleanup code for deallocating it. + */ + virtual ~input_buffer() {}; + /** + * Constructs an empty buffer. + */ + input_buffer() : buffer(0), size(0), cursor(0) {} + /** + * Constructs a new buffer with a specified memory region and size. + */ + input_buffer(const char* b, int s) : buffer(b), size(s), cursor(0){} + /** + * Returns a new input buffer referring into this input, clamped to the + * specified size. If the requested buffer would fall outside the + * range of this one, then it returns an empty buffer. + * + * The returned buffer shares the same underlying storage as the + * original. This is intended to be used for splitting up the various + * sections of a device tree blob. Requesting a size of 0 will give a + * buffer that extends to the end of the available memory. + */ + input_buffer buffer_from_offset(int offset, int s=0); + /** + * Dereferencing operator, allows the buffer to be treated as a char* + * and dereferenced to give a character. This returns a null byte if + * the cursor is out of range. + */ + inline char operator*() + { + if (cursor >= size) { return '\0'; } + if (cursor < 0) { return '\0'; } + return buffer[cursor]; + } + /** + * Array subscripting operator, returns a character at the specified + * index offset from the current cursor. The offset may be negative, + * to reread characters that have already been read. If the current + * cursor plus offset is outside of the range, this returns a nul + * byte. + */ + inline char operator[](int offset) + { + if (cursor + offset >= size) { return '\0'; } + if (cursor + offset < 0) { return '\0'; } + return buffer[cursor + offset]; + } + /** + * Increments the cursor, iterating forward in the buffer. + */ + inline input_buffer &operator++() + { + cursor++; + return *this; + } + const char *begin() + { + return buffer; + } + const char *end() + { + return buffer + size; + } + /** + * Consumes a character. Moves the cursor one character forward if the + * next character matches the argument, returning true. If the current + * character does not match the argument, returns false. + */ + inline bool consume(char c) + { + if (*(*this) == c) + { + ++(*this); + return true; + } + return false; + } + /** + * Consumes a string. If the (null-terminated) string passed as the + * argument appears in the input, advances the cursor to the end and + * returns true. Returns false if the string does not appear at the + * current point in the input. + */ + bool consume(const char *str); + /** + * Reads unsigned from char literal. Returns true and advances + * the cursor to next char. + * + * The parsed value is returned via the argument. + */ + bool consume_char_literal(unsigned long long &outInt); + /** + * Reads an integer in base 8, 10, or 16. Returns true and advances + * the cursor to the end of the integer if the cursor points to an + * integer, returns false and does not move the cursor otherwise. + * + * The parsed value is returned via the argument. + */ + bool consume_integer(unsigned long long &outInt); + /** + * Reads an arithmetic expression (containing any of the normal C + * operators), evaluates it, and returns the result. + */ + bool consume_integer_expression(unsigned long long &outInt); + /** + * Consumes two hex digits and return the resulting byte via the first + * argument. If the next two characters are hex digits, returns true + * and advances the cursor. If not, then returns false and leaves the + * cursor in place. + */ + bool consume_hex_byte(uint8_t &outByte); + /** + * Template function that consumes a binary value in big-endian format + * from the input stream. Returns true and advances the cursor if + * there is a value of the correct size. This function assumes that + * all values must be natively aligned, and so advances the cursor to + * the correct alignment before reading. + */ + template<typename T> + bool consume_binary(T &out) + { + int align = 0; + int type_size = sizeof(T); + if (cursor % type_size != 0) + { + align = type_size - (cursor % type_size); + } + if (size < cursor + align + type_size) + { + return false; + } + cursor += align; + assert(cursor % type_size == 0); + out = 0; + for (int i=0 ; i<type_size ; ++i) + { + if (size < cursor) + { + return false; + } + out <<= 8; + out |= (((T)buffer[cursor++]) & 0xff); + } + return true; + } +#ifndef NDEBUG + /** + * Dumps the current cursor value and the unconsumed values in the + * input buffer to the standard error. This method is intended solely + * for debugging. + */ + void dump(); +#endif +}; +/** + * Explicit specialisation for reading a single byte. + */ +template<> +inline bool input_buffer::consume_binary(uint8_t &out) +{ + if (size < cursor + 1) + { + return false; + } + out = buffer[cursor++]; + return true; +} + +/** + * An input buffer subclass used for parsing DTS files. This manages a stack + * of input buffers to handle /input/ operations. + */ +class text_input_buffer +{ + std::unordered_set<std::string> defines; + /** + * The cursor is the input into the input stream where we are currently reading. + */ + int cursor = 0; + /** + * The current stack of includes. The current input is always from the top + * of the stack. + */ + std::stack<std::shared_ptr<input_buffer>> input_stack; + /** + * + */ + const std::vector<std::string> include_paths; + /** + * Reads forward past any spaces. The DTS format is not whitespace + * sensitive and so we want to scan past whitespace when reading it. + */ + void skip_spaces(); + /** + * Returns the character immediately after the current one. + * + * This method does not look between files. + */ + char peek(); + /** + * If a /include/ token is encountered, then look up the corresponding + * input file, push it onto the input stack, and continue. + */ + void handle_include(); + /** + * The base directory for this file. + */ + const std::string dir; + /** + * The file where dependencies should be output. + */ + FILE *depfile; + public: + /** + * Construct a new text input buffer with the specified buffer as the start + * of parsing and the specified set of input paths for handling new + * inclusions. + */ + text_input_buffer(std::unique_ptr<input_buffer> &&b, + std::unordered_set<std::string> &&d, + std::vector<std::string> &&i, + const std::string directory, + FILE *deps) + : defines(d), include_paths(i), dir(directory), depfile(deps) + { + input_stack.push(std::move(b)); + } + /** + * Skips all characters in the input until the specified character is + * encountered. + */ + void skip_to(char); + /** + * Parse an expression. If `stopAtParen` is set, then only parse a number + * or a parenthetical expression, otherwise assume that either is the + * left-hand side of a binary expression and try to parse the right-hand + * side. + */ + expression_ptr parse_expression(bool stopAtParen=false); + /** + * Parse a binary expression, having already parsed the right-hand side. + */ + expression_ptr parse_binary_expression(expression_ptr lhs); + /** + * Return whether all input has been consumed. + */ + bool finished() + { + return input_stack.empty() || + ((input_stack.size() == 1) && input_stack.top()->finished()); + } + /** + * Dereferencing operator. Returns the current character in the top input buffer. + */ + inline char operator*() + { + if (input_stack.empty()) + { + return 0; + } + return *(*input_stack.top()); + } + /** + * Increments the cursor, iterating forward in the buffer. + */ + inline text_input_buffer &operator++() + { + if (input_stack.empty()) + { + return *this; + } + cursor++; + auto &top = *input_stack.top(); + ++top; + if (top.finished()) + { + input_stack.pop(); + } + return *this; + } + /** + * Consumes a character. Moves the cursor one character forward if the + * next character matches the argument, returning true. If the current + * character does not match the argument, returns false. + */ + inline bool consume(char c) + { + if (*(*this) == c) + { + ++(*this); + return true; + } + return false; + } + /** + * Consumes a string. If the (null-terminated) string passed as the + * argument appears in the input, advances the cursor to the end and + * returns true. Returns false if the string does not appear at the + * current point in the input. + * + * This method does not scan between files. + */ + bool consume(const char *str) + { + if (input_stack.empty()) + { + return false; + } + return input_stack.top()->consume(str); + } + /** + * Converts next char into unsigned + * + * The parsed value is returned via the argument. + * + * This method does not scan between files. + */ + bool consume_char_literal(unsigned long long &outInt) + { + if (input_stack.empty()) + { + return false; + } + return input_stack.top()->consume_char_literal(outInt); + } + /** + * Reads an integer in base 8, 10, or 16. Returns true and advances + * the cursor to the end of the integer if the cursor points to an + * integer, returns false and does not move the cursor otherwise. + * + * The parsed value is returned via the argument. + * + * This method does not scan between files. + */ + bool consume_integer(unsigned long long &outInt) + { + if (input_stack.empty()) + { + return false; + } + return input_stack.top()->consume_integer(outInt); + } + /** + * Reads an arithmetic expression (containing any of the normal C + * operators), evaluates it, and returns the result. + */ + bool consume_integer_expression(unsigned long long &outInt); + /** + * Consumes two hex digits and return the resulting byte via the first + * argument. If the next two characters are hex digits, returns true + * and advances the cursor. If not, then returns false and leaves the + * cursor in place. + * + * This method does not scan between files. + */ + bool consume_hex_byte(uint8_t &outByte) + { + if (input_stack.empty()) + { + return false; + } + return input_stack.top()->consume_hex_byte(outByte); + } + /** + * Returns the longest string in the input buffer starting at the + * current cursor and composed entirely of characters that are valid in + * node names. + */ + std::string parse_node_name(); + /** + * Returns the longest string in the input buffer starting at the + * current cursor and composed entirely of characters that are valid in + * property names. + */ + std::string parse_property_name(); + /** + * Parses either a node or a property name. If is_property is true on + * entry, then only property names are parsed. If it is false, then it + * will be set, on return, to indicate whether the parsed name is only + * valid as a property. + */ + std::string parse_node_or_property_name(bool &is_property); + /** + * Parses up to a specified character and returns the intervening + * characters as a string. + */ + std::string parse_to(char); + /** + * Advances the cursor to the start of the next token, skipping + * comments and whitespace. If the cursor already points to the start + * of a token, then this function does nothing. + */ + text_input_buffer &next_token(); + /** + * Location in the source file. This should never be interpreted by + * anything other than error reporting functions of this class. It will + * eventually become something more complex than an `int`. + */ + class source_location + { + friend class text_input_buffer; + /** + * The text buffer object that included `b`. + */ + text_input_buffer &buffer; + /** + * The underlying buffer that contains this location. + */ + std::shared_ptr<input_buffer> b; + /** + * The offset within the current buffer of the source location. + */ + int cursor; + source_location(text_input_buffer &buf) + : buffer(buf), + b(buf.input_stack.empty() ? nullptr : buf.input_stack.top()), + cursor(b ? b->cursor : 0) {} + public: + /** + * Report an error at this location. + */ + void report_error(const char *msg) + { + if (b) + { + buffer.parse_error(msg, *b, cursor); + } + else + { + buffer.parse_error(msg); + } + } + }; + /** + * Returns the current source location. + */ + source_location location() + { + return { *this }; + } + /** + * Prints a message indicating the location of a parse error. + */ + void parse_error(const char *msg); + /** + * Reads the contents of a binary file into `b`. The file name is assumed + * to be relative to one of the include paths. + * + * Returns true if the file exists and can be read, false otherwise. + */ + bool read_binary_file(const std::string &filename, byte_buffer &b); + private: + /** + * Prints a message indicating the location of a parse error, given a + * specified location. This is used when input has already moved beyond + * the location that caused the failure. + */ + void parse_error(const char *msg, input_buffer &b, int loc); +}; + +} // namespace dtc + +#endif // !_INPUT_BUFFER_HH_ |