Files
yamal/yamal.hpp
2025-11-26 14:52:58 +01:00

527 lines
21 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#pragma once
#include <string.h>
#include <algorithm>
#include <cctype>
#include <cstddef>
#include <cstdio>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
// String-keyed map that remembers the order in which keys were first inserted.
//
// Values live in an unordered_map for lookup; a parallel vector of keys records
// the insertion order that items() and is_last() report.
template<typename Value>
class ordered_map {
    std::unordered_map<std::string, Value> map;
    std::vector<std::string> keys;
public:
    // Returns the value stored under `key`, default-constructing it (and
    // recording `key` as the newest key) when it is not present yet.
    Value& operator[](const std::string& key) {
        // try_emplace performs one lookup and tells us whether the key is new.
        auto [it, inserted] = map.try_emplace(key);
        if (inserted) keys.push_back(key);
        return it->second;
    }
    // True when `key` is present.
    bool contains(const std::string& key) const {
        return map.find(key) != map.end();
    }
    // Removes `key` (value and ordering entry); does nothing when absent.
    void erase(const std::string& key) {
        if (map.erase(key)) {
            keys.erase(std::remove(keys.begin(), keys.end(), key), keys.end());
        }
    }
    // True when `key` is the most recently inserted key still present.
    bool is_last(const std::string& key) const {
        return !keys.empty() && keys.back() == key;
    }
    // Removes every entry.
    void clear() {
        map.clear();
        keys.clear();
    }
    // Number of stored entries.
    std::size_t size() const { return keys.size(); }
    // Returns a copy of all (key, value) pairs in insertion order.
    std::vector<std::pair<std::string, Value>> items() const {
        std::vector<std::pair<std::string, Value>> result;
        result.reserve(keys.size());
        for (const auto& k : keys) result.emplace_back(k, map.at(k));
        return result;
    }
    // Checked access; throws std::out_of_range when `key` is absent.
    const Value& at(const std::string& key) const { return map.at(key); }
    Value& at(const std::string& key) { return map.at(key); }
};
// A node of a small YAML-like document tree.
//
// Every node is a scalar (text in `value`), a vector (`vec_data`) or a map
// (`map_data`, insertion-ordered). Nodes also carry a comment that is written
// on the lines before them (`pre_comment`), a trailing comment written after
// them (`inline_comment`) and formatting flags. serialize() renders the tree as
// text and deserialize() parses text back into the tree.
//
// NOTE: parsing keeps its state in static members (`tokenizer`,
// `current_comment`), so only one parse can be in progress at a time.
class yamal {
public:
    enum Mode { SCALAR, VECTOR, MAP };
    Mode mode = SCALAR;
    std::string value;              // scalar text (SCALAR mode)
    std::vector<yamal> vec_data;    // elements (VECTOR mode)
    ordered_map<yamal> map_data;    // attributes in insertion order (MAP mode)
    std::string pre_comment;        // comment lines written before the node, '\n'-separated
    std::string inline_comment;     // comment written after the node on the same line
    bool quoted = false;            // scalar is written between double quotes
    bool inline_map = false;        // map/vector is written in flow style: { ... } / [ ... ]
    bool blank_line = false;        // stored by setBlankLine(); not read by serialize()

    // Mode checks
    bool isScalar() const { return mode == SCALAR; }
    bool isVector() const { return mode == VECTOR; }
    bool isMap() const { return mode == MAP; }

    // Mode setters: switch mode and drop the data belonging to the other modes.
    void clearToScalar() { mode = SCALAR; vec_data.clear(); map_data.clear(); }
    void clearToVector() { mode = VECTOR; value.clear(); map_data.clear(); }
    void clearToMap() { mode = MAP; value.clear(); vec_data.clear(); }

    // Scalar assignment: the node becomes a scalar holding the textual form of `v`.
    yamal& operator=(const char* v) { clearToScalar(); value = std::string(v); return *this; }
    yamal& operator=(const std::string& v) { clearToScalar(); value = v; return *this; }
    yamal& operator=(int v) { clearToScalar(); value = std::to_string(v); return *this; }
    yamal& operator=(float v) { clearToScalar(); value = std::to_string(v); return *this; }
    yamal& operator=(bool b) { clearToScalar(); value = b ? "true" : "false"; return *this; }

    // Scalar conversions. asInt()/asFloat() forward to std::stoi/std::stof and
    // therefore throw std::invalid_argument / std::out_of_range on bad input.
    std::string asString() const { return value; }
    int asInt() const { return std::stoi(value); }
    float asFloat() const { return std::stof(value); }

    // "true"/"false" (any letter case) map to the boolean; otherwise the value
    // is read as a number and is true when non-zero. Unparseable text is false.
    bool asBool() const {
        std::string s = value;
        // Go through unsigned char: passing a negative char to tolower is undefined.
        std::transform(s.begin(), s.end(), s.begin(),
                       [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
        if (s == "true") return true;
        if (s == "false") return false;
        try {
            size_t idx;
            float f = std::stof(s, &idx);
            return (idx != 0 && f != 0.0f);
        } catch (...) {
            return false;
        }
    }

    // Comment accessors
    const std::string& getComment() const { return pre_comment; }
    yamal& setComment(const std::string& c) { pre_comment = c; return *this; }

    // Appends a line to the pre-comment. An empty `c` appends a blank line;
    // otherwise `c` is separated from existing text by a newline. No separator
    // is added while the comment so far consists only of newlines.
    yamal& appendComment(const std::string& c) {
        if (c.empty()) {
            pre_comment += "\n";
        } else {
            const bool has_text = pre_comment.find_first_not_of('\n') != std::string::npos;
            if (has_text) pre_comment += "\n";
            pre_comment += c;
        }
        return *this;
    }

    // Inline comment accessors
    const std::string& getInlineComment() const { return inline_comment; }
    yamal& setInlineComment(const std::string& c) { inline_comment = c; return *this; }

    // Formatting
    yamal& setQuoted(bool q = true) { quoted = q; return *this; }
    yamal& setInline(bool in = true) { inline_map = in; return *this; }
    yamal& setBlankLine(bool b = true) { blank_line = b; return *this; }

    // Vector and map access. Each accessor first converts the node to the
    // matching mode, discarding data held for the other modes.
    void push_back(const yamal& item) {
        clearToVector();
        vec_data.push_back(item);
    }
    yamal& emplace_back() {
        clearToVector();
        return vec_data.emplace_back();
    }
    // Grows the vector with default nodes when `i` is past the end.
    yamal& operator[](size_t i) {
        clearToVector();
        if (vec_data.size() <= i) vec_data.resize(i + 1);
        return vec_data[i];
    }
    // Creates the attribute when `key` is not present yet.
    yamal& operator[](const std::string& key) {
        clearToMap();
        return map_data[key];
    }
    // Copy of the map attributes in insertion order.
    std::vector<std::pair<std::string, yamal>> attributes() const {
        return map_data.items();
    }

    // Renders this node as text.
    //
    // indent            - number of spaces placed before each line of the node.
    // emit_self_comment - when true the node's own pre-comment is written first;
    //                     containers pass false because they write the comments
    //                     of their children themselves.
    // The result has no trailing newline.
    std::string serialize(int indent = 0, bool emit_self_comment = true) const {
        std::ostringstream out;
        const std::string pad(indent, ' ');
        if (emit_self_comment && !pre_comment.empty()) {
            out << formatComment(pre_comment, indent);
        }
        switch (mode) {
        case SCALAR: {
            out << pad << (quoted ? "\"" + value + "\"" : value);
            if (!inline_comment.empty()) out << " # " << inline_comment;
            break;
        }
        case MAP: {
            const auto entries = map_data.items();
            if (inline_map) {
                out << pad << "{ ";
                bool first = true;
                for (const auto& kv : entries) {
                    if (!first) out << ", ";
                    out << kv.first << ": " << kv.second.serialize(0, false);
                    first = false;
                }
                out << " }";
                if (!inline_comment.empty()) out << " # " << inline_comment;
            } else {
                for (size_t i = 0; i < entries.size(); ++i) {
                    const yamal& child = entries[i].second;
                    // The child's pre-comment goes on the lines before its key.
                    if (!child.pre_comment.empty()) {
                        out << formatComment(child.pre_comment, indent);
                    }
                    out << pad << entries[i].first << ": ";
                    emitChildValue(out, child, indent + 2);
                    if (i + 1 < entries.size()) out << "\n";
                }
            }
            break;
        }
        case VECTOR: {
            if (inline_map) {
                out << pad << "[ ";
                for (size_t i = 0; i < vec_data.size(); ++i) {
                    if (i > 0) out << ", ";
                    out << vec_data[i].serialize(0, false);
                }
                out << " ]";
                if (!inline_comment.empty()) out << " # " << inline_comment;
            } else if (vec_data.empty()) {
                out << pad << "[]";
                if (!inline_comment.empty()) out << " # " << inline_comment;
            } else {
                for (size_t i = 0; i < vec_data.size(); ++i) {
                    const yamal& item = vec_data[i];
                    // Finish the previous item's line first; otherwise the
                    // comment below would be glued to the end of that line.
                    if (i > 0) out << "\n";
                    if (!item.pre_comment.empty()) {
                        out << formatComment(item.pre_comment, indent);
                    }
                    out << pad << "- ";
                    if (item.isScalar() || item.inline_map) {
                        out << item.serialize(0, false);
                    } else if (item.isMap()) {
                        // First attribute shares the dash line, the rest are
                        // aligned below it (two columns to the right of the dash).
                        const auto attrs = item.attributes();
                        for (size_t j = 0; j < attrs.size(); ++j) {
                            const yamal& attr = attrs[j].second;
                            if (j > 0) {
                                out << "\n";
                                if (!attr.pre_comment.empty()) {
                                    out << formatComment(attr.pre_comment, indent + 2);
                                }
                                out << std::string(indent + 2, ' ');
                            }
                            out << attrs[j].first << ": ";
                            emitChildValue(out, attr, indent + 4);
                        }
                    } else {
                        out << "\n" << item.serialize(indent + 2, false);
                    }
                }
                if (!inline_comment.empty()) out << " # " << inline_comment;
            }
            break;
        }
        }
        return out.str();
    }

    // Parses `buffer` into this node. Errors are reported on stdout by the
    // tokenizer and stop the parse; the node keeps whatever was read so far.
    void deserialize(std::string buffer) {
        current_comment.clear();  // do not inherit comments left over from an earlier parse
        tokenizer::init(std::move(buffer));
        deserialize(-1);
    }

    // Comment text collected by the parser and not yet attached to a node.
    inline static std::string current_comment;

    // Parses the tokens belonging to this node. `indent` is the column of the
    // parent entry: a key or dash at that column or further left ends this node.
    // Expects tokenizer::init() to have been called.
    void deserialize(int indent) {
        using types = tokenizer::types;
        if (tokenizer::error()) return;
        int current_indent = -2;  // column shared by this node's entries; unknown until the first one
        tokenizer::getNextToken();
        // Stop on error as well: after an error children return without
        // consuming a token, so looping on would never make progress.
        while (!tokenizer::error() && tokenizer::type != types::ENDOFFILE) {
            switch (tokenizer::type) {
            case types::COMMENT:
                // Collected until the next key or item, which receives it as pre-comment.
                current_comment += tokenizer::string + "\n";
                tokenizer::getNextToken();
                break;
            case types::BLANKLINE:
                tokenizer::getNextToken();
                // Two line ends in a row mean an empty line; keep it in the comment text.
                if (tokenizer::type == types::BLANKLINE) {
                    current_comment += "\n";
                    tokenizer::getNextToken();
                }
                break;
            case types::KEY: {
                if (tokenizer::indent <= indent) return;  // belongs to an ancestor
                if (current_indent < 0) current_indent = tokenizer::indent;
                if (tokenizer::indent != current_indent) { tokenizer::error("Wrong indent"); return; }
                yamal& child = (*this)[tokenizer::string];
                if (!current_comment.empty()) { child.setComment(current_comment); current_comment.clear(); }
                child.deserialize(current_indent);
                break;
            }
            case types::STRINGSCALAR:
                this->setQuoted(true);
                [[fallthrough]];
            case types::SCALAR:
                *this = tokenizer::string;
                tokenizer::getNextToken();
                return;
            case types::VECTOR: {
                if (tokenizer::indent <= indent) return;  // belongs to an ancestor
                if (current_indent < 0) current_indent = tokenizer::indent;
                if (tokenizer::indent != current_indent) { tokenizer::error("Wrong indent"); return; }
                yamal& item = emplace_back();
                if (!current_comment.empty()) { item.setComment(current_comment); current_comment.clear(); }
                item.deserialize(current_indent);
                break;
            }
            case types::INLINEVEC: {
                // Flow sequence of scalars: [ a, "b", c ]
                setInline(true);
                clearToVector();
                tokenizer::getNextToken();
                if (tokenizer::type == types::INLINEVECOFF) return;
                while (tokenizer::type != types::ENDOFFILE) {
                    const bool is_quoted = tokenizer::type == types::STRINGSCALAR;
                    if (tokenizer::type != types::SCALAR && !is_quoted) { tokenizer::error("Scalar expected"); return; }
                    yamal& element = emplace_back();
                    element = tokenizer::string;
                    if (is_quoted) element.setQuoted(true);  // keep the quotes when written back
                    tokenizer::getNextToken();
                    if (tokenizer::type == types::INLINEVECOFF) return;
                    if (tokenizer::type != types::COMMA) { tokenizer::error("Comma expected"); return; }
                    tokenizer::getNextToken();
                }
                tokenizer::error("Unexpected end of file");
                return;
            }
            case types::INLINEKEY: {
                // Flow map of scalar values: { a: 1, b: "two" }
                setInline(true);
                clearToMap();
                tokenizer::getNextToken();
                if (tokenizer::type == types::INLINEKEYOFF) return;
                while (tokenizer::type != types::ENDOFFILE) {
                    if (tokenizer::type != types::KEY) { tokenizer::error("Key expected"); return; }
                    const std::string key = tokenizer::string;
                    tokenizer::getNextToken();
                    const bool is_quoted = tokenizer::type == types::STRINGSCALAR;
                    if (tokenizer::type != types::SCALAR && !is_quoted) { tokenizer::error("Scalar expected"); return; }
                    yamal& entry = (*this)[key];
                    entry = tokenizer::string;
                    if (is_quoted) entry.setQuoted(true);  // keep the quotes when written back
                    tokenizer::getNextToken();
                    if (tokenizer::type == types::INLINEKEYOFF) return;
                    if (tokenizer::type != types::COMMA) { tokenizer::error("Comma expected"); return; }
                    tokenizer::getNextToken();
                }
                tokenizer::error("Unexpected end of file");
                return;
            }
            default:
                // Stray punctuation (e.g. the closing bracket left by a child): skip it.
                tokenizer::getNextToken();
            }
        }
    }

private:
    // Writes the value that follows "key: ". Scalars and flow-style containers
    // stay on the key's line; block containers start on the next line and are
    // indented by `nested_indent` spaces.
    static void emitChildValue(std::ostream& out, const yamal& child, int nested_indent) {
        if (child.inline_map || child.isScalar()) {
            out << child.serialize(0, false);
        } else {
            out << "\n" << child.serialize(nested_indent, false);
        }
    }

    // Splits the text being parsed into tokens. All state is static; init()
    // resets it for a new text.
    class tokenizer
    {
    public:
        enum class types { COMMENT, BLANKLINE, KEY, SCALAR, STRINGSCALAR, VECTOR, INLINEVEC, COMMA, INLINEVECOFF,
                           INLINEKEY, INLINEKEYOFF, ENDOFFILE };
        inline static types type;                 // kind of the current token
        inline static int indent;                 // column at which the current token starts
        inline static std::string string;         // text of the current token
        inline static std::string buffer_string;  // owns the text being parsed
        inline static const char *buffer = "";    // read position; empty text until init() is called
        inline static int line;                   // zero-based position of the read cursor
        inline static int col;
        inline static bool inlined;               // true between an opening and a closing bracket/brace
        inline static bool error_state;

        // Starts tokenizing `file` from its beginning and clears the error state.
        static void init(std::string file) {
            buffer_string = std::move(file);
            buffer = buffer_string.c_str();
            col = line = 0;
            inlined = false;
            error_state = false;
        }
        static bool error() { return error_state; }
        // Records the error and prints it with the 1-based cursor position.
        static void error(std::string message) {
            error_state = true;
            printf("YAML ERROR: %s at line %i col %i\n", message.c_str(), line+1, col+1);
        }
        // Consumes and returns one character while tracking line and column.
        // A carriage return is skipped silently, so "\r\n" is read as "\n".
        // Returns '\0' without advancing at the end of the text.
        static char next() {
            if (*buffer == '\r') ++buffer;
            if (*buffer == '\0') return '\0';  // never step past the terminator
            if (*buffer == '\n') { col = 0; line++; } else { col++; }
            return *(buffer++);
        }
        static void ignoreWhiteSpace() { while (*buffer == ' ' || *buffer == '\t') next(); }
        // Reads a comment: skips the '#' and leading blanks, then takes the rest
        // of the line (the line end itself is left in the buffer).
        static void getEverythingUntilEOL() {
            next();
            ignoreWhiteSpace();
            std::string text;
            while (*buffer != '\n' && *buffer != '\r' && *buffer != '\0') text += next();
            string = text;
        }
        // Reads a double-quoted scalar: the text between the opening quote at the
        // cursor and the next '"' (or the end of the text). No escape handling.
        static void getEverythingUntilQuote() {
            next();  // opening quote
            std::string text;
            while (*buffer != '"' && *buffer != '\0') {
                const char c = next();
                if (c == '\0') break;  // a lone CR right before the end of the text
                text += c;
            }
            next();  // closing quote; does nothing at the end of the text
            string = text;
        }
        // Reads an unquoted token. A first word ending in ':' is a KEY (colon
        // removed). Anything else is a SCALAR that extends over the following
        // words on the same line until a comment, a flow delimiter (in flow
        // style), a word that looks like a key, or the end of the line.
        static types getWord() {
            const char *delimiters = inlined ? " \t\r\n[]{}," : " \t\r\n";
            std::string word;
            // strchr also matches the terminating NUL, so this stops at the end of the text.
            while (!strchr(delimiters, *buffer)) word += next();
            if (!word.empty() && word.back() == ':') {
                word.pop_back();
                string = word;
                return types::KEY;
            }
            for (;;) {
                std::string gap;
                while (*buffer == ' ' || *buffer == '\t') gap += next();
                // No blanks consumed means we stopped on a delimiter; a '#' after
                // blanks starts a comment; a delimiter after blanks ends the scalar.
                if (gap.empty() || *buffer == '#' || strchr(delimiters, *buffer)) break;
                // Look ahead: a following word ending in ':' is a key, not scalar text.
                const char *end = buffer;
                while (!strchr(delimiters, *end)) ++end;
                if (end[-1] == ':') break;
                word += gap;
                while (buffer != end) word += next();
            }
            string = word;
            return types::SCALAR;
        }
        // Advances to the next token and fills in `type`, `indent` and, for
        // tokens that carry text, `string`.
        static void getNextToken()
        {
            ignoreWhiteSpace();
            indent = col;
            switch (*buffer) {
            case 0: {
                type = types::ENDOFFILE;
                break;
            }
            case '#': {
                getEverythingUntilEOL();
                type = types::COMMENT;
                break;
            }
            case '"': {
                getEverythingUntilQuote();
                type = types::STRINGSCALAR;
                break;
            }
            case '\r':
            case '\n': {
                type = types::BLANKLINE;
                // One call is enough: next() swallows the CR of a CRLF pair
                // itself. A second call would eat the first character of the
                // following line.
                next();
                break;
            }
            case '-': {
                // "- " introduces a sequence item; any other '-' starts a word (e.g. -5).
                if (*(buffer+1) == ' ') {
                    type = types::VECTOR;
                    next();
                } else {
                    type = getWord();
                }
                break;
            }
            case '[': {
                type = types::INLINEVEC; inlined = true;
                next();
                break;
            }
            case ']': {
                type = types::INLINEVECOFF; inlined = false;
                next();
                break;
            }
            case '{': {
                type = types::INLINEKEY; inlined = true;
                next();
                break;
            }
            case '}': {
                type = types::INLINEKEYOFF; inlined = false;
                next();
                break;
            }
            case ',': {
                type = types::COMMA;
                next();
                break;
            }
            default: {
                type = getWord();
                break;
            }
            }
        }
    };

    // Turns comment text into output lines: every non-empty line of `c` becomes
    // "<indent spaces># <line>\n", and every empty line becomes a bare "\n" so
    // that blank lines survive a round trip.
    static std::string formatComment(const std::string& c, int indent) {
        std::ostringstream out;
        std::istringstream in(c);
        std::string line;
        while (std::getline(in, line)) {
            if (line.empty()) {
                out << "\n";
            } else {
                out << std::string(indent, ' ') << "# " << line << "\n";
            }
        }
        return out.str();
    }
};