Refactor project to focus on Markdown parser development. Removed unused database and template-related code, added Markdown parsing functionality, and updated build configuration.

This commit is contained in:
2025-12-02 19:44:45 +01:00
parent a8fcabc571
commit 63c24f6355
19 changed files with 661 additions and 253 deletions

View File

@@ -0,0 +1,77 @@
package nl.astraeus.wiki.parser
/**
 * Block-level elements of a parsed Markdown document.
 *
 * Every variant has value semantics (structural `equals`/`hashCode`), so parser
 * output can be compared directly.
 */
sealed class MarkdownPart {
    data object NewLine : MarkdownPart()

    /** Explicit page break. */
    data object PageBreak : MarkdownPart()

    /** Inline elements that make up the content of a [Paragraph]. */
    sealed class ParagraphPart {
        /** Plain, unformatted text. */
        data class Text(
            val text: String,
        ) : ParagraphPart()

        /** Hard line break inside a paragraph. */
        data object LineBreak : ParagraphPart()

        /** Hyperlink to [url] with an optional [label] and [title]. */
        data class Link(
            val url: String,
            val label: String? = null,
            val title: String? = null,
        ) : ParagraphPart()

        /** Image with alternative text [alt] loaded from [src]; [url] is an optional link target. */
        data class Image(
            val alt: String,
            val src: String,
            val url: String? = null,
        ) : ParagraphPart()

        data class Bold(
            val text: String,
        ) : ParagraphPart()

        data class Italic(
            val text: String,
        ) : ParagraphPart()

        // The three classes below were plain classes; they are data classes now so
        // that they compare by content like every other inline part.
        data class BoldItalic(
            val text: String,
        ) : ParagraphPart()

        data class StrikeThrough(
            val text: String,
        ) : ParagraphPart()

        data class InlineCode(
            val text: String,
        ) : ParagraphPart()
    }

    /** A paragraph made of inline [parts]. */
    data class Paragraph(
        val parts: List<ParagraphPart>,
    ) : MarkdownPart()

    /** A heading with its [text] and its level in [size]. */
    data class Header(
        val text: String,
        val size: Int,
    ) : MarkdownPart()

    /** Bullet list; each entry of [lines] is the raw text of one item. */
    data class UnorderedList(
        val lines: List<String>,
    ) : MarkdownPart()

    /** Numbered list; each entry of [lines] is the raw text of one item. */
    data class OrderedList(
        val lines: List<String>,
    ) : MarkdownPart()

    /** Code block with its raw [text] and the [language] label (may be empty). */
    data class CodeBlock(
        val text: String,
        val language: String,
    ) : MarkdownPart()

    /** Table with one header row and zero or more data [rows]. */
    data class Table(
        val headers: List<String>,
        val rows: List<List<String>>,
    ) : MarkdownPart()

    /**
     * Horizontal rule.
     *
     * Kept as a class (not an object) so existing `Ruler()` call sites keep
     * compiling; all instances are equal to each other.
     */
    class Ruler : MarkdownPart() {
        override fun equals(other: Any?): Boolean = other is Ruler

        override fun hashCode(): Int = Ruler::class.hashCode()

        override fun toString(): String = "Ruler"
    }
}

View File

@@ -0,0 +1,174 @@
package nl.astraeus.wiki.parser
import nl.astraeus.wiki.parser.MarkdownPart.ParagraphPart.*
/**
 * States of the inline paragraph parser.
 *
 * Each value names the construct whose characters are currently being
 * buffered; the captured text is stored under the same key when the state is left.
 */
private enum class ParType {
// Initial state: plain text outside any inline construct.
TEXT,
// Link: label after "[", URL after "](", optional quoted title, then the closing ")".
LINK_LABEL,
LINK_URL,
LINK_TITLE,
LINK_END,
// Emphasis and code spans, entered on "**", "*", "***", "~~" and "`" respectively.
BOLD,
ITALIC,
BOLD_ITALIC,
STRIKETHROUGH,
INLINE_CODE,
// Image without a link: alt text after "![", source after "](".
IMAGE_ALT,
IMAGE_SRC,
// Image wrapped in a link: alt text after "[![", source after "](", link target after ")](".
LINK_IMAGE_ALT,
LINK_IMAGE_SRC,
LINK_IMAGE_LINK,
}
/** Text captured per parser state, keyed by the state that was active while it was buffered. */
private typealias ParagraphData = MutableMap<ParType, String>
/**
 * One transition of the inline parser.
 *
 * @property fromType state in which this transition may fire
 * @property text delimiter that must appear at the current input position
 * @property toType state entered after the delimiter has been consumed
 * @property out builds the part to emit from the captured data, or returns null to emit nothing
 */
private data class ParState(
val fromType: ParType,
val text: String,
val toType: ParType,
val out: (ParagraphData) -> MarkdownPart.ParagraphPart? = { _ -> null }
)
/**
 * Transition table of the inline parser.
 *
 * Order matters: the parser takes the first entry whose delimiter matches, so
 * longer delimiters ("[![", "***") are listed before their shorter prefixes.
 * Every transition out of TEXT emits the plain text buffered so far.
 */
private val states = listOf(
    // Image with link: [![alt](src)](link)
    ParState(ParType.TEXT, "[![", ParType.LINK_IMAGE_ALT) { data ->
        Text(data.getValue(ParType.TEXT))
    },
    ParState(ParType.LINK_IMAGE_ALT, "](", ParType.LINK_IMAGE_SRC),
    ParState(ParType.LINK_IMAGE_SRC, ")](", ParType.LINK_IMAGE_LINK),
    ParState(ParType.LINK_IMAGE_LINK, ")", ParType.TEXT) { data ->
        Image(
            data.getValue(ParType.LINK_IMAGE_ALT),
            data.getValue(ParType.LINK_IMAGE_SRC),
            data[ParType.LINK_IMAGE_LINK],
        )
    },
    // Image without link: ![alt](src)
    ParState(ParType.TEXT, "![", ParType.IMAGE_ALT) { data ->
        Text(data.getValue(ParType.TEXT))
    },
    ParState(ParType.IMAGE_ALT, "](", ParType.IMAGE_SRC),
    ParState(ParType.IMAGE_SRC, ")", ParType.TEXT) { data ->
        Image(
            data.getValue(ParType.IMAGE_ALT),
            data.getValue(ParType.IMAGE_SRC),
        )
    },
    // Links: [label](url) and [label](url "title")
    ParState(ParType.TEXT, "[", ParType.LINK_LABEL) { data ->
        Text(data.getValue(ParType.TEXT))
    },
    ParState(ParType.LINK_LABEL, "](", ParType.LINK_URL),
    // NOTE(review): a "]" that is not followed by "(" emits the label as text but
    // still moves to LINK_URL, so the following input is collected as a URL until
    // the next ")". This looks unintended (TEXT seems the natural target) — confirm.
    ParState(ParType.LINK_LABEL, "]", ParType.LINK_URL) { data ->
        Text(data.getValue(ParType.LINK_LABEL))
    },
    ParState(ParType.LINK_URL, ")", ParType.TEXT) { data ->
        Link(data.getValue(ParType.LINK_URL), data[ParType.LINK_LABEL])
    },
    ParState(ParType.LINK_URL, "\"", ParType.LINK_TITLE),
    ParState(ParType.LINK_TITLE, "\"", ParType.LINK_END),
    ParState(ParType.LINK_END, ")", ParType.TEXT) { data ->
        Link(
            data.getValue(ParType.LINK_URL),
            data[ParType.LINK_LABEL],
            data[ParType.LINK_TITLE],
        )
    },
    // Emphasis: "***" must be tried before "**", and "**" before "*".
    ParState(ParType.TEXT, "***", ParType.BOLD_ITALIC) { data ->
        Text(data.getValue(ParType.TEXT))
    },
    ParState(ParType.BOLD_ITALIC, "***", ParType.TEXT) { data ->
        BoldItalic(data.getValue(ParType.BOLD_ITALIC))
    },
    ParState(ParType.TEXT, "~~", ParType.STRIKETHROUGH) { data ->
        Text(data.getValue(ParType.TEXT))
    },
    ParState(ParType.STRIKETHROUGH, "~~", ParType.TEXT) { data ->
        StrikeThrough(data.getValue(ParType.STRIKETHROUGH))
    },
    ParState(ParType.TEXT, "**", ParType.BOLD) { data ->
        Text(data.getValue(ParType.TEXT))
    },
    ParState(ParType.BOLD, "**", ParType.TEXT) { data ->
        Bold(data.getValue(ParType.BOLD))
    },
    ParState(ParType.TEXT, "*", ParType.ITALIC) { data ->
        Text(data.getValue(ParType.TEXT))
    },
    // Fixed: a single-asterisk span is italic; it used to be emitted as BoldItalic.
    ParState(ParType.ITALIC, "*", ParType.TEXT) { data ->
        Italic(data.getValue(ParType.ITALIC))
    },
    ParState(ParType.TEXT, "`", ParType.INLINE_CODE) { data ->
        Text(data.getValue(ParType.TEXT))
    },
    ParState(ParType.INLINE_CODE, "`", ParType.TEXT) { data ->
        InlineCode(data.getValue(ParType.INLINE_CODE))
    },
)
/**
 * Returns true when [value] occurs in this string starting exactly at [index].
 *
 * A match that ends at the very last character counts as well; the previous
 * length check used `>` instead of `>=`, so a delimiter at the end of the input
 * (for example the closing `**` of `**bold**`) was never recognised.
 */
private fun String.test(index: Int, value: String): Boolean = startsWith(value, index)
/**
 * Parses the inline content of a paragraph into a [MarkdownPart.Paragraph].
 *
 * The input is scanned left to right. At every position the transitions that
 * start in the current state are tried in declaration order; the first one
 * whose delimiter matches is taken, the text buffered so far is stored under
 * the state being left, and the transition may emit a part. Whitespace-only
 * [Text] parts produced by a transition are dropped.
 *
 * A newline preceded by two spaces becomes a [LineBreak]; any other newline is
 * kept in the buffered text. Whatever is still buffered at the end of the input
 * is emitted as [Text] (for an unterminated construct this is its content
 * without the opening delimiter).
 *
 * @param text raw paragraph text, possibly spanning several lines
 * @return the paragraph with its inline parts in input order
 */
fun parseParagraph(text: String): MarkdownPart.Paragraph {
    val result = mutableListOf<MarkdownPart.ParagraphPart>()
    val buffer = StringBuilder()
    val data: ParagraphData = mutableMapOf()
    var type = ParType.TEXT
    var activeStates = states.filter { it.fromType == type }
    var index = 0

    while (index < text.length) {
        val state = activeStates.firstOrNull { text.test(index, it.text) }
        if (state != null) {
            // Leaving the current state: remember what was captured in it.
            data[state.fromType] = buffer.toString()
            buffer.clear()

            val part = state.out(data)
            if (part != null && (part !is Text || part.text.isNotBlank())) {
                result.add(part)
            }

            type = state.toType
            activeStates = states.filter { it.fromType == type }
            index += state.text.length
            continue
        }

        val ch = text[index]
        // Markdown hard line break: two spaces at the end of the line. The check
        // must look for both spaces because exactly two characters are stripped.
        if (ch == '\n' && buffer.endsWith("  ")) {
            val textBefore = buffer.substring(0, buffer.length - 2)
            if (textBefore.isNotEmpty()) {
                result.add(Text(textBefore))
            }
            result.add(LineBreak)
            buffer.clear()
        } else {
            // Soft breaks stay in the text and are collapsed later when rendering.
            buffer.append(ch)
        }
        index++
    }

    if (buffer.isNotEmpty()) {
        result.add(Text(buffer.toString()))
    }
    return MarkdownPart.Paragraph(result)
}

View File

@@ -0,0 +1,166 @@
package nl.astraeus.wiki.parser
/**
 * Kind of block whose lines are currently being accumulated by [markdown];
 * it decides how the accumulated text is converted into parts.
 */
enum class MarkdownType {
// Lines between ``` fences.
CODE,
// Default: free text handed to the inline paragraph parser.
PARAGRAPH,
// Consecutive lines starting with "1.", "2.", ...
ORDERED_LIST,
// Consecutive lines starting with "- " or "* ".
UNORDERED_LIST,
// Consecutive lines starting with "|".
TABLE,
}
/**
 * Splits Markdown [text] into block-level [MarkdownPart]s.
 *
 * The input is processed line by line. Lines belonging to the same block
 * (paragraph, fenced code, ordered/unordered list, table) are accumulated in a
 * buffer that is converted whenever the block ends. Recognised line starts:
 * "```" (code fence, optional language after the opening fence), "N." (ordered
 * list, numbered consecutively from 1), "- " / "* " (unordered list), "|"
 * (table), "---" (ruler), "#" (header, level = number of leading '#') and the
 * literal line "[break]" (page break). A blank line ends a paragraph.
 *
 * @param text the complete Markdown source
 * @return the parsed parts in document order
 */
fun markdown(text: String): List<MarkdownPart> {
    val lines = text.lines()
    val parts = mutableListOf<MarkdownPart>()
    val buffer = StringBuilder()
    var language = ""
    var type = MarkdownType.PARAGRAPH
    var listIndex = 1
    var index = 0

    // Converts the accumulated block (if any) and resets to paragraph mode.
    fun parseBuffer() {
        if (buffer.isNotBlank()) {
            parts.addAll(handleBuffer(type, buffer.toString(), language))
        }
        buffer.clear()
        type = MarkdownType.PARAGRAPH
        language = ""
    }

    fun appendLine(content: String) {
        buffer.append(content).append("\n")
    }

    while (index < lines.size) {
        val rawLine = lines[index]
        val line = rawLine.trim()

        when {
            type == MarkdownType.ORDERED_LIST -> {
                val marker = "$listIndex."
                if (!line.startsWith(marker)) {
                    // List ended: flush it and re-process this line as a fresh block.
                    parseBuffer()
                    continue
                }
                // Strip the real marker; a fixed two-character cut broke items 10 and up.
                appendLine(line.removePrefix(marker))
                listIndex++
            }

            type == MarkdownType.UNORDERED_LIST -> {
                if (!line.startsWith("- ") && !line.startsWith("* ")) {
                    parseBuffer()
                    continue
                }
                appendLine(line.substring(2))
            }

            type == MarkdownType.TABLE -> {
                if (!line.startsWith("|")) {
                    parseBuffer()
                    continue
                }
                appendLine(line)
            }

            type == MarkdownType.PARAGRAPH && line.isBlank() -> {
                buffer.append("\n")
                parseBuffer()
            }

            line.startsWith("```") -> {
                if (type != MarkdownType.CODE) {
                    // Opening fence: flush what came before, then start collecting code.
                    parseBuffer()
                    type = MarkdownType.CODE
                    language = line.substring(3).trim()
                } else {
                    // Closing fence: emit the collected code block.
                    parseBuffer()
                }
            }

            type == MarkdownType.CODE -> {
                // Code keeps its original indentation, so the untrimmed line is used.
                appendLine(rawLine)
            }

            line.startsWith("1.") -> {
                parseBuffer()
                type = MarkdownType.ORDERED_LIST
                listIndex = 2
                appendLine(line.removePrefix("1."))
            }

            line.startsWith("- ") || line.startsWith("* ") -> {
                parseBuffer()
                type = MarkdownType.UNORDERED_LIST
                appendLine(line.substring(2))
            }

            line.startsWith("|") -> {
                parseBuffer()
                type = MarkdownType.TABLE
                appendLine(line)
            }

            line.startsWith("---") -> {
                parseBuffer()
                parts.add(MarkdownPart.Ruler())
            }

            line.startsWith("#") -> {
                parseBuffer()
                val headerLevel = line.takeWhile { it == '#' }.length
                val headerText = line.substring(headerLevel).trim()
                parts.add(MarkdownPart.Header(headerText, headerLevel))
            }

            line == "[break]" -> {
                parseBuffer()
                parts.add(MarkdownPart.PageBreak)
            }

            else -> {
                // Preserve trailing spaces for hard line breaks (two spaces at end of line)
                appendLine(rawLine)
            }
        }
        index++
    }

    parseBuffer()
    return parts
}
/**
 * Converts the accumulated [text] of one block into its [MarkdownPart]s.
 *
 * List buffers carry one item per line and end with a line terminator; that
 * final terminator is removed before splitting so the list does not get a
 * spurious empty last item.
 *
 * @param type kind of block the text was collected for
 * @param text accumulated block text
 * @param language language label, only used for [MarkdownType.CODE]
 * @return the parts produced for the block
 */
private fun handleBuffer(
    type: MarkdownType,
    text: String,
    language: String = "",
): List<MarkdownPart> = when (type) {
    MarkdownType.CODE -> listOf(MarkdownPart.CodeBlock(text, language))
    MarkdownType.PARAGRAPH -> listOf(parseParagraph(text))
    MarkdownType.ORDERED_LIST -> listOf(MarkdownPart.OrderedList(text.removeSuffix("\n").lines()))
    MarkdownType.UNORDERED_LIST -> listOf(MarkdownPart.UnorderedList(text.removeSuffix("\n").lines()))
    MarkdownType.TABLE -> parseTable(text)
}

View File

@@ -0,0 +1,48 @@
package nl.astraeus.wiki.parser
/**
 * Parses a pipe-delimited Markdown table.
 *
 * The first non-empty line is the header row and the second must be a separator
 * row whose cells each contain at least three dashes, optionally with a leading
 * and/or trailing ':' alignment marker (`:---`, `---:`, `:---:`). The alignment
 * itself is not recorded. Data rows are padded with empty cells or truncated so
 * that every row has as many cells as the header.
 *
 * @param text the raw table lines
 * @return a single [MarkdownPart.Table], or a single [MarkdownPart.CodeBlock]
 *   with language "table" holding the raw text when the input is not a valid table
 */
fun parseTable(text: String): List<MarkdownPart> {
    val lines = text.lines().map { it.trim() }.filter { it.isNotEmpty() }

    // Splits one row into trimmed cells, ignoring the outer pipes.
    fun parseCells(line: String): List<String> {
        val trimmed = line.trim().trim('|')
        return if (trimmed.isEmpty()) emptyList() else trimmed.split("|").map { it.trim() }
    }

    // A separator cell is dashes (and whitespace) only, apart from optional alignment colons.
    fun isSeparatorRow(cells: List<String>): Boolean =
        cells.isNotEmpty() && cells.all { cell ->
            val dashes = cell.removePrefix(":").removeSuffix(":")
            dashes.count { it == '-' } >= 3 && dashes.all { it == '-' || it.isWhitespace() }
        }

    // Anything that does not look like a table is handed back verbatim as a code block.
    val fallback: List<MarkdownPart> = listOf(MarkdownPart.CodeBlock(text, "table"))
    if (lines.size < 2) return fallback

    val headerCells = parseCells(lines[0])
    if (headerCells.isEmpty() || !isSeparatorRow(parseCells(lines[1]))) return fallback

    val rows = lines.drop(2).map { line ->
        val cells = parseCells(line)
        // Normalize to the header's column count: pad with "" or drop extras.
        List(headerCells.size) { column -> cells.getOrElse(column) { "" } }
    }
    return listOf(MarkdownPart.Table(headers = headerCells, rows = rows))
}