Render pages from parsed HTML instead of raw wikitext: add htmlconv (HTML→markdown converter), strip MediaWiki transclusion tags in extract, and follow redirects when fetching page HTML

This commit is contained in:
2026-03-05 12:51:31 -06:00
parent b20d68c9f0
commit c283444d68
7 changed files with 603 additions and 103 deletions

View File

@@ -1,10 +1,11 @@
module github.com/runescape-wiki/rsw
go 1.22
go 1.25.0
require github.com/spf13/cobra v1.8.1
require (
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/spf13/pflag v1.0.5 // indirect
golang.org/x/net v0.51.0 // indirect
)

View File

@@ -6,5 +6,7 @@ github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM=
github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y=
github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
golang.org/x/net v0.51.0 h1:94R/GTO7mt3/4wIKpcR5gkGmRLOuE/2hNGeWq/GBIFo=
golang.org/x/net v0.51.0/go.mod h1:aamm+2QF5ogm02fjy5Bb7CQ0WMt1/WVM7FtyaTLlA9Y=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

View File

@@ -4,7 +4,7 @@ import (
"fmt"
"strings"
"github.com/runescape-wiki/rsw/internal/extract"
"github.com/runescape-wiki/rsw/internal/htmlconv"
"github.com/runescape-wiki/rsw/internal/render"
"github.com/runescape-wiki/rsw/internal/wiki"
"github.com/spf13/cobra"
@@ -27,80 +27,54 @@ Examples:
title := args[0]
client := wiki.NewClient(GameBaseURL())
page, err := client.GetPage(title)
if Raw() {
page, err := client.GetPage(title)
if err != nil {
return fmt.Errorf("failed to fetch page: %w", err)
}
fmt.Println(page.Wikitext)
return nil
}
page, err := client.GetPageHTML(title)
if err != nil {
return fmt.Errorf("failed to fetch page: %w", err)
}
wikitext := page.Wikitext
if pageSection != "" {
idx := wiki.FindSectionIndex(page.Sections, pageSection)
if idx == -1 {
needle := strings.ToLower(pageSection)
// Case-insensitive exact match
for _, s := range page.Sections {
if strings.ToLower(s.Line) == needle {
fmt.Sscanf(s.Index, "%d", &idx)
break
}
}
// Case-insensitive prefix match (e.g. "Location" → "Locations")
if idx == -1 {
for _, s := range page.Sections {
if strings.HasPrefix(strings.ToLower(s.Line), needle) {
fmt.Sscanf(s.Index, "%d", &idx)
break
}
}
}
// Case-insensitive contains match
if idx == -1 {
for _, s := range page.Sections {
if strings.Contains(strings.ToLower(s.Line), needle) {
fmt.Sscanf(s.Index, "%d", &idx)
break
}
}
}
}
if idx == -1 {
body := htmlconv.ExtractSection(page.HTML, pageSection)
if body == "" {
sections := htmlconv.ListSections(page.HTML)
return fmt.Errorf("section %q not found. Available sections: %s",
pageSection, listSections(page.Sections))
pageSection, formatSectionNames(sections))
}
sectionPage, err := client.GetPageSection(title, idx)
if err != nil {
return fmt.Errorf("failed to fetch section: %w", err)
}
wikitext = sectionPage.Wikitext
}
if Raw() {
fmt.Println(wikitext)
md := render.New()
md.H1(page.Title)
md.Line(body)
fmt.Print(md.String())
return nil
}
md := render.New()
md.H1(page.Title)
if pageSection == "" && len(page.Sections) > 0 {
sections := htmlconv.ListSections(page.HTML)
if len(sections) > 0 {
md.H2("Sections")
for _, s := range page.Sections {
for _, s := range sections {
indent := ""
if s.Level == "3" {
if s.Level == 3 {
indent = " "
} else if s.Level == "4" {
} else if s.Level >= 4 {
indent = " "
}
md.Line(fmt.Sprintf("%s- %s", indent, s.Line))
md.Line(fmt.Sprintf("%s- %s", indent, s.Name))
}
md.Newline()
md.HR()
}
plain := extract.ExtractPlainText(wikitext)
md.P(plain)
md.Line(htmlconv.Convert(page.HTML))
fmt.Print(md.String())
return nil
},
@@ -110,10 +84,10 @@ Examples:
return cmd
}
func listSections(sections []wiki.Section) string {
func formatSectionNames(sections []htmlconv.SectionInfo) string {
names := make([]string, len(sections))
for i, s := range sections {
names[i] = s.Line
names[i] = s.Name
}
return strings.Join(names, ", ")
}

View File

@@ -4,7 +4,7 @@ import (
"fmt"
"strings"
"github.com/runescape-wiki/rsw/internal/extract"
"github.com/runescape-wiki/rsw/internal/htmlconv"
"github.com/runescape-wiki/rsw/internal/render"
"github.com/runescape-wiki/rsw/internal/wiki"
"github.com/spf13/cobra"
@@ -31,19 +31,27 @@ Examples:
wikiClient := wiki.NewClient(GameBaseURL())
trainingTitle := capitalizeFirst(strings.ToLower(skillName)) + " training"
page, err := wikiClient.GetPage(trainingTitle)
if Raw() {
page, err := wikiClient.GetPage(trainingTitle)
if err != nil {
page, err = wikiClient.GetPage(capitalizeFirst(strings.ToLower(skillName)))
if err != nil {
return fmt.Errorf("failed to fetch skill page: %w", err)
}
}
fmt.Println(page.Wikitext)
return nil
}
page, err := wikiClient.GetPageHTML(trainingTitle)
if err != nil {
page, err = wikiClient.GetPage(capitalizeFirst(strings.ToLower(skillName)))
page, err = wikiClient.GetPageHTML(capitalizeFirst(strings.ToLower(skillName)))
if err != nil {
return fmt.Errorf("failed to fetch skill page: %w", err)
}
}
if Raw() {
fmt.Println(page.Wikitext)
return nil
}
md := render.New()
md.H1(fmt.Sprintf("%s Training Guide", page.Title))
@@ -51,11 +59,13 @@ Examples:
md.P("*Showing methods suitable for ironman accounts (no GE access).*")
}
if len(page.Sections) > 0 {
sections := htmlconv.ListSections(page.HTML)
if len(sections) > 0 {
md.H2("Contents")
for _, s := range page.Sections {
if s.Level == "2" {
md.Bullet(s.Line)
for _, s := range sections {
if s.Level == 2 {
md.Bullet(s.Name)
}
}
md.Newline()
@@ -63,30 +73,23 @@ Examples:
if levelRange != "" {
found := false
for _, s := range page.Sections {
if strings.Contains(strings.ToLower(s.Line), strings.ToLower(levelRange)) ||
sectionMatchesLevelRange(s.Line, levelRange) {
idx := 0
fmt.Sscanf(s.Index, "%d", &idx)
if idx > 0 {
sectionPage, err := wikiClient.GetPageSection(page.Title, idx)
if err == nil {
plain := extract.ExtractPlainText(sectionPage.Wikitext)
if strings.TrimSpace(plain) != "" {
md.H2(s.Line)
md.P(plain)
found = true
}
}
for _, s := range sections {
if strings.Contains(strings.ToLower(s.Name), strings.ToLower(levelRange)) ||
sectionMatchesLevelRange(s.Name, levelRange) {
body := htmlconv.ExtractSection(page.HTML, s.Name)
if strings.TrimSpace(body) != "" {
md.H2(s.Name)
md.Line(body)
found = true
}
}
}
if !found {
md.P(fmt.Sprintf("*No section found matching level range %q. Showing full guide.*", levelRange))
renderFullGuide(md, page, wikiClient)
renderFullGuideHTML(md, page.HTML, sections)
}
} else {
renderFullGuide(md, page, wikiClient)
renderFullGuideHTML(md, page.HTML, sections)
}
fmt.Print(md.String())
@@ -98,24 +101,15 @@ Examples:
return cmd
}
func renderFullGuide(md *render.Builder, page *wiki.ParsedPage, client *wiki.Client) {
for _, s := range page.Sections {
if s.Level != "2" {
func renderFullGuideHTML(md *render.Builder, pageHTML string, sections []htmlconv.SectionInfo) {
for _, s := range sections {
if s.Level != 2 {
continue
}
idx := 0
fmt.Sscanf(s.Index, "%d", &idx)
if idx <= 0 {
continue
}
sectionPage, err := client.GetPageSection(page.Title, idx)
if err != nil {
continue
}
plain := extract.ExtractPlainText(sectionPage.Wikitext)
if strings.TrimSpace(plain) != "" {
md.H2(s.Line)
md.P(plain)
body := htmlconv.ExtractSection(pageHTML, s.Name)
if strings.TrimSpace(body) != "" {
md.H2(s.Name)
md.Line(body)
}
}
}

View File

@@ -202,6 +202,7 @@ func cleanWikiLinks(s string) string {
// CleanWikitext strips templates, wiki links, and HTML but preserves line structure
// and converts wiki list markers (* items) to readable bullet points.
func CleanWikitext(s string) string {
s = StripTransclusionTags(s)
s = expandKnownTemplates(s)
s = removeTemplates(s)
s = cleanWikiLinks(s)
@@ -239,6 +240,7 @@ func CleanWikitext(s string) string {
// ExtractPlainText strips all wikitext markup to produce plain text.
func ExtractPlainText(wikitext string) string {
s := wikitext
s = StripTransclusionTags(s)
s = expandKnownTemplates(s)
s = removeTemplates(s)
s = cleanWikiLinks(s)
@@ -371,6 +373,48 @@ func tryExpandTemplate(inner string) (string, bool) {
return "", false
}
// StripTransclusionTags handles MediaWiki transclusion directives in raw wikitext.
// When viewing a page directly (not transcluding it into another page):
//   - <includeonly>content</includeonly> → removed entirely (tags and content)
//   - <noinclude>content</noinclude>     → content kept (tags stripped)
//   - <onlyinclude>content</onlyinclude> → content kept (tags stripped)
func StripTransclusionTags(s string) string {
	// Remove <includeonly>...</includeonly> blocks entirely. Matching is
	// case-insensitive: we search a lowered copy but slice the original
	// string so the surviving text keeps its original casing.
	for {
		lower := strings.ToLower(s)
		start := strings.Index(lower, "<includeonly>")
		if start == -1 {
			break
		}
		// end is relative to start because we search the tail lower[start:].
		end := strings.Index(lower[start:], "</includeonly>")
		if end == -1 {
			// Unclosed tag — remove to end of string
			s = s[:start]
			break
		}
		s = s[:start] + s[start+end+len("</includeonly>"):]
	}
	// Unwrap <noinclude> and <onlyinclude> (keep content, remove tags)
	for _, tag := range []string{"noinclude", "onlyinclude"} {
		s = removeTagKeepContent(s, tag)
	}
	return s
}
// removeTagKeepContent strips every occurrence of <tag> and </tag> from s
// while keeping the enclosed content. tag is expected in lowercase.
//
// Matching is fully case-insensitive: MediaWiki tag names are
// case-insensitive, so <NoInclude> must be removed just like <noinclude>
// and <NOINCLUDE>. (The previous version only handled the all-lower and
// all-upper spellings, leaking mixed-case tags into the output.)
func removeTagKeepContent(s, tag string) string {
	for _, t := range []string{"<" + tag + ">", "</" + tag + ">"} {
		for {
			// Search a lowered copy, splice the original so surrounding
			// text keeps its casing.
			idx := strings.Index(strings.ToLower(s), t)
			if idx == -1 {
				break
			}
			s = s[:idx] + s[idx+len(t):]
		}
	}
	return s
}
func removeTemplates(s string) string {
var b strings.Builder
depth := 0

View File

@@ -0,0 +1,483 @@
package htmlconv
import (
"fmt"
"strings"
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
)
// Convert takes MediaWiki-rendered HTML and returns clean markdown.
func Convert(rawHTML string) string {
	doc, err := html.Parse(strings.NewReader(rawHTML))
	if err != nil {
		// Parsing failed — fall back to returning the input untouched.
		return rawHTML
	}
	conv := walker{}
	conv.walk(doc)
	return conv.finish()
}
// ExtractSection extracts a single section by heading text and converts it to markdown.
// Matches case-insensitively. Returns empty string if not found.
func ExtractSection(rawHTML string, sectionName string) string {
	doc, err := html.Parse(strings.NewReader(rawHTML))
	if err != nil {
		return ""
	}
	needle := strings.ToLower(strings.TrimSpace(sectionName))
	// Find the mw-parser-output container.
	container := findParserOutput(doc)
	if container == nil {
		return ""
	}
	// Walk top-level children to find the target heading div, then collect
	// siblings until the next heading of equal or lesser depth.
	var (
		collecting bool         // true once the target heading has been seen
		targetLvl  int          // depth of the matched heading
		collected  []*html.Node // sibling nodes belonging to the section
	)
	for c := container.FirstChild; c != nil; c = c.NextSibling {
		if isHeadingDiv(c) {
			lvl, text := headingInfo(c)
			if collecting {
				// A heading at the same or shallower depth ends the section;
				// deeper headings (sub-sections) fall through and are kept.
				if lvl <= targetLvl {
					break
				}
			}
			if !collecting && strings.ToLower(strings.TrimSpace(text)) == needle {
				collecting = true
				targetLvl = lvl
				// The matched heading itself is not part of the body.
				continue
			}
		}
		if collecting {
			collected = append(collected, c)
		}
	}
	if len(collected) == 0 {
		return ""
	}
	// Convert just the collected sibling nodes to markdown.
	var w walker
	for _, n := range collected {
		w.walk(n)
	}
	return w.finish()
}
// ListSections returns section names and levels from the HTML.
func ListSections(rawHTML string) []SectionInfo {
	doc, err := html.Parse(strings.NewReader(rawHTML))
	if err != nil {
		return nil
	}
	root := findParserOutput(doc)
	if root == nil {
		return nil
	}
	// Headings are direct children of the parser-output container.
	var out []SectionInfo
	for child := root.FirstChild; child != nil; child = child.NextSibling {
		if !isHeadingDiv(child) {
			continue
		}
		level, name := headingInfo(child)
		if name == "" {
			continue
		}
		out = append(out, SectionInfo{Name: name, Level: level})
	}
	return out
}
// SectionInfo holds a section heading name and level.
type SectionInfo struct {
	Name  string // heading text, with the [edit] widget stripped
	Level int    // heading depth: 2 for <h2>, 3 for <h3>, etc.
}
// --- DOM helpers ---
// findParserOutput returns the first <div class="mw-parser-output"> in the
// tree rooted at n (depth-first), or nil when no such container exists.
func findParserOutput(n *html.Node) *html.Node {
	isTarget := n.Type == html.ElementNode &&
		n.DataAtom == atom.Div &&
		hasClass(n, "mw-parser-output")
	if isTarget {
		return n
	}
	for child := n.FirstChild; child != nil; child = child.NextSibling {
		if match := findParserOutput(child); match != nil {
			return match
		}
	}
	return nil
}
// isHeadingDiv reports whether n is a MediaWiki heading wrapper: a <div>
// whose class begins with "mw-heading" (e.g. mw-heading2, mw-heading3).
func isHeadingDiv(n *html.Node) bool {
	if n.Type != html.ElementNode || n.DataAtom != atom.Div {
		return false
	}
	return hasClassPrefix(n, "mw-heading")
}
func headingInfo(div *html.Node) (level int, text string) {
for c := div.FirstChild; c != nil; c = c.NextSibling {
if c.Type == html.ElementNode {
switch c.DataAtom {
case atom.H1:
return 1, textContent(c)
case atom.H2:
return 2, textContent(c)
case atom.H3:
return 3, textContent(c)
case atom.H4:
return 4, textContent(c)
case atom.H5:
return 5, textContent(c)
case atom.H6:
return 6, textContent(c)
}
}
}
return 0, ""
}
// textContent returns the concatenated text of n's subtree, omitting
// MediaWiki "[edit]" widgets (elements carrying class mw-editsection).
func textContent(n *html.Node) string {
	switch {
	case n.Type == html.TextNode:
		return n.Data
	case n.Type == html.ElementNode && hasClass(n, "mw-editsection"):
		// Edit-section spans are UI chrome, not content.
		return ""
	}
	var sb strings.Builder
	for child := n.FirstChild; child != nil; child = child.NextSibling {
		sb.WriteString(textContent(child))
	}
	return sb.String()
}
// hasClass reports whether n's class attribute contains cls as a
// whitespace-separated token (exact token match, not a substring).
func hasClass(n *html.Node, cls string) bool {
	for _, attr := range n.Attr {
		if attr.Key != "class" {
			continue
		}
		for _, token := range strings.Fields(attr.Val) {
			if token == cls {
				return true
			}
		}
	}
	return false
}
// hasClassPrefix reports whether any whitespace-separated class token on n
// begins with prefix (e.g. prefix "mw-heading" matches "mw-heading2").
func hasClassPrefix(n *html.Node, prefix string) bool {
	for _, attr := range n.Attr {
		if attr.Key != "class" {
			continue
		}
		for _, token := range strings.Fields(attr.Val) {
			if strings.HasPrefix(token, prefix) {
				return true
			}
		}
	}
	return false
}
// getAttr returns the value of the named attribute on n, or "" when the
// attribute is absent.
func getAttr(n *html.Node, key string) string {
	for _, attr := range n.Attr {
		if attr.Key == key {
			return attr.Val
		}
	}
	return ""
}
// isHidden reports whether n carries an inline style that hides it.
// NOTE(review): only the literal spellings "display:none" and
// "display: none" are detected; other whitespace variants (e.g.
// "display :none") pass through — confirm this covers MediaWiki output.
func isHidden(n *html.Node) bool {
	css := getAttr(n, "style")
	if strings.Contains(css, "display:none") {
		return true
	}
	return strings.Contains(css, "display: none")
}
// --- walker: recursive HTML-to-markdown converter ---

// walker accumulates markdown output while recursively visiting HTML nodes.
// The zero value is ready to use: call walk on one or more nodes, then
// finish to obtain the normalized output.
type walker struct {
	sb      strings.Builder // markdown accumulated so far
	listCtx []listContext   // stack of enclosing <ul>/<ol> contexts, innermost last
}

// listContext tracks one enclosing list while its items are rendered.
type listContext struct {
	ordered bool // true for <ol> (numbered items), false for <ul> (bullets)
	index   int  // last item number emitted for an ordered list
}
// finish normalizes and returns the accumulated markdown: runs of three or
// more consecutive newlines collapse to a single blank line, and the result
// is trimmed with exactly one trailing newline appended.
func (w *walker) finish() string {
	md := w.sb.String()
	for {
		squeezed := strings.ReplaceAll(md, "\n\n\n", "\n\n")
		if squeezed == md {
			break
		}
		md = squeezed
	}
	return strings.TrimSpace(md) + "\n"
}
// walk renders node n (and, recursively, its children) into w's buffer,
// translating HTML elements to their markdown equivalents. Hidden nodes and
// navigation/infobox chrome (see shouldSkip) produce no output.
func (w *walker) walk(n *html.Node) {
	switch n.Type {
	case html.TextNode:
		w.sb.WriteString(n.Data)
		return
	case html.ElementNode:
		// Fall through to the visibility checks and element dispatch below.
	case html.DocumentNode:
		w.walkChildren(n)
		return
	default:
		// Comments, doctypes, etc. contribute nothing.
		return
	}
	if isHidden(n) {
		return
	}
	if w.shouldSkip(n) {
		return
	}
	switch n.DataAtom {
	case atom.H1, atom.H2, atom.H3, atom.H4, atom.H5, atom.H6:
		w.renderHeading(n)
	case atom.P:
		w.ensureNewline()
		w.walkChildren(n)
		w.sb.WriteString("\n\n")
	case atom.Br:
		w.sb.WriteString("\n")
	case atom.B, atom.Strong:
		w.sb.WriteString("**")
		w.walkChildren(n)
		w.sb.WriteString("**")
	case atom.I, atom.Em:
		w.sb.WriteString("*")
		w.walkChildren(n)
		w.sb.WriteString("*")
	case atom.A:
		// Links keep their anchor text; the URL is dropped.
		w.walkChildren(n)
	case atom.Ul:
		w.renderList(n, false)
	case atom.Ol:
		w.renderList(n, true)
	case atom.Li:
		w.renderListItem(n)
	case atom.Table:
		w.renderTable(n)
	case atom.Img:
		// Images degrade to their alt text, if any.
		alt := getAttr(n, "alt")
		if alt != "" {
			w.sb.WriteString(alt)
		}
	case atom.Div:
		if isHeadingDiv(n) {
			// MediaWiki wraps headings in <div class="mw-headingN">; render
			// the inner heading as a markdown heading and skip the wrapper's
			// remaining children (edit links etc.).
			lvl, text := headingInfo(n)
			if text != "" {
				w.ensureNewline()
				w.sb.WriteString(strings.Repeat("#", lvl))
				w.sb.WriteString(" ")
				w.sb.WriteString(text)
				w.sb.WriteString("\n\n")
			}
			return
		}
		w.walkChildren(n)
	default:
		// Unknown elements are transparent: render their children inline.
		w.walkChildren(n)
	}
}
// walkChildren renders each direct child of n in document order.
func (w *walker) walkChildren(n *html.Node) {
	for c := n.FirstChild; c != nil; c = c.NextSibling {
		w.walk(c)
	}
}
// shouldSkip reports whether n is navigation/infobox chrome that would be
// noise in CLI output and should be omitted entirely.
func (w *walker) shouldSkip(n *html.Node) bool {
	if n.Type != html.ElementNode {
		return false
	}
	// Tables of contents (callers can rebuild section lists themselves).
	switch getAttr(n, "id") {
	case "toc", "toctemplate":
		return true
	}
	// Search-excluded navigation and inline [edit] widgets.
	if hasClass(n, "navigation-not-searchable") || hasClass(n, "mw-editsection") {
		return true
	}
	if n.DataAtom == atom.Table {
		// Infobox tables — noisy in CLI. RS3 uses "rsw-infobox", OSRS uses
		// "infobox". Navboxes (bottom-of-page navigation) and messagebox /
		// disambig banners are likewise dropped.
		if hasClass(n, "infobox") || hasClassPrefix(n, "rsw-infobox") ||
			hasClass(n, "navbox") || hasClass(n, "messagebox") {
			return true
		}
	}
	// Layout-only "clear" divs.
	return n.DataAtom == atom.Div && hasClass(n, "clear-template")
}
// ensureNewline appends a newline unless the buffer is empty or already
// ends with one, so the next write begins at column zero.
func (w *walker) ensureNewline() {
	if cur := w.sb.String(); cur != "" && !strings.HasSuffix(cur, "\n") {
		w.sb.WriteString("\n")
	}
}
// renderHeading emits an <h1>..<h6> element as a markdown "#" heading on
// its own line, followed by a blank line.
func (w *walker) renderHeading(n *html.Node) {
	var depth int
	switch n.DataAtom {
	case atom.H1:
		depth = 1
	case atom.H2:
		depth = 2
	case atom.H3:
		depth = 3
	case atom.H4:
		depth = 4
	case atom.H5:
		depth = 5
	case atom.H6:
		depth = 6
	}
	w.ensureNewline()
	fmt.Fprintf(&w.sb, "%s %s\n\n", strings.Repeat("#", depth), textContent(n))
}
// renderList renders a <ul>/<ol>, pushing a list context so nested <li>
// items know their depth and numbering. A separating blank line follows
// only the outermost list.
func (w *walker) renderList(n *html.Node, ordered bool) {
	w.listCtx = append(w.listCtx, listContext{ordered: ordered})
	w.ensureNewline()
	w.walkChildren(n)
	w.listCtx = w.listCtx[:len(w.listCtx)-1]
	if len(w.listCtx) == 0 {
		// Outermost list finished — separate it from following content.
		w.sb.WriteString("\n")
	}
}
// renderListItem emits one <li> as "- " (or "N. " inside an ordered list)
// indented for the current nesting depth. Nested <ul>/<ol> children are
// pushed onto their own lines; all other children render inline after the
// marker.
//
// Bug fix: the original indexed w.listCtx[len(w.listCtx)-1] unconditionally,
// which panics (index out of range) on a stray <li> outside any <ul>/<ol>.
// Such items now fall back to a top-level unordered bullet.
func (w *walker) renderListItem(n *html.Node) {
	indent, marker := "", "- "
	if len(w.listCtx) > 0 {
		indent = strings.Repeat("  ", len(w.listCtx)-1)
		ctx := &w.listCtx[len(w.listCtx)-1]
		if ctx.ordered {
			ctx.index++
			marker = fmt.Sprintf("%d. ", ctx.index)
		}
	}
	w.sb.WriteString(indent + marker)
	// Walk children inline — but nested lists should go on their own lines.
	for c := n.FirstChild; c != nil; c = c.NextSibling {
		if c.Type == html.ElementNode && (c.DataAtom == atom.Ul || c.DataAtom == atom.Ol) {
			w.sb.WriteString("\n")
		}
		w.walk(c)
	}
	w.ensureNewline()
}
// renderTable converts a <table> into a markdown pipe table. The first row
// becomes the header when all of its cells are <th>; otherwise the first
// data row is promoted to header. Tables with no usable rows render nothing.
func (w *walker) renderTable(n *html.Node) {
	rows := collectTableRows(n)
	if len(rows) == 0 {
		return
	}
	w.ensureNewline()
	w.sb.WriteString("\n")
	var headers []string
	var dataRows [][]string
	for i, row := range rows {
		var cells []string
		allHeaders := true
		for _, cell := range row {
			text := strings.TrimSpace(textContent(cell))
			// Literal pipes and newlines would break markdown table syntax.
			text = strings.ReplaceAll(text, "|", "/")
			text = strings.ReplaceAll(text, "\n", " ")
			cells = append(cells, text)
			if cell.DataAtom != atom.Th {
				allHeaders = false
			}
		}
		if i == 0 && allHeaders {
			headers = cells
		} else {
			dataRows = append(dataRows, cells)
		}
	}
	if headers == nil && len(dataRows) > 0 {
		// No <th>-only first row — promote the first data row to header.
		headers = dataRows[0]
		dataRows = dataRows[1:]
	}
	if len(headers) == 0 {
		return
	}
	// Header row.
	w.sb.WriteString("| " + strings.Join(headers, " | ") + " |\n")
	seps := make([]string, len(headers))
	for i := range seps {
		seps[i] = "---"
	}
	w.sb.WriteString("| " + strings.Join(seps, " | ") + " |\n")
	for _, row := range dataRows {
		// Pad short rows to the header width; extra cells are truncated.
		for len(row) < len(headers) {
			row = append(row, "")
		}
		w.sb.WriteString("| " + strings.Join(row[:len(headers)], " | ") + " |\n")
	}
	w.sb.WriteString("\n")
}
// collectTableRows gathers the cell nodes (<th>/<td>) of every visible <tr>
// under table, row by row. Hidden rows and rows without cells are dropped.
// Recursion stops at each <tr>, so tables nested inside cells contribute no
// rows of their own.
func collectTableRows(table *html.Node) [][]*html.Node {
	var rows [][]*html.Node
	var visit func(*html.Node)
	visit = func(n *html.Node) {
		if n.Type != html.ElementNode || n.DataAtom != atom.Tr {
			// Not a row: keep searching deeper.
			for c := n.FirstChild; c != nil; c = c.NextSibling {
				visit(c)
			}
			return
		}
		if isHidden(n) {
			return
		}
		var cells []*html.Node
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			if c.Type == html.ElementNode && (c.DataAtom == atom.Th || c.DataAtom == atom.Td) {
				cells = append(cells, c)
			}
		}
		if len(cells) > 0 {
			rows = append(rows, cells)
		}
	}
	visit(table)
	return rows
}

View File

@@ -97,9 +97,10 @@ func (c *Client) GetPageSection(title string, sectionIndex int) (*ParsedPage, er
// GetPageHTML fetches the rendered HTML for a page.
func (c *Client) GetPageHTML(title string) (*ParsedPage, error) {
params := url.Values{
"action": {"parse"},
"page": {title},
"prop": {"text|sections"},
"action": {"parse"},
"page": {title},
"prop": {"text|sections"},
"redirects": {"1"},
}
var resp parseResponse
@@ -133,3 +134,4 @@ func FindSectionIndex(sections []Section, name string) int {
}
return -1
}