diff --git a/scripts/rsw/go.mod b/scripts/rsw/go.mod
index 05f05b0..cbe59cb 100644
--- a/scripts/rsw/go.mod
+++ b/scripts/rsw/go.mod
@@ -1,10 +1,11 @@
module github.com/runescape-wiki/rsw
-go 1.22
+go 1.25.0
require github.com/spf13/cobra v1.8.1
require (
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/spf13/pflag v1.0.5 // indirect
+ golang.org/x/net v0.51.0 // indirect
)
diff --git a/scripts/rsw/go.sum b/scripts/rsw/go.sum
index 912390a..5e5a1ee 100644
--- a/scripts/rsw/go.sum
+++ b/scripts/rsw/go.sum
@@ -6,5 +6,7 @@ github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM=
github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y=
github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
+golang.org/x/net v0.51.0 h1:94R/GTO7mt3/4wIKpcR5gkGmRLOuE/2hNGeWq/GBIFo=
+golang.org/x/net v0.51.0/go.mod h1:aamm+2QF5ogm02fjy5Bb7CQ0WMt1/WVM7FtyaTLlA9Y=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
diff --git a/scripts/rsw/internal/cmd/page.go b/scripts/rsw/internal/cmd/page.go
index 9d41469..8681fb5 100644
--- a/scripts/rsw/internal/cmd/page.go
+++ b/scripts/rsw/internal/cmd/page.go
@@ -4,7 +4,7 @@ import (
"fmt"
"strings"
- "github.com/runescape-wiki/rsw/internal/extract"
+ "github.com/runescape-wiki/rsw/internal/htmlconv"
"github.com/runescape-wiki/rsw/internal/render"
"github.com/runescape-wiki/rsw/internal/wiki"
"github.com/spf13/cobra"
@@ -27,80 +27,54 @@ Examples:
title := args[0]
client := wiki.NewClient(GameBaseURL())
- page, err := client.GetPage(title)
+ if Raw() {
+ page, err := client.GetPage(title)
+ if err != nil {
+ return fmt.Errorf("failed to fetch page: %w", err)
+ }
+ fmt.Println(page.Wikitext)
+ return nil
+ }
+
+ page, err := client.GetPageHTML(title)
if err != nil {
return fmt.Errorf("failed to fetch page: %w", err)
}
- wikitext := page.Wikitext
-
if pageSection != "" {
- idx := wiki.FindSectionIndex(page.Sections, pageSection)
- if idx == -1 {
- needle := strings.ToLower(pageSection)
- // Case-insensitive exact match
- for _, s := range page.Sections {
- if strings.ToLower(s.Line) == needle {
- fmt.Sscanf(s.Index, "%d", &idx)
- break
- }
- }
- // Case-insensitive prefix match (e.g. "Location" → "Locations")
- if idx == -1 {
- for _, s := range page.Sections {
- if strings.HasPrefix(strings.ToLower(s.Line), needle) {
- fmt.Sscanf(s.Index, "%d", &idx)
- break
- }
- }
- }
- // Case-insensitive contains match
- if idx == -1 {
- for _, s := range page.Sections {
- if strings.Contains(strings.ToLower(s.Line), needle) {
- fmt.Sscanf(s.Index, "%d", &idx)
- break
- }
- }
- }
- }
- if idx == -1 {
+ body := htmlconv.ExtractSection(page.HTML, pageSection)
+ if body == "" {
+ sections := htmlconv.ListSections(page.HTML)
return fmt.Errorf("section %q not found. Available sections: %s",
- pageSection, listSections(page.Sections))
+ pageSection, formatSectionNames(sections))
}
- sectionPage, err := client.GetPageSection(title, idx)
- if err != nil {
- return fmt.Errorf("failed to fetch section: %w", err)
- }
- wikitext = sectionPage.Wikitext
- }
-
- if Raw() {
- fmt.Println(wikitext)
+ md := render.New()
+ md.H1(page.Title)
+ md.Line(body)
+ fmt.Print(md.String())
return nil
}
md := render.New()
md.H1(page.Title)
- if pageSection == "" && len(page.Sections) > 0 {
+ sections := htmlconv.ListSections(page.HTML)
+ if len(sections) > 0 {
md.H2("Sections")
- for _, s := range page.Sections {
+ for _, s := range sections {
indent := ""
- if s.Level == "3" {
+ if s.Level == 3 {
indent = " "
- } else if s.Level == "4" {
+ } else if s.Level >= 4 {
indent = " "
}
- md.Line(fmt.Sprintf("%s- %s", indent, s.Line))
+ md.Line(fmt.Sprintf("%s- %s", indent, s.Name))
}
md.Newline()
md.HR()
}
- plain := extract.ExtractPlainText(wikitext)
- md.P(plain)
-
+ md.Line(htmlconv.Convert(page.HTML))
fmt.Print(md.String())
return nil
},
@@ -110,10 +84,10 @@ Examples:
return cmd
}
-func listSections(sections []wiki.Section) string {
+func formatSectionNames(sections []htmlconv.SectionInfo) string {
names := make([]string, len(sections))
for i, s := range sections {
- names[i] = s.Line
+ names[i] = s.Name
}
return strings.Join(names, ", ")
}
diff --git a/scripts/rsw/internal/cmd/skill.go b/scripts/rsw/internal/cmd/skill.go
index 8f1a55d..307b86a 100644
--- a/scripts/rsw/internal/cmd/skill.go
+++ b/scripts/rsw/internal/cmd/skill.go
@@ -4,7 +4,7 @@ import (
"fmt"
"strings"
- "github.com/runescape-wiki/rsw/internal/extract"
+ "github.com/runescape-wiki/rsw/internal/htmlconv"
"github.com/runescape-wiki/rsw/internal/render"
"github.com/runescape-wiki/rsw/internal/wiki"
"github.com/spf13/cobra"
@@ -31,19 +31,27 @@ Examples:
wikiClient := wiki.NewClient(GameBaseURL())
trainingTitle := capitalizeFirst(strings.ToLower(skillName)) + " training"
- page, err := wikiClient.GetPage(trainingTitle)
+
+ if Raw() {
+ page, err := wikiClient.GetPage(trainingTitle)
+ if err != nil {
+ page, err = wikiClient.GetPage(capitalizeFirst(strings.ToLower(skillName)))
+ if err != nil {
+ return fmt.Errorf("failed to fetch skill page: %w", err)
+ }
+ }
+ fmt.Println(page.Wikitext)
+ return nil
+ }
+
+ page, err := wikiClient.GetPageHTML(trainingTitle)
if err != nil {
- page, err = wikiClient.GetPage(capitalizeFirst(strings.ToLower(skillName)))
+ page, err = wikiClient.GetPageHTML(capitalizeFirst(strings.ToLower(skillName)))
if err != nil {
return fmt.Errorf("failed to fetch skill page: %w", err)
}
}
- if Raw() {
- fmt.Println(page.Wikitext)
- return nil
- }
-
md := render.New()
md.H1(fmt.Sprintf("%s Training Guide", page.Title))
@@ -51,11 +59,13 @@ Examples:
md.P("*Showing methods suitable for ironman accounts (no GE access).*")
}
- if len(page.Sections) > 0 {
+ sections := htmlconv.ListSections(page.HTML)
+
+ if len(sections) > 0 {
md.H2("Contents")
- for _, s := range page.Sections {
- if s.Level == "2" {
- md.Bullet(s.Line)
+ for _, s := range sections {
+ if s.Level == 2 {
+ md.Bullet(s.Name)
}
}
md.Newline()
@@ -63,30 +73,23 @@ Examples:
if levelRange != "" {
found := false
- for _, s := range page.Sections {
- if strings.Contains(strings.ToLower(s.Line), strings.ToLower(levelRange)) ||
- sectionMatchesLevelRange(s.Line, levelRange) {
- idx := 0
- fmt.Sscanf(s.Index, "%d", &idx)
- if idx > 0 {
- sectionPage, err := wikiClient.GetPageSection(page.Title, idx)
- if err == nil {
- plain := extract.ExtractPlainText(sectionPage.Wikitext)
- if strings.TrimSpace(plain) != "" {
- md.H2(s.Line)
- md.P(plain)
- found = true
- }
- }
+ for _, s := range sections {
+ if strings.Contains(strings.ToLower(s.Name), strings.ToLower(levelRange)) ||
+ sectionMatchesLevelRange(s.Name, levelRange) {
+ body := htmlconv.ExtractSection(page.HTML, s.Name)
+ if strings.TrimSpace(body) != "" {
+ md.H2(s.Name)
+ md.Line(body)
+ found = true
}
}
}
if !found {
md.P(fmt.Sprintf("*No section found matching level range %q. Showing full guide.*", levelRange))
- renderFullGuide(md, page, wikiClient)
+ renderFullGuideHTML(md, page.HTML, sections)
}
} else {
- renderFullGuide(md, page, wikiClient)
+ renderFullGuideHTML(md, page.HTML, sections)
}
fmt.Print(md.String())
@@ -98,24 +101,15 @@ Examples:
return cmd
}
-func renderFullGuide(md *render.Builder, page *wiki.ParsedPage, client *wiki.Client) {
- for _, s := range page.Sections {
- if s.Level != "2" {
+func renderFullGuideHTML(md *render.Builder, pageHTML string, sections []htmlconv.SectionInfo) {
+ for _, s := range sections {
+ if s.Level != 2 {
continue
}
- idx := 0
- fmt.Sscanf(s.Index, "%d", &idx)
- if idx <= 0 {
- continue
- }
- sectionPage, err := client.GetPageSection(page.Title, idx)
- if err != nil {
- continue
- }
- plain := extract.ExtractPlainText(sectionPage.Wikitext)
- if strings.TrimSpace(plain) != "" {
- md.H2(s.Line)
- md.P(plain)
+ body := htmlconv.ExtractSection(pageHTML, s.Name)
+ if strings.TrimSpace(body) != "" {
+ md.H2(s.Name)
+ md.Line(body)
}
}
}
diff --git a/scripts/rsw/internal/extract/infobox.go b/scripts/rsw/internal/extract/infobox.go
index ce48621..a99cf4e 100644
--- a/scripts/rsw/internal/extract/infobox.go
+++ b/scripts/rsw/internal/extract/infobox.go
@@ -202,6 +202,7 @@ func cleanWikiLinks(s string) string {
// CleanWikitext strips templates, wiki links, and HTML but preserves line structure
// and converts wiki list markers (* items) to readable bullet points.
func CleanWikitext(s string) string {
+ s = StripTransclusionTags(s)
s = expandKnownTemplates(s)
s = removeTemplates(s)
s = cleanWikiLinks(s)
@@ -239,6 +240,7 @@ func CleanWikitext(s string) string {
// ExtractPlainText strips all wikitext markup to produce plain text.
func ExtractPlainText(wikitext string) string {
s := wikitext
+ s = StripTransclusionTags(s)
s = expandKnownTemplates(s)
s = removeTemplates(s)
s = cleanWikiLinks(s)
@@ -371,6 +373,48 @@ func tryExpandTemplate(inner string) (string, bool) {
return "", false
}
+// StripTransclusionTags handles MediaWiki transclusion directives in raw wikitext.
+// When viewing a page directly (not transcluding):
+//   - <noinclude>content</noinclude>     → keep content (strip tags only)
+//   - <includeonly>content</includeonly> → remove entirely (tags + content)
+//   - <onlyinclude>content</onlyinclude> → keep content (strip tags only)
+func StripTransclusionTags(s string) string {
+	// Remove <includeonly>...</includeonly> blocks entirely
+	for {
+		lower := strings.ToLower(s)
+		start := strings.Index(lower, "<includeonly>")
+		if start == -1 {
+			break
+		}
+		end := strings.Index(lower[start:], "</includeonly>")
+		if end == -1 {
+			// Unclosed tag — remove to end of string
+			s = s[:start]
+			break
+		}
+		s = s[:start] + s[start+end+len("</includeonly>"):]
+	}
+
+	// Unwrap <noinclude> and <onlyinclude> (keep content, remove tags)
+	for _, tag := range []string{"noinclude", "onlyinclude"} {
+		s = removeTagKeepContent(s, tag)
+	}
+	return s
+}
+
+// removeTagKeepContent strips <tag> and </tag> markers while keeping the
+// enclosed content. Handles all-lowercase and ALL-UPPERCASE tag spellings.
+func removeTagKeepContent(s, tag string) string {
+	open := "<" + tag + ">"
+	closing := "</" + tag + ">"
+	s = strings.ReplaceAll(s, open, "")
+	s = strings.ReplaceAll(s, closing, "")
+	// Case-insensitive variants
+	upper := "<" + strings.ToUpper(tag) + ">"
+	upperClose := "</" + strings.ToUpper(tag) + ">"
+	s = strings.ReplaceAll(s, upper, "")
+	return strings.ReplaceAll(s, upperClose, "")
+}
+
func removeTemplates(s string) string {
var b strings.Builder
depth := 0
diff --git a/scripts/rsw/internal/htmlconv/htmlconv.go b/scripts/rsw/internal/htmlconv/htmlconv.go
new file mode 100644
index 0000000..b435da9
--- /dev/null
+++ b/scripts/rsw/internal/htmlconv/htmlconv.go
@@ -0,0 +1,483 @@
+package htmlconv
+
+import (
+ "fmt"
+ "strings"
+
+ "golang.org/x/net/html"
+ "golang.org/x/net/html/atom"
+)
+
+// Convert takes MediaWiki-rendered HTML and returns clean markdown.
+func Convert(rawHTML string) string {
+ doc, err := html.Parse(strings.NewReader(rawHTML))
+ if err != nil {
+ return rawHTML
+ }
+
+ var w walker
+ w.walk(doc)
+ return w.finish()
+}
+
+// ExtractSection extracts a single section by heading text and converts it to markdown.
+// Matches case-insensitively. Returns empty string if not found.
+func ExtractSection(rawHTML string, sectionName string) string {
+ doc, err := html.Parse(strings.NewReader(rawHTML))
+ if err != nil {
+ return ""
+ }
+
+ needle := strings.ToLower(strings.TrimSpace(sectionName))
+
+ // Find the mw-parser-output container.
+ container := findParserOutput(doc)
+ if container == nil {
+ return ""
+ }
+
+ // Walk top-level children to find the target heading div, then collect
+ // siblings until the next heading of equal or lesser depth.
+ var (
+ collecting bool
+ targetLvl int
+ collected []*html.Node
+ )
+ for c := container.FirstChild; c != nil; c = c.NextSibling {
+ if isHeadingDiv(c) {
+ lvl, text := headingInfo(c)
+ if collecting {
+ if lvl <= targetLvl {
+ break
+ }
+ }
+ if !collecting && strings.ToLower(strings.TrimSpace(text)) == needle {
+ collecting = true
+ targetLvl = lvl
+ continue
+ }
+ }
+ if collecting {
+ collected = append(collected, c)
+ }
+ }
+
+ if len(collected) == 0 {
+ return ""
+ }
+
+ var w walker
+ for _, n := range collected {
+ w.walk(n)
+ }
+ return w.finish()
+}
+
+// ListSections returns section names and levels from the HTML.
+func ListSections(rawHTML string) []SectionInfo {
+ doc, err := html.Parse(strings.NewReader(rawHTML))
+ if err != nil {
+ return nil
+ }
+
+ container := findParserOutput(doc)
+ if container == nil {
+ return nil
+ }
+
+ var sections []SectionInfo
+ for c := container.FirstChild; c != nil; c = c.NextSibling {
+ if isHeadingDiv(c) {
+ lvl, text := headingInfo(c)
+ if text != "" {
+ sections = append(sections, SectionInfo{Name: text, Level: lvl})
+ }
+ }
+ }
+ return sections
+}
+
+// SectionInfo holds a section heading name and level.
+type SectionInfo struct {
+ Name string
+ Level int
+}
+
+// --- DOM helpers ---
+
+func findParserOutput(n *html.Node) *html.Node {
+ if n.Type == html.ElementNode && n.DataAtom == atom.Div && hasClass(n, "mw-parser-output") {
+ return n
+ }
+ for c := n.FirstChild; c != nil; c = c.NextSibling {
+ if found := findParserOutput(c); found != nil {
+ return found
+ }
+ }
+ return nil
+}
+
+func isHeadingDiv(n *html.Node) bool {
+ return n.Type == html.ElementNode && n.DataAtom == atom.Div && hasClassPrefix(n, "mw-heading")
+}
+
+func headingInfo(div *html.Node) (level int, text string) {
+ for c := div.FirstChild; c != nil; c = c.NextSibling {
+ if c.Type == html.ElementNode {
+ switch c.DataAtom {
+ case atom.H1:
+ return 1, textContent(c)
+ case atom.H2:
+ return 2, textContent(c)
+ case atom.H3:
+ return 3, textContent(c)
+ case atom.H4:
+ return 4, textContent(c)
+ case atom.H5:
+ return 5, textContent(c)
+ case atom.H6:
+ return 6, textContent(c)
+ }
+ }
+ }
+ return 0, ""
+}
+
+func textContent(n *html.Node) string {
+ if n.Type == html.TextNode {
+ return n.Data
+ }
+ // Skip edit section spans.
+ if n.Type == html.ElementNode && hasClass(n, "mw-editsection") {
+ return ""
+ }
+ var sb strings.Builder
+ for c := n.FirstChild; c != nil; c = c.NextSibling {
+ sb.WriteString(textContent(c))
+ }
+ return sb.String()
+}
+
+func hasClass(n *html.Node, cls string) bool {
+ for _, a := range n.Attr {
+ if a.Key == "class" {
+ for _, c := range strings.Fields(a.Val) {
+ if c == cls {
+ return true
+ }
+ }
+ }
+ }
+ return false
+}
+
+func hasClassPrefix(n *html.Node, prefix string) bool {
+ for _, a := range n.Attr {
+ if a.Key == "class" {
+ for _, c := range strings.Fields(a.Val) {
+ if strings.HasPrefix(c, prefix) {
+ return true
+ }
+ }
+ }
+ }
+ return false
+}
+
+func getAttr(n *html.Node, key string) string {
+ for _, a := range n.Attr {
+ if a.Key == key {
+ return a.Val
+ }
+ }
+ return ""
+}
+
+func isHidden(n *html.Node) bool {
+ style := getAttr(n, "style")
+ return strings.Contains(style, "display:none") || strings.Contains(style, "display: none")
+}
+
+// --- walker: recursive HTML-to-markdown converter ---
+
+type walker struct {
+ sb strings.Builder
+ listCtx []listContext
+}
+
+type listContext struct {
+ ordered bool
+ index int
+}
+
+func (w *walker) finish() string {
+ out := w.sb.String()
+ // Collapse runs of 3+ newlines to 2.
+ for strings.Contains(out, "\n\n\n") {
+ out = strings.ReplaceAll(out, "\n\n\n", "\n\n")
+ }
+ return strings.TrimSpace(out) + "\n"
+}
+
+func (w *walker) walk(n *html.Node) {
+ switch n.Type {
+ case html.TextNode:
+ w.sb.WriteString(n.Data)
+ return
+ case html.ElementNode:
+ // skip
+ case html.DocumentNode:
+ w.walkChildren(n)
+ return
+ default:
+ return
+ }
+
+ if isHidden(n) {
+ return
+ }
+
+ if w.shouldSkip(n) {
+ return
+ }
+
+ switch n.DataAtom {
+ case atom.H1, atom.H2, atom.H3, atom.H4, atom.H5, atom.H6:
+ w.renderHeading(n)
+ case atom.P:
+ w.ensureNewline()
+ w.walkChildren(n)
+ w.sb.WriteString("\n\n")
+ case atom.Br:
+ w.sb.WriteString("\n")
+ case atom.B, atom.Strong:
+ w.sb.WriteString("**")
+ w.walkChildren(n)
+ w.sb.WriteString("**")
+ case atom.I, atom.Em:
+ w.sb.WriteString("*")
+ w.walkChildren(n)
+ w.sb.WriteString("*")
+ case atom.A:
+ w.walkChildren(n)
+ case atom.Ul:
+ w.renderList(n, false)
+ case atom.Ol:
+ w.renderList(n, true)
+ case atom.Li:
+ w.renderListItem(n)
+ case atom.Table:
+ w.renderTable(n)
+ case atom.Img:
+ alt := getAttr(n, "alt")
+ if alt != "" {
+ w.sb.WriteString(alt)
+ }
+ case atom.Div:
+ if isHeadingDiv(n) {
+ lvl, text := headingInfo(n)
+ if text != "" {
+ w.ensureNewline()
+ w.sb.WriteString(strings.Repeat("#", lvl))
+ w.sb.WriteString(" ")
+ w.sb.WriteString(text)
+ w.sb.WriteString("\n\n")
+ }
+ return
+ }
+ w.walkChildren(n)
+ default:
+ w.walkChildren(n)
+ }
+}
+
+func (w *walker) walkChildren(n *html.Node) {
+ for c := n.FirstChild; c != nil; c = c.NextSibling {
+ w.walk(c)
+ }
+}
+
+// shouldSkip reports whether an element (and its whole subtree) must be
+// omitted from the markdown output: navigation chrome, infobox/navbox/message
+// tables, and raw <style>/<script> payloads.
+func (w *walker) shouldSkip(n *html.Node) bool {
+	if n.Type != html.ElementNode {
+		return false
+	}
+	switch n.DataAtom {
+	case atom.Style, atom.Script:
+		// MediaWiki TemplateStyles emits inline <style>; without this the raw
+		// CSS/JS text nodes would leak into the CLI output.
+		return true
+	case atom.Table:
+		// Skip infobox tables — noisy in CLI. RS3 uses "rsw-infobox", OSRS uses "infobox".
+		if hasClass(n, "infobox") || hasClassPrefix(n, "rsw-infobox") {
+			return true
+		}
+		// Skip navbox (bottom-of-page navigation) and messagebox/disambig boxes.
+		if hasClass(n, "navbox") || hasClass(n, "messagebox") {
+			return true
+		}
+	case atom.Div:
+		// Skip "clear" divs.
+		if hasClass(n, "clear-template") {
+			return true
+		}
+	}
+	if getAttr(n, "id") == "toc" || getAttr(n, "id") == "toctemplate" {
+		return true
+	}
+	return hasClass(n, "navigation-not-searchable") || hasClass(n, "mw-editsection")
+}
+
+func (w *walker) ensureNewline() {
+ s := w.sb.String()
+ if len(s) > 0 && s[len(s)-1] != '\n' {
+ w.sb.WriteString("\n")
+ }
+}
+
+func (w *walker) renderHeading(n *html.Node) {
+ level := 0
+ switch n.DataAtom {
+ case atom.H1:
+ level = 1
+ case atom.H2:
+ level = 2
+ case atom.H3:
+ level = 3
+ case atom.H4:
+ level = 4
+ case atom.H5:
+ level = 5
+ case atom.H6:
+ level = 6
+ }
+ w.ensureNewline()
+ w.sb.WriteString(strings.Repeat("#", level))
+ w.sb.WriteString(" ")
+ w.sb.WriteString(textContent(n))
+ w.sb.WriteString("\n\n")
+}
+
+func (w *walker) renderList(n *html.Node, ordered bool) {
+ w.listCtx = append(w.listCtx, listContext{ordered: ordered})
+ w.ensureNewline()
+ for c := n.FirstChild; c != nil; c = c.NextSibling {
+ w.walk(c)
+ }
+ w.listCtx = w.listCtx[:len(w.listCtx)-1]
+ if len(w.listCtx) == 0 {
+ w.sb.WriteString("\n")
+ }
+}
+
+// renderListItem emits one markdown list item at the current nesting depth.
+func (w *walker) renderListItem(n *html.Node) {
+	// Guard: an <li> outside any <ul>/<ol> (malformed HTML). The old code
+	// indexed an empty listCtx here and panicked; treat it as a depth-1 bullet.
+	if len(w.listCtx) == 0 {
+		w.listCtx = append(w.listCtx, listContext{})
+		defer func() { w.listCtx = w.listCtx[:0] }()
+	}
+	depth := len(w.listCtx)
+	indent := strings.Repeat("  ", depth-1)
+	ctx := &w.listCtx[depth-1]
+	if ctx.ordered {
+		ctx.index++
+		w.sb.WriteString(fmt.Sprintf("%s%d. ", indent, ctx.index))
+	} else {
+		w.sb.WriteString(indent + "- ")
+	}
+
+	// Walk children inline — but nested lists should go on their own lines.
+	for c := n.FirstChild; c != nil; c = c.NextSibling {
+		if c.Type == html.ElementNode && (c.DataAtom == atom.Ul || c.DataAtom == atom.Ol) {
+			w.sb.WriteString("\n")
+		}
+		w.walk(c)
+	}
+	w.ensureNewline()
+}
+
+func (w *walker) renderTable(n *html.Node) {
+ rows := collectTableRows(n)
+ if len(rows) == 0 {
+ return
+ }
+
+ w.ensureNewline()
+ w.sb.WriteString("\n")
+
+ var headers []string
+ var dataRows [][]string
+
+ for i, row := range rows {
+ var cells []string
+ allHeaders := true
+ for _, cell := range row {
+ text := strings.TrimSpace(textContent(cell))
+ text = strings.ReplaceAll(text, "|", "/")
+ text = strings.ReplaceAll(text, "\n", " ")
+ cells = append(cells, text)
+ if cell.DataAtom != atom.Th {
+ allHeaders = false
+ }
+ }
+ if i == 0 && allHeaders {
+ headers = cells
+ } else {
+ dataRows = append(dataRows, cells)
+ }
+ }
+
+ if headers == nil && len(dataRows) > 0 {
+ headers = dataRows[0]
+ dataRows = dataRows[1:]
+ }
+
+ if len(headers) == 0 {
+ return
+ }
+
+ // Header row.
+ w.sb.WriteString("| " + strings.Join(headers, " | ") + " |\n")
+ seps := make([]string, len(headers))
+ for i := range seps {
+ seps[i] = "---"
+ }
+ w.sb.WriteString("| " + strings.Join(seps, " | ") + " |\n")
+
+ for _, row := range dataRows {
+ for len(row) < len(headers) {
+ row = append(row, "")
+ }
+ w.sb.WriteString("| " + strings.Join(row[:len(headers)], " | ") + " |\n")
+ }
+ w.sb.WriteString("\n")
+}
+
+func collectTableRows(table *html.Node) [][]*html.Node {
+ var rows [][]*html.Node
+ var visit func(*html.Node)
+ visit = func(n *html.Node) {
+ if n.Type == html.ElementNode && n.DataAtom == atom.Tr {
+ if isHidden(n) {
+ return
+ }
+ var cells []*html.Node
+ for c := n.FirstChild; c != nil; c = c.NextSibling {
+ if c.Type == html.ElementNode && (c.DataAtom == atom.Th || c.DataAtom == atom.Td) {
+ cells = append(cells, c)
+ }
+ }
+ if len(cells) > 0 {
+ rows = append(rows, cells)
+ }
+ return
+ }
+ for c := n.FirstChild; c != nil; c = c.NextSibling {
+ visit(c)
+ }
+ }
+ visit(table)
+ return rows
+}
diff --git a/scripts/rsw/internal/wiki/parse.go b/scripts/rsw/internal/wiki/parse.go
index 2d9d683..9e1fab3 100644
--- a/scripts/rsw/internal/wiki/parse.go
+++ b/scripts/rsw/internal/wiki/parse.go
@@ -97,9 +97,10 @@ func (c *Client) GetPageSection(title string, sectionIndex int) (*ParsedPage, er
// GetPageHTML fetches the rendered HTML for a page.
func (c *Client) GetPageHTML(title string) (*ParsedPage, error) {
params := url.Values{
- "action": {"parse"},
- "page": {title},
- "prop": {"text|sections"},
+ "action": {"parse"},
+ "page": {title},
+ "prop": {"text|sections"},
+ "redirects": {"1"},
}
var resp parseResponse
@@ -133,3 +134,4 @@ func FindSectionIndex(sections []Section, name string) int {
}
return -1
}
+