From c283444d68ebb1607c7b9f2367d17d73f6bad053 Mon Sep 17 00:00:00 2001 From: Sam Myers Date: Thu, 5 Mar 2026 12:51:31 -0600 Subject: [PATCH] Update parser --- scripts/rsw/go.mod | 3 +- scripts/rsw/go.sum | 2 + scripts/rsw/internal/cmd/page.go | 82 ++-- scripts/rsw/internal/cmd/skill.go | 84 ++-- scripts/rsw/internal/extract/infobox.go | 44 ++ scripts/rsw/internal/htmlconv/htmlconv.go | 483 ++++++++++++++++++++++ scripts/rsw/internal/wiki/parse.go | 8 +- 7 files changed, 603 insertions(+), 103 deletions(-) create mode 100644 scripts/rsw/internal/htmlconv/htmlconv.go diff --git a/scripts/rsw/go.mod b/scripts/rsw/go.mod index 05f05b0..cbe59cb 100644 --- a/scripts/rsw/go.mod +++ b/scripts/rsw/go.mod @@ -1,10 +1,11 @@ module github.com/runescape-wiki/rsw -go 1.22 +go 1.25.0 require github.com/spf13/cobra v1.8.1 require ( github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/spf13/pflag v1.0.5 // indirect + golang.org/x/net v0.51.0 // indirect ) diff --git a/scripts/rsw/go.sum b/scripts/rsw/go.sum index 912390a..5e5a1ee 100644 --- a/scripts/rsw/go.sum +++ b/scripts/rsw/go.sum @@ -6,5 +6,7 @@ github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM= github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +golang.org/x/net v0.51.0 h1:94R/GTO7mt3/4wIKpcR5gkGmRLOuE/2hNGeWq/GBIFo= +golang.org/x/net v0.51.0/go.mod h1:aamm+2QF5ogm02fjy5Bb7CQ0WMt1/WVM7FtyaTLlA9Y= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/scripts/rsw/internal/cmd/page.go b/scripts/rsw/internal/cmd/page.go index 9d41469..8681fb5 100644 --- a/scripts/rsw/internal/cmd/page.go +++ b/scripts/rsw/internal/cmd/page.go @@ -4,7 +4,7 @@ import ( 
"fmt" "strings" - "github.com/runescape-wiki/rsw/internal/extract" + "github.com/runescape-wiki/rsw/internal/htmlconv" "github.com/runescape-wiki/rsw/internal/render" "github.com/runescape-wiki/rsw/internal/wiki" "github.com/spf13/cobra" @@ -27,80 +27,54 @@ Examples: title := args[0] client := wiki.NewClient(GameBaseURL()) - page, err := client.GetPage(title) + if Raw() { + page, err := client.GetPage(title) + if err != nil { + return fmt.Errorf("failed to fetch page: %w", err) + } + fmt.Println(page.Wikitext) + return nil + } + + page, err := client.GetPageHTML(title) if err != nil { return fmt.Errorf("failed to fetch page: %w", err) } - wikitext := page.Wikitext - if pageSection != "" { - idx := wiki.FindSectionIndex(page.Sections, pageSection) - if idx == -1 { - needle := strings.ToLower(pageSection) - // Case-insensitive exact match - for _, s := range page.Sections { - if strings.ToLower(s.Line) == needle { - fmt.Sscanf(s.Index, "%d", &idx) - break - } - } - // Case-insensitive prefix match (e.g. "Location" → "Locations") - if idx == -1 { - for _, s := range page.Sections { - if strings.HasPrefix(strings.ToLower(s.Line), needle) { - fmt.Sscanf(s.Index, "%d", &idx) - break - } - } - } - // Case-insensitive contains match - if idx == -1 { - for _, s := range page.Sections { - if strings.Contains(strings.ToLower(s.Line), needle) { - fmt.Sscanf(s.Index, "%d", &idx) - break - } - } - } - } - if idx == -1 { + body := htmlconv.ExtractSection(page.HTML, pageSection) + if body == "" { + sections := htmlconv.ListSections(page.HTML) return fmt.Errorf("section %q not found. 
Available sections: %s", - pageSection, listSections(page.Sections)) + pageSection, formatSectionNames(sections)) } - sectionPage, err := client.GetPageSection(title, idx) - if err != nil { - return fmt.Errorf("failed to fetch section: %w", err) - } - wikitext = sectionPage.Wikitext - } - - if Raw() { - fmt.Println(wikitext) + md := render.New() + md.H1(page.Title) + md.Line(body) + fmt.Print(md.String()) return nil } md := render.New() md.H1(page.Title) - if pageSection == "" && len(page.Sections) > 0 { + sections := htmlconv.ListSections(page.HTML) + if len(sections) > 0 { md.H2("Sections") - for _, s := range page.Sections { + for _, s := range sections { indent := "" - if s.Level == "3" { + if s.Level == 3 { indent = " " - } else if s.Level == "4" { + } else if s.Level >= 4 { indent = " " } - md.Line(fmt.Sprintf("%s- %s", indent, s.Line)) + md.Line(fmt.Sprintf("%s- %s", indent, s.Name)) } md.Newline() md.HR() } - plain := extract.ExtractPlainText(wikitext) - md.P(plain) - + md.Line(htmlconv.Convert(page.HTML)) fmt.Print(md.String()) return nil }, @@ -110,10 +84,10 @@ Examples: return cmd } -func listSections(sections []wiki.Section) string { +func formatSectionNames(sections []htmlconv.SectionInfo) string { names := make([]string, len(sections)) for i, s := range sections { - names[i] = s.Line + names[i] = s.Name } return strings.Join(names, ", ") } diff --git a/scripts/rsw/internal/cmd/skill.go b/scripts/rsw/internal/cmd/skill.go index 8f1a55d..307b86a 100644 --- a/scripts/rsw/internal/cmd/skill.go +++ b/scripts/rsw/internal/cmd/skill.go @@ -4,7 +4,7 @@ import ( "fmt" "strings" - "github.com/runescape-wiki/rsw/internal/extract" + "github.com/runescape-wiki/rsw/internal/htmlconv" "github.com/runescape-wiki/rsw/internal/render" "github.com/runescape-wiki/rsw/internal/wiki" "github.com/spf13/cobra" @@ -31,19 +31,27 @@ Examples: wikiClient := wiki.NewClient(GameBaseURL()) trainingTitle := capitalizeFirst(strings.ToLower(skillName)) + " training" - page, err := 
wikiClient.GetPage(trainingTitle) + + if Raw() { + page, err := wikiClient.GetPage(trainingTitle) + if err != nil { + page, err = wikiClient.GetPage(capitalizeFirst(strings.ToLower(skillName))) + if err != nil { + return fmt.Errorf("failed to fetch skill page: %w", err) + } + } + fmt.Println(page.Wikitext) + return nil + } + + page, err := wikiClient.GetPageHTML(trainingTitle) if err != nil { - page, err = wikiClient.GetPage(capitalizeFirst(strings.ToLower(skillName))) + page, err = wikiClient.GetPageHTML(capitalizeFirst(strings.ToLower(skillName))) if err != nil { return fmt.Errorf("failed to fetch skill page: %w", err) } } - if Raw() { - fmt.Println(page.Wikitext) - return nil - } - md := render.New() md.H1(fmt.Sprintf("%s Training Guide", page.Title)) @@ -51,11 +59,13 @@ Examples: md.P("*Showing methods suitable for ironman accounts (no GE access).*") } - if len(page.Sections) > 0 { + sections := htmlconv.ListSections(page.HTML) + + if len(sections) > 0 { md.H2("Contents") - for _, s := range page.Sections { - if s.Level == "2" { - md.Bullet(s.Line) + for _, s := range sections { + if s.Level == 2 { + md.Bullet(s.Name) } } md.Newline() @@ -63,30 +73,23 @@ Examples: if levelRange != "" { found := false - for _, s := range page.Sections { - if strings.Contains(strings.ToLower(s.Line), strings.ToLower(levelRange)) || - sectionMatchesLevelRange(s.Line, levelRange) { - idx := 0 - fmt.Sscanf(s.Index, "%d", &idx) - if idx > 0 { - sectionPage, err := wikiClient.GetPageSection(page.Title, idx) - if err == nil { - plain := extract.ExtractPlainText(sectionPage.Wikitext) - if strings.TrimSpace(plain) != "" { - md.H2(s.Line) - md.P(plain) - found = true - } - } + for _, s := range sections { + if strings.Contains(strings.ToLower(s.Name), strings.ToLower(levelRange)) || + sectionMatchesLevelRange(s.Name, levelRange) { + body := htmlconv.ExtractSection(page.HTML, s.Name) + if strings.TrimSpace(body) != "" { + md.H2(s.Name) + md.Line(body) + found = true } } } if !found { 
md.P(fmt.Sprintf("*No section found matching level range %q. Showing full guide.*", levelRange)) - renderFullGuide(md, page, wikiClient) + renderFullGuideHTML(md, page.HTML, sections) } } else { - renderFullGuide(md, page, wikiClient) + renderFullGuideHTML(md, page.HTML, sections) } fmt.Print(md.String()) @@ -98,24 +101,15 @@ Examples: return cmd } -func renderFullGuide(md *render.Builder, page *wiki.ParsedPage, client *wiki.Client) { - for _, s := range page.Sections { - if s.Level != "2" { +func renderFullGuideHTML(md *render.Builder, pageHTML string, sections []htmlconv.SectionInfo) { + for _, s := range sections { + if s.Level != 2 { continue } - idx := 0 - fmt.Sscanf(s.Index, "%d", &idx) - if idx <= 0 { - continue - } - sectionPage, err := client.GetPageSection(page.Title, idx) - if err != nil { - continue - } - plain := extract.ExtractPlainText(sectionPage.Wikitext) - if strings.TrimSpace(plain) != "" { - md.H2(s.Line) - md.P(plain) + body := htmlconv.ExtractSection(pageHTML, s.Name) + if strings.TrimSpace(body) != "" { + md.H2(s.Name) + md.Line(body) } } } diff --git a/scripts/rsw/internal/extract/infobox.go b/scripts/rsw/internal/extract/infobox.go index ce48621..a99cf4e 100644 --- a/scripts/rsw/internal/extract/infobox.go +++ b/scripts/rsw/internal/extract/infobox.go @@ -202,6 +202,7 @@ func cleanWikiLinks(s string) string { // CleanWikitext strips templates, wiki links, and HTML but preserves line structure // and converts wiki list markers (* items) to readable bullet points. func CleanWikitext(s string) string { + s = StripTransclusionTags(s) s = expandKnownTemplates(s) s = removeTemplates(s) s = cleanWikiLinks(s) @@ -239,6 +240,7 @@ func CleanWikitext(s string) string { // ExtractPlainText strips all wikitext markup to produce plain text. 
func ExtractPlainText(wikitext string) string { s := wikitext + s = StripTransclusionTags(s) + s = expandKnownTemplates(s) + s = removeTemplates(s) + s = cleanWikiLinks(s) @@ -371,6 +373,48 @@ func tryExpandTemplate(inner string) (string, bool) { return "", false } +// StripTransclusionTags handles MediaWiki transclusion directives in raw wikitext. +// When viewing a page directly (not transcluding): +// - <noinclude>content</noinclude> → keep content (strip tags only) +// - <includeonly>content</includeonly> → remove entirely (tags + content) +// - <onlyinclude>content</onlyinclude> → keep content (strip tags only) +func StripTransclusionTags(s string) string { + // Remove <includeonly>...</includeonly> blocks entirely + for { + lower := strings.ToLower(s) + start := strings.Index(lower, "<includeonly>") + if start == -1 { + break + } + end := strings.Index(lower[start:], "</includeonly>") + if end == -1 { + // Unclosed tag — remove to end of string + s = s[:start] + break + } + s = s[:start] + s[start+end+len("</includeonly>"):] + } + + // Unwrap <noinclude> and <onlyinclude> (keep content, remove tags) + for _, tag := range []string{"noinclude", "onlyinclude"} { + s = removeTagKeepContent(s, tag) + } + return s +} + +func removeTagKeepContent(s, tag string) string { + open := "<" + tag + ">" + close := "</" + tag + ">" + s = strings.ReplaceAll(s, open, "") + s = strings.ReplaceAll(s, close, "") + // Case-insensitive variants + upper := "<" + strings.ToUpper(tag) + ">" + upperClose := "</" + strings.ToUpper(tag) + ">" + s = strings.ReplaceAll(s, upper, "") + s = strings.ReplaceAll(s, upperClose, "") + return s +} + func removeTemplates(s string) string { var b strings.Builder depth := 0 diff --git a/scripts/rsw/internal/htmlconv/htmlconv.go b/scripts/rsw/internal/htmlconv/htmlconv.go new file mode 100644 index 0000000..b435da9 --- /dev/null +++ b/scripts/rsw/internal/htmlconv/htmlconv.go @@ -0,0 +1,483 @@ +package htmlconv + +import ( + "fmt" + "strings" + + "golang.org/x/net/html" + "golang.org/x/net/html/atom" +) + +// Convert takes MediaWiki-rendered HTML and returns clean markdown.
+func Convert(rawHTML string) string { + doc, err := html.Parse(strings.NewReader(rawHTML)) + if err != nil { + return rawHTML + } + + var w walker + w.walk(doc) + return w.finish() +} + +// ExtractSection extracts a single section by heading text and converts it to markdown. +// Matches case-insensitively. Returns empty string if not found. +func ExtractSection(rawHTML string, sectionName string) string { + doc, err := html.Parse(strings.NewReader(rawHTML)) + if err != nil { + return "" + } + + needle := strings.ToLower(strings.TrimSpace(sectionName)) + + // Find the mw-parser-output container. + container := findParserOutput(doc) + if container == nil { + return "" + } + + // Walk top-level children to find the target heading div, then collect + // siblings until the next heading of equal or lesser depth. + var ( + collecting bool + targetLvl int + collected []*html.Node + ) + for c := container.FirstChild; c != nil; c = c.NextSibling { + if isHeadingDiv(c) { + lvl, text := headingInfo(c) + if collecting { + if lvl <= targetLvl { + break + } + } + if !collecting && strings.ToLower(strings.TrimSpace(text)) == needle { + collecting = true + targetLvl = lvl + continue + } + } + if collecting { + collected = append(collected, c) + } + } + + if len(collected) == 0 { + return "" + } + + var w walker + for _, n := range collected { + w.walk(n) + } + return w.finish() +} + +// ListSections returns section names and levels from the HTML. +func ListSections(rawHTML string) []SectionInfo { + doc, err := html.Parse(strings.NewReader(rawHTML)) + if err != nil { + return nil + } + + container := findParserOutput(doc) + if container == nil { + return nil + } + + var sections []SectionInfo + for c := container.FirstChild; c != nil; c = c.NextSibling { + if isHeadingDiv(c) { + lvl, text := headingInfo(c) + if text != "" { + sections = append(sections, SectionInfo{Name: text, Level: lvl}) + } + } + } + return sections +} + +// SectionInfo holds a section heading name and level. 
+type SectionInfo struct { + Name string + Level int +} + +// --- DOM helpers --- + +func findParserOutput(n *html.Node) *html.Node { + if n.Type == html.ElementNode && n.DataAtom == atom.Div && hasClass(n, "mw-parser-output") { + return n + } + for c := n.FirstChild; c != nil; c = c.NextSibling { + if found := findParserOutput(c); found != nil { + return found + } + } + return nil +} + +func isHeadingDiv(n *html.Node) bool { + return n.Type == html.ElementNode && n.DataAtom == atom.Div && hasClassPrefix(n, "mw-heading") +} + +func headingInfo(div *html.Node) (level int, text string) { + for c := div.FirstChild; c != nil; c = c.NextSibling { + if c.Type == html.ElementNode { + switch c.DataAtom { + case atom.H1: + return 1, textContent(c) + case atom.H2: + return 2, textContent(c) + case atom.H3: + return 3, textContent(c) + case atom.H4: + return 4, textContent(c) + case atom.H5: + return 5, textContent(c) + case atom.H6: + return 6, textContent(c) + } + } + } + return 0, "" +} + +func textContent(n *html.Node) string { + if n.Type == html.TextNode { + return n.Data + } + // Skip edit section spans. 
+ if n.Type == html.ElementNode && hasClass(n, "mw-editsection") { + return "" + } + var sb strings.Builder + for c := n.FirstChild; c != nil; c = c.NextSibling { + sb.WriteString(textContent(c)) + } + return sb.String() +} + +func hasClass(n *html.Node, cls string) bool { + for _, a := range n.Attr { + if a.Key == "class" { + for _, c := range strings.Fields(a.Val) { + if c == cls { + return true + } + } + } + } + return false +} + +func hasClassPrefix(n *html.Node, prefix string) bool { + for _, a := range n.Attr { + if a.Key == "class" { + for _, c := range strings.Fields(a.Val) { + if strings.HasPrefix(c, prefix) { + return true + } + } + } + } + return false +} + +func getAttr(n *html.Node, key string) string { + for _, a := range n.Attr { + if a.Key == key { + return a.Val + } + } + return "" +} + +func isHidden(n *html.Node) bool { + style := getAttr(n, "style") + return strings.Contains(style, "display:none") || strings.Contains(style, "display: none") +} + +// --- walker: recursive HTML-to-markdown converter --- + +type walker struct { + sb strings.Builder + listCtx []listContext +} + +type listContext struct { + ordered bool + index int +} + +func (w *walker) finish() string { + out := w.sb.String() + // Collapse runs of 3+ newlines to 2. 
+ for strings.Contains(out, "\n\n\n") { + out = strings.ReplaceAll(out, "\n\n\n", "\n\n") + } + return strings.TrimSpace(out) + "\n" +} + +func (w *walker) walk(n *html.Node) { + switch n.Type { + case html.TextNode: + w.sb.WriteString(n.Data) + return + case html.ElementNode: + // skip + case html.DocumentNode: + w.walkChildren(n) + return + default: + return + } + + if isHidden(n) { + return + } + + if w.shouldSkip(n) { + return + } + + switch n.DataAtom { + case atom.H1, atom.H2, atom.H3, atom.H4, atom.H5, atom.H6: + w.renderHeading(n) + case atom.P: + w.ensureNewline() + w.walkChildren(n) + w.sb.WriteString("\n\n") + case atom.Br: + w.sb.WriteString("\n") + case atom.B, atom.Strong: + w.sb.WriteString("**") + w.walkChildren(n) + w.sb.WriteString("**") + case atom.I, atom.Em: + w.sb.WriteString("*") + w.walkChildren(n) + w.sb.WriteString("*") + case atom.A: + w.walkChildren(n) + case atom.Ul: + w.renderList(n, false) + case atom.Ol: + w.renderList(n, true) + case atom.Li: + w.renderListItem(n) + case atom.Table: + w.renderTable(n) + case atom.Img: + alt := getAttr(n, "alt") + if alt != "" { + w.sb.WriteString(alt) + } + case atom.Div: + if isHeadingDiv(n) { + lvl, text := headingInfo(n) + if text != "" { + w.ensureNewline() + w.sb.WriteString(strings.Repeat("#", lvl)) + w.sb.WriteString(" ") + w.sb.WriteString(text) + w.sb.WriteString("\n\n") + } + return + } + w.walkChildren(n) + default: + w.walkChildren(n) + } +} + +func (w *walker) walkChildren(n *html.Node) { + for c := n.FirstChild; c != nil; c = c.NextSibling { + w.walk(c) + } +} + +func (w *walker) shouldSkip(n *html.Node) bool { + if n.Type != html.ElementNode { + return false + } + if getAttr(n, "id") == "toc" || getAttr(n, "id") == "toctemplate" { + return true + } + if hasClass(n, "navigation-not-searchable") { + return true + } + if hasClass(n, "mw-editsection") { + return true + } + // Skip infobox tables — noisy in CLI. RS3 uses "rsw-infobox", OSRS uses "infobox". 
+ if n.DataAtom == atom.Table && (hasClass(n, "infobox") || hasClassPrefix(n, "rsw-infobox")) { + return true + } + // Skip navbox (bottom-of-page navigation templates). + if n.DataAtom == atom.Table && hasClass(n, "navbox") { + return true + } + // Skip messagebox / disambig boxes. + if n.DataAtom == atom.Table && hasClass(n, "messagebox") { + return true + } + // Skip "clear" divs. + if n.DataAtom == atom.Div && hasClass(n, "clear-template") { + return true + } + return false +} + +func (w *walker) ensureNewline() { + s := w.sb.String() + if len(s) > 0 && s[len(s)-1] != '\n' { + w.sb.WriteString("\n") + } +} + +func (w *walker) renderHeading(n *html.Node) { + level := 0 + switch n.DataAtom { + case atom.H1: + level = 1 + case atom.H2: + level = 2 + case atom.H3: + level = 3 + case atom.H4: + level = 4 + case atom.H5: + level = 5 + case atom.H6: + level = 6 + } + w.ensureNewline() + w.sb.WriteString(strings.Repeat("#", level)) + w.sb.WriteString(" ") + w.sb.WriteString(textContent(n)) + w.sb.WriteString("\n\n") +} + +func (w *walker) renderList(n *html.Node, ordered bool) { + w.listCtx = append(w.listCtx, listContext{ordered: ordered}) + w.ensureNewline() + for c := n.FirstChild; c != nil; c = c.NextSibling { + w.walk(c) + } + w.listCtx = w.listCtx[:len(w.listCtx)-1] + if len(w.listCtx) == 0 { + w.sb.WriteString("\n") + } +} + +func (w *walker) renderListItem(n *html.Node) { + depth := len(w.listCtx) + if depth == 0 { + depth = 1 + } + indent := strings.Repeat(" ", depth-1) + + ctx := &w.listCtx[len(w.listCtx)-1] + if ctx.ordered { + ctx.index++ + w.sb.WriteString(fmt.Sprintf("%s%d. ", indent, ctx.index)) + } else { + w.sb.WriteString(indent + "- ") + } + + // Walk children inline — but nested lists should go on their own lines. 
+ for c := n.FirstChild; c != nil; c = c.NextSibling { + if c.Type == html.ElementNode && (c.DataAtom == atom.Ul || c.DataAtom == atom.Ol) { + w.sb.WriteString("\n") + w.walk(c) + } else { + w.walk(c) + } + } + w.ensureNewline() +} + +func (w *walker) renderTable(n *html.Node) { + rows := collectTableRows(n) + if len(rows) == 0 { + return + } + + w.ensureNewline() + w.sb.WriteString("\n") + + var headers []string + var dataRows [][]string + + for i, row := range rows { + var cells []string + allHeaders := true + for _, cell := range row { + text := strings.TrimSpace(textContent(cell)) + text = strings.ReplaceAll(text, "|", "/") + text = strings.ReplaceAll(text, "\n", " ") + cells = append(cells, text) + if cell.DataAtom != atom.Th { + allHeaders = false + } + } + if i == 0 && allHeaders { + headers = cells + } else { + dataRows = append(dataRows, cells) + } + } + + if headers == nil && len(dataRows) > 0 { + headers = dataRows[0] + dataRows = dataRows[1:] + } + + if len(headers) == 0 { + return + } + + // Header row. 
+ w.sb.WriteString("| " + strings.Join(headers, " | ") + " |\n") + seps := make([]string, len(headers)) + for i := range seps { + seps[i] = "---" + } + w.sb.WriteString("| " + strings.Join(seps, " | ") + " |\n") + + for _, row := range dataRows { + for len(row) < len(headers) { + row = append(row, "") + } + w.sb.WriteString("| " + strings.Join(row[:len(headers)], " | ") + " |\n") + } + w.sb.WriteString("\n") +} + +func collectTableRows(table *html.Node) [][]*html.Node { + var rows [][]*html.Node + var visit func(*html.Node) + visit = func(n *html.Node) { + if n.Type == html.ElementNode && n.DataAtom == atom.Tr { + if isHidden(n) { + return + } + var cells []*html.Node + for c := n.FirstChild; c != nil; c = c.NextSibling { + if c.Type == html.ElementNode && (c.DataAtom == atom.Th || c.DataAtom == atom.Td) { + cells = append(cells, c) + } + } + if len(cells) > 0 { + rows = append(rows, cells) + } + return + } + for c := n.FirstChild; c != nil; c = c.NextSibling { + visit(c) + } + } + visit(table) + return rows +} diff --git a/scripts/rsw/internal/wiki/parse.go b/scripts/rsw/internal/wiki/parse.go index 2d9d683..9e1fab3 100644 --- a/scripts/rsw/internal/wiki/parse.go +++ b/scripts/rsw/internal/wiki/parse.go @@ -97,9 +97,10 @@ func (c *Client) GetPageSection(title string, sectionIndex int) (*ParsedPage, er // GetPageHTML fetches the rendered HTML for a page. func (c *Client) GetPageHTML(title string) (*ParsedPage, error) { params := url.Values{ - "action": {"parse"}, - "page": {title}, - "prop": {"text|sections"}, + "action": {"parse"}, + "page": {title}, + "prop": {"text|sections"}, + "redirects": {"1"}, } var resp parseResponse @@ -133,3 +134,4 @@ func FindSectionIndex(sections []Section, name string) int { } return -1 } +