Update parser

This commit is contained in:
2026-03-05 12:51:31 -06:00
parent b20d68c9f0
commit c283444d68
7 changed files with 603 additions and 103 deletions

View File

@@ -4,7 +4,7 @@ import (
"fmt"
"strings"
"github.com/runescape-wiki/rsw/internal/extract"
"github.com/runescape-wiki/rsw/internal/htmlconv"
"github.com/runescape-wiki/rsw/internal/render"
"github.com/runescape-wiki/rsw/internal/wiki"
"github.com/spf13/cobra"
@@ -27,80 +27,54 @@ Examples:
title := args[0]
client := wiki.NewClient(GameBaseURL())
page, err := client.GetPage(title)
if Raw() {
page, err := client.GetPage(title)
if err != nil {
return fmt.Errorf("failed to fetch page: %w", err)
}
fmt.Println(page.Wikitext)
return nil
}
page, err := client.GetPageHTML(title)
if err != nil {
return fmt.Errorf("failed to fetch page: %w", err)
}
wikitext := page.Wikitext
if pageSection != "" {
idx := wiki.FindSectionIndex(page.Sections, pageSection)
if idx == -1 {
needle := strings.ToLower(pageSection)
// Case-insensitive exact match
for _, s := range page.Sections {
if strings.ToLower(s.Line) == needle {
fmt.Sscanf(s.Index, "%d", &idx)
break
}
}
// Case-insensitive prefix match (e.g. "Location" → "Locations")
if idx == -1 {
for _, s := range page.Sections {
if strings.HasPrefix(strings.ToLower(s.Line), needle) {
fmt.Sscanf(s.Index, "%d", &idx)
break
}
}
}
// Case-insensitive contains match
if idx == -1 {
for _, s := range page.Sections {
if strings.Contains(strings.ToLower(s.Line), needle) {
fmt.Sscanf(s.Index, "%d", &idx)
break
}
}
}
}
if idx == -1 {
body := htmlconv.ExtractSection(page.HTML, pageSection)
if body == "" {
sections := htmlconv.ListSections(page.HTML)
return fmt.Errorf("section %q not found. Available sections: %s",
pageSection, listSections(page.Sections))
pageSection, formatSectionNames(sections))
}
sectionPage, err := client.GetPageSection(title, idx)
if err != nil {
return fmt.Errorf("failed to fetch section: %w", err)
}
wikitext = sectionPage.Wikitext
}
if Raw() {
fmt.Println(wikitext)
md := render.New()
md.H1(page.Title)
md.Line(body)
fmt.Print(md.String())
return nil
}
md := render.New()
md.H1(page.Title)
if pageSection == "" && len(page.Sections) > 0 {
sections := htmlconv.ListSections(page.HTML)
if len(sections) > 0 {
md.H2("Sections")
for _, s := range page.Sections {
for _, s := range sections {
indent := ""
if s.Level == "3" {
if s.Level == 3 {
indent = " "
} else if s.Level == "4" {
} else if s.Level >= 4 {
indent = " "
}
md.Line(fmt.Sprintf("%s- %s", indent, s.Line))
md.Line(fmt.Sprintf("%s- %s", indent, s.Name))
}
md.Newline()
md.HR()
}
plain := extract.ExtractPlainText(wikitext)
md.P(plain)
md.Line(htmlconv.Convert(page.HTML))
fmt.Print(md.String())
return nil
},
@@ -110,10 +84,10 @@ Examples:
return cmd
}
func listSections(sections []wiki.Section) string {
func formatSectionNames(sections []htmlconv.SectionInfo) string {
names := make([]string, len(sections))
for i, s := range sections {
names[i] = s.Line
names[i] = s.Name
}
return strings.Join(names, ", ")
}

View File

@@ -4,7 +4,7 @@ import (
"fmt"
"strings"
"github.com/runescape-wiki/rsw/internal/extract"
"github.com/runescape-wiki/rsw/internal/htmlconv"
"github.com/runescape-wiki/rsw/internal/render"
"github.com/runescape-wiki/rsw/internal/wiki"
"github.com/spf13/cobra"
@@ -31,19 +31,27 @@ Examples:
wikiClient := wiki.NewClient(GameBaseURL())
trainingTitle := capitalizeFirst(strings.ToLower(skillName)) + " training"
page, err := wikiClient.GetPage(trainingTitle)
if Raw() {
page, err := wikiClient.GetPage(trainingTitle)
if err != nil {
page, err = wikiClient.GetPage(capitalizeFirst(strings.ToLower(skillName)))
if err != nil {
return fmt.Errorf("failed to fetch skill page: %w", err)
}
}
fmt.Println(page.Wikitext)
return nil
}
page, err := wikiClient.GetPageHTML(trainingTitle)
if err != nil {
page, err = wikiClient.GetPage(capitalizeFirst(strings.ToLower(skillName)))
page, err = wikiClient.GetPageHTML(capitalizeFirst(strings.ToLower(skillName)))
if err != nil {
return fmt.Errorf("failed to fetch skill page: %w", err)
}
}
if Raw() {
fmt.Println(page.Wikitext)
return nil
}
md := render.New()
md.H1(fmt.Sprintf("%s Training Guide", page.Title))
@@ -51,11 +59,13 @@ Examples:
md.P("*Showing methods suitable for ironman accounts (no GE access).*")
}
if len(page.Sections) > 0 {
sections := htmlconv.ListSections(page.HTML)
if len(sections) > 0 {
md.H2("Contents")
for _, s := range page.Sections {
if s.Level == "2" {
md.Bullet(s.Line)
for _, s := range sections {
if s.Level == 2 {
md.Bullet(s.Name)
}
}
md.Newline()
@@ -63,30 +73,23 @@ Examples:
if levelRange != "" {
found := false
for _, s := range page.Sections {
if strings.Contains(strings.ToLower(s.Line), strings.ToLower(levelRange)) ||
sectionMatchesLevelRange(s.Line, levelRange) {
idx := 0
fmt.Sscanf(s.Index, "%d", &idx)
if idx > 0 {
sectionPage, err := wikiClient.GetPageSection(page.Title, idx)
if err == nil {
plain := extract.ExtractPlainText(sectionPage.Wikitext)
if strings.TrimSpace(plain) != "" {
md.H2(s.Line)
md.P(plain)
found = true
}
}
for _, s := range sections {
if strings.Contains(strings.ToLower(s.Name), strings.ToLower(levelRange)) ||
sectionMatchesLevelRange(s.Name, levelRange) {
body := htmlconv.ExtractSection(page.HTML, s.Name)
if strings.TrimSpace(body) != "" {
md.H2(s.Name)
md.Line(body)
found = true
}
}
}
if !found {
md.P(fmt.Sprintf("*No section found matching level range %q. Showing full guide.*", levelRange))
renderFullGuide(md, page, wikiClient)
renderFullGuideHTML(md, page.HTML, sections)
}
} else {
renderFullGuide(md, page, wikiClient)
renderFullGuideHTML(md, page.HTML, sections)
}
fmt.Print(md.String())
@@ -98,24 +101,15 @@ Examples:
return cmd
}
func renderFullGuide(md *render.Builder, page *wiki.ParsedPage, client *wiki.Client) {
for _, s := range page.Sections {
if s.Level != "2" {
func renderFullGuideHTML(md *render.Builder, pageHTML string, sections []htmlconv.SectionInfo) {
for _, s := range sections {
if s.Level != 2 {
continue
}
idx := 0
fmt.Sscanf(s.Index, "%d", &idx)
if idx <= 0 {
continue
}
sectionPage, err := client.GetPageSection(page.Title, idx)
if err != nil {
continue
}
plain := extract.ExtractPlainText(sectionPage.Wikitext)
if strings.TrimSpace(plain) != "" {
md.H2(s.Line)
md.P(plain)
body := htmlconv.ExtractSection(pageHTML, s.Name)
if strings.TrimSpace(body) != "" {
md.H2(s.Name)
md.Line(body)
}
}
}