Parse HTML to markdown

This commit is contained in:
Melroy van den Berg 2025-04-24 16:22:57 +02:00
parent f97368cc70
commit 756832d8e5
No known key found for this signature in database
GPG key ID: 71D11FF23454B9D7
3 changed files with 95 additions and 57 deletions

View file

@ -8,6 +8,7 @@ import (
"github.com/fatih/color"
"github.com/tidwall/gjson"
"github.com/vlanse/go-term-markdown"
)
// Format takes ActivityPub data and returns a formatted string representation
@ -87,7 +88,8 @@ func formatActor(jsonStr string, parts []string, bold, cyan, green, red, yellow
}
if summary := gjson.Get(jsonStr, "summary").String(); summary != "" {
parts = append(parts, fmt.Sprintf("%s: %s", bold("Summary"), summary))
md := htmlToMarkdown(summary)
parts = append(parts, fmt.Sprintf("%s:\n%s", bold("Summary"), renderMarkdown(md)))
}
if published := gjson.Get(jsonStr, "published").String(); published != "" {
@ -108,12 +110,11 @@ func formatActor(jsonStr string, parts []string, bold, cyan, green, red, yellow
// formatContent formats content-type objects (Note, Article, etc.)
func formatContent(jsonStr string, parts []string, bold, green, yellow func(a ...interface{}) string) []string {
if content := gjson.Get(jsonStr, "content").String(); content != "" {
// Strip HTML tags for display
content = stripHTML(content)
if len(content) > 300 {
content = content[:297] + "..."
md := htmlToMarkdown(content)
if len(md) > 300 {
md = md[:297] + "..."
}
parts = append(parts, fmt.Sprintf("%s: %s", bold("Content"), content))
parts = append(parts, fmt.Sprintf("%s:\n%s", bold("Content"), renderMarkdown(md)))
}
// Check for attachments (images, videos, etc.)
@ -204,11 +205,8 @@ func formatActivity(jsonStr string, parts []string, bold, green, yellow func(a .
parts = append(parts, fmt.Sprintf("%s: %s", bold("Object Type"), yellow(objectType)))
if content := gjson.Get(jsonStr, "object.content").String(); content != "" {
content = stripHTML(content)
if len(content) > 300 {
content = content[:297] + "..."
}
parts = append(parts, fmt.Sprintf("%s: %s", bold("Content"), content))
md := htmlToMarkdown(content)
parts = append(parts, fmt.Sprintf("%s:\n%s", bold("Content"), renderMarkdown(md)))
}
// Check for attachments in the object
@ -324,11 +322,8 @@ func formatEvent(jsonStr string, parts []string, bold, green, yellow func(a ...i
}
if content := gjson.Get(jsonStr, "content").String(); content != "" {
content = stripHTML(content)
if len(content) > 300 {
content = content[:297] + "..."
}
parts = append(parts, fmt.Sprintf("%s: %s", bold("Description"), content))
md := htmlToMarkdown(content)
parts = append(parts, fmt.Sprintf("%s:\n%s", bold("Description"), renderMarkdown(md)))
}
if startTime := gjson.Get(jsonStr, "startTime").String(); startTime != "" {
@ -359,6 +354,34 @@ func formatTombstone(jsonStr string, parts []string, bold, green, yellow func(a
return parts
}
// renderMarkdown renders a Markdown string through markdown.Render and
// returns the rendered bytes as a string.
func renderMarkdown(md string) string {
	// Layout arguments handed to markdown.Render.
	// NOTE(review): in go-term-markdown these are presumably the line width
	// and the left padding — confirm against the vlanse fork's Render signature.
	const (
		lineWidth = 80
		leftPad   = 6
	)

	rendered := markdown.Render(md, lineWidth, leftPad)
	return string(rendered)
}
// htmlToMarkdown converts an HTML fragment into plain text that can be handed
// to the Markdown renderer.
//
// Every <br> and every opening or closing <p> tag becomes a single newline,
// regardless of letter case, attributes, or self-closing syntax (so "<br />"
// and `<p class="x">` are handled as well as "<br>" and "<p>"). All other
// tags are removed. A '<' that has no matching '>' is not a tag; it and the
// text after it are kept verbatim.
//
// HTML entities (for example "&amp;") are not decoded by this function.
func htmlToMarkdown(html string) string {
	var out strings.Builder
	out.Grow(len(html))

	// Single left-to-right pass: copy the text in front of each tag, emit a
	// newline for line-breaking tags, and continue after the closing '>'.
	rest := html
	for {
		open := strings.IndexByte(rest, '<')
		if open == -1 {
			break
		}
		length := strings.IndexByte(rest[open:], '>')
		if length == -1 {
			// Unterminated '<': treat the remainder as literal text.
			break
		}
		out.WriteString(rest[:open])
		if isLineBreakTag(rest[open+1 : open+length]) {
			out.WriteByte('\n')
		}
		rest = rest[open+length+1:]
	}
	out.WriteString(rest)
	return out.String()
}

// isLineBreakTag reports whether tag (the text between '<' and '>') names a
// <br> or <p> element, in either opening or closing form.
func isLineBreakTag(tag string) bool {
	name := strings.TrimPrefix(strings.TrimSpace(tag), "/")
	// The element name ends at the first whitespace (attributes follow) or
	// at the '/' of a self-closing tag.
	if end := strings.IndexAny(name, " \t\r\n/"); end != -1 {
		name = name[:end]
	}
	return strings.EqualFold(name, "br") || strings.EqualFold(name, "p")
}
// formatDate formats an ISO 8601 date string to a more readable format
func formatDate(isoDate string) string {
t, err := time.Parse(time.RFC3339, isoDate)
@ -368,46 +391,6 @@ func formatDate(isoDate string) string {
return t.Format("Jan 02, 2006 15:04:05")
}
// htmlEntityReplacer decodes the small set of HTML entities stripHTML
// understands. A Replacer scans the input once, so the output of one
// replacement is never decoded a second time ("&amp;lt;" becomes "&lt;").
var htmlEntityReplacer = strings.NewReplacer(
	"&amp;", "&",
	"&lt;", "<",
	"&gt;", ">",
	"&quot;", "\"",
	"&#39;", "'",
	"&nbsp;", " ",
)

// stripHTML removes HTML tags from a string and returns the remaining text
// with common entities decoded and whitespace normalized.
//
// Processing order matters:
//  1. Tags are removed first, so that escaped markup such as "&lt;b&gt;" is
//     still in entity form and is not mistaken for a real tag.
//  2. Entities are decoded in a single deterministic pass.
//  3. Runs of whitespace are collapsed to single spaces and the result is
//     trimmed.
//
// A '<' without a matching '>' is kept as literal text. This is a simple
// scanner, not a full HTML parser.
func stripHTML(html string) string {
	var text strings.Builder
	text.Grow(len(html))

	// Copy everything outside of <...> spans.
	rest := html
	for {
		start := strings.IndexByte(rest, '<')
		if start == -1 {
			break
		}
		length := strings.IndexByte(rest[start:], '>')
		if length == -1 {
			// Unterminated '<': not a tag, keep the remainder as is.
			break
		}
		text.WriteString(rest[:start])
		rest = rest[start+length+1:]
	}
	text.WriteString(rest)

	decoded := htmlEntityReplacer.Replace(text.String())

	// Normalize whitespace.
	return strings.Join(strings.Fields(decoded), " ")
}
// formatArray formats an array of values into a readable string
func formatArray(values []gjson.Result) string {
if len(values) == 0 {

15
go.mod
View file

@ -9,13 +9,26 @@ require (
)
require (
github.com/MichaelMure/go-term-text v0.3.1 // indirect
github.com/alecthomas/chroma v0.10.0 // indirect
github.com/disintegration/imaging v1.6.2 // indirect
github.com/dlclark/regexp2 v1.10.0 // indirect
github.com/eliukblau/pixterm v1.3.1 // indirect
github.com/go-fed/httpsig v1.1.0 // indirect
github.com/gomarkdown/markdown v0.0.0-20231222211730-1d6d20845b47 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/kyokomi/emoji/v2 v2.2.12 // indirect
github.com/lucasb-eyer/go-colorful v1.2.0 // indirect
github.com/mattn/go-colorable v0.1.13 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect
github.com/mattn/go-runewidth v0.0.15 // indirect
github.com/rivo/uniseg v0.4.7 // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/tidwall/match v1.1.1 // indirect
github.com/tidwall/pretty v1.2.1 // indirect
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9 // indirect
github.com/vlanse/go-term-markdown v0.0.1-dev2 // indirect
golang.org/x/crypto v0.19.0 // indirect
golang.org/x/image v0.15.0 // indirect
golang.org/x/net v0.21.0 // indirect
golang.org/x/sys v0.18.0 // indirect
)

42
go.sum
View file

@ -1,20 +1,50 @@
github.com/MichaelMure/go-term-text v0.3.1 h1:Kw9kZanyZWiCHOYu9v/8pWEgDQ6UVN9/ix2Vd2zzWf0=
github.com/MichaelMure/go-term-text v0.3.1/go.mod h1:QgVjAEDUnRMlzpS6ky5CGblux7ebeiLnuy9dAaFZu8o=
github.com/alecthomas/chroma v0.10.0 h1:7XDcGkCQopCNKjZHfYrNLraA+M7e0fMiJ/Mfikbfjek=
github.com/alecthomas/chroma v0.10.0/go.mod h1:jtJATyUxlIORhUOFNA9NZDWGAQ8wpxQQqNSB4rjA/1s=
github.com/cpuguy83/go-md2man/v2 v2.0.3/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/disintegration/imaging v1.6.2 h1:w1LecBlG2Lnp8B3jk5zSuNqd7b4DXhcjwek1ei82L+c=
github.com/disintegration/imaging v1.6.2/go.mod h1:44/5580QXChDfwIclfc/PCwrr44amcmDAg8hxG0Ewe4=
github.com/dlclark/regexp2 v1.4.0/go.mod h1:2pZnwuY/m+8K6iRw6wQdMtk+rH5tNGR1i55kozfMjCc=
github.com/dlclark/regexp2 v1.10.0 h1:+/GIL799phkJqYW+3YbOd8LCcbHzT0Pbo8zl70MHsq0=
github.com/dlclark/regexp2 v1.10.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
github.com/eliukblau/pixterm v1.3.1 h1:XeouQViH+lmzCa7sMUoK2cd7qlgHYGLIjwRKaOdJbKA=
github.com/eliukblau/pixterm v1.3.1/go.mod h1:on5ueknFt+ZFVvIVVzQ7/JXwPjv5fJd8Q1Ybh7XixfU=
github.com/fatih/color v1.16.0 h1:zmkK9Ngbjj+K0yRhTVONQh1p/HknKYSlNT+vZCzyokM=
github.com/fatih/color v1.16.0/go.mod h1:fL2Sau1YI5c0pdGEVCbKQbLXB6edEj1ZgiY4NijnWvE=
github.com/go-fed/httpsig v1.1.0 h1:9M+hb0jkEICD8/cAiNqEB66R87tTINszBRTjwjQzWcI=
github.com/go-fed/httpsig v1.1.0/go.mod h1:RCMrTZvN1bJYtofsG4rd5NaO5obxQ5xBkdiS7xsT7bM=
github.com/gomarkdown/markdown v0.0.0-20231222211730-1d6d20845b47 h1:k4Tw0nt6lwro3Uin8eqoET7MDA4JnT8YgbCjc/g5E3k=
github.com/gomarkdown/markdown v0.0.0-20231222211730-1d6d20845b47/go.mod h1:JDGcbDT52eL4fju3sZ4TeHGsQwhG9nbDV21aMyhwPoA=
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
github.com/kyokomi/emoji/v2 v2.2.12 h1:sSVA5nH9ebR3Zji1o31wu3yOwD1zKXQA2z0zUyeit60=
github.com/kyokomi/emoji/v2 v2.2.12/go.mod h1:JUcn42DTdsXJo1SWanHh4HKDEyPaR5CqkmoirZZP9qE=
github.com/lucasb-eyer/go-colorful v1.0.3/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0=
github.com/lucasb-eyer/go-colorful v1.2.0 h1:1nnpGOrhyZZuNyfu1QjKiUICQ74+3FNCN69Aj6K7nkY=
github.com/lucasb-eyer/go-colorful v1.2.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0=
github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mattn/go-runewidth v0.0.12/go.mod h1:RAqKPSqVFrSLVXbA8x7dzmKdmGzieGRCM46jaSJTDAk=
github.com/mattn/go-runewidth v0.0.15 h1:UNAjwbU9l54TA3KzvqLGxwWjHmMgBUVhBiTjelZgg3U=
github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rivo/uniseg v0.1.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/spf13/cobra v1.8.0 h1:7aJaZx1B85qltLMc546zn58BxxfZdR/W22ej9CFoEf0=
github.com/spf13/cobra v1.8.0/go.mod h1:WXLWApfZ71AjXPya3WOlMsY9yMs7YeiHhFVlvLyhcho=
github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/tidwall/gjson v1.17.1 h1:wlYEnwqAHgzmhNUFfw7Xalt2JzQvsMx2Se4PcoFCT/U=
github.com/tidwall/gjson v1.17.1/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA=
@ -22,10 +52,21 @@ github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JT
github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
github.com/tidwall/pretty v1.2.1 h1:qjsOFOWWQl+N3RsoF5/ssm1pHmJJwhjlSbZ51I6wMl4=
github.com/tidwall/pretty v1.2.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
github.com/vlanse/go-term-markdown v0.0.1-dev2 h1:sisNMYZSc2zdetAo7/kK5DRqzwfShlbuMdXEPAYlviQ=
github.com/vlanse/go-term-markdown v0.0.1-dev2/go.mod h1:ujQ7UdQuyzdk827VWflQknUMr7qyQHPHIQA0wDgVWwc=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20191206172530-e9b2fee46413/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9 h1:psW17arqaxU48Z5kZ0CQnkZWQJsqcURM6tKiBApRjXI=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.19.0 h1:ENy+Az/9Y1vSrlrvBSyna3PITt4tiZLf7sgCjZBX7Wo=
golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU=
golang.org/x/image v0.0.0-20191009234506-e7c1f5e7dbb8/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
golang.org/x/image v0.0.0-20191206065243-da761ea9ff43/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
golang.org/x/image v0.15.0 h1:kOELfmgrmJlw4Cdb7g/QGuB3CvDrXbqEIww/pNtNBm8=
golang.org/x/image v0.15.0/go.mod h1:HUYqC05R2ZcZ3ejNQsIHQDQiwWM4JBqmm6MKANTp4LE=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.21.0 h1:AQyQV4dYCvJ7vGmJyKki9+PBdyvhkSd8EIx/qb0AYv4=
golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
@ -34,4 +75,5 @@ golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4=
golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=