// Source listing metadata: 114 lines, 2.4 KiB, Go.

package main
|
|
|
|
import (
|
|
"fmt"
|
|
"net/http"
|
|
"os"
|
|
"strings"
|
|
|
|
"encoding/csv"
|
|
|
|
"time"
|
|
"golang.org/x/net/html"
|
|
)
|
|
|
|
// Book is one catalogue entry extracted by Scrape and written as a row of
// books.csv.
type Book struct {
	// Title is the value of the second attribute of the first child of the
	// entry's <h3> element.
	Title string
	// Price is the price text exactly as it appears in the page; it is kept
	// as a string and never parsed into a number.
	Price string
	// Available is true when the availability text, with spaces and
	// newlines removed, equals "Instock".
	Available bool
}
|
|
|
|
func FindFirstElement(root *html.Node, name string) *html.Node {
|
|
if root.Type == html.ElementNode && root.Data == name {
|
|
return root
|
|
}
|
|
for c := root.FirstChild; c != nil; c = c.NextSibling {
|
|
el := FindFirstElement(c, name)
|
|
if el != nil {
|
|
return el
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func Scrape(url string) {
|
|
res, err := http.Get(url)
|
|
if err != nil {
|
|
LogList.Log(err.Error())
|
|
return
|
|
}
|
|
defer res.Body.Close()
|
|
|
|
root, err := html.Parse(res.Body)
|
|
if err != nil {
|
|
LogList.Log(err.Error())
|
|
return
|
|
}
|
|
|
|
ol := FindFirstElement(root, "ol")
|
|
|
|
books := []Book{}
|
|
|
|
for c := ol.FirstChild; c != nil; c = c.NextSibling {
|
|
if c.Type == html.ElementNode && c.Data == "li" {
|
|
article := FindFirstElement(c, "article")
|
|
article.RemoveChild(FindFirstElement(article, "div"))
|
|
h3 := FindFirstElement(article, "h3")
|
|
title := h3.FirstChild.Attr[1].Val
|
|
productPrice := FindFirstElement(article, "div")
|
|
price := productPrice.FirstChild.NextSibling.FirstChild.Data
|
|
isAvailable := productPrice.FirstChild.NextSibling.NextSibling.NextSibling.FirstChild.NextSibling.NextSibling.Data
|
|
isAvailable = strings.ReplaceAll(isAvailable, "\n", "")
|
|
isAvailable = strings.ReplaceAll(isAvailable, " ", "")
|
|
|
|
if len(title) > 0 && len(price) > 0 && len(isAvailable) > 0 {
|
|
books = append(books, Book{Title: title, Price: price, Available: isAvailable == "Instock"})
|
|
} else {
|
|
LogList.Log(fmt.Sprintf("Error: %s %s %s", title, price, isAvailable))
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
file, err := os.Create("books.csv")
|
|
if err != nil {
|
|
LogList.Log(err.Error())
|
|
return
|
|
}
|
|
defer file.Close()
|
|
|
|
writer := csv.NewWriter(file)
|
|
defer writer.Flush()
|
|
|
|
err = writer.Write([]string{"Title", "Price", "Available"})
|
|
if err != nil {
|
|
LogList.Log(err.Error())
|
|
return
|
|
}
|
|
|
|
for _, book := range books {
|
|
err = writer.Write([]string{book.Title, book.Price, fmt.Sprintf("%t", book.Available)})
|
|
if err != nil {
|
|
LogList.Log(err.Error())
|
|
return
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
func main() {
|
|
url := "https://books.toscrape.com/catalogue/category/books/travel_2/index.html"
|
|
Scrape(url)
|
|
LogList.Log("Done!")
|
|
f, err := os.Create("logs.txt")
|
|
if err != nil {
|
|
fmt.Println(err)
|
|
}
|
|
defer f.Close()
|
|
_, err = f.Write([]byte(time.Now().String() + ":" + url + "\n" + LogList.ToString()))
|
|
if err != nil {
|
|
fmt.Println(err)
|
|
}
|
|
}
|
|
|