Skip to content

403 Forbidden from colly, but Postman returns 200 #831

@C-L-STARK

Description

@C-L-STARK

https://pixabay.com/zh/photos/search/?order=ec&pagi=1

We want to use colly to get some images from this website,
but we get a 403 response, while the same request in Postman returns 200. Why?

package main

import (
	"strconv"

	"github.com/gocolly/colly"
)

// main crawls the first Pixabay photo-search result page with colly, prints
// the text of every <script> element it finds, and follows each script's src
// attribute. Requested URLs and request failures are reported via println.
func main() {
	c := colly.NewCollector(
		// Requests run asynchronously; c.Wait() at the end waits for them.
		colly.Async(true),
	)
	c.UserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36 OPR/26.0.1656.60"
	// Cap concurrency at 4 parallel requests for every domain.
	if err := c.Limit(&colly.LimitRule{DomainGlob: "*", Parallelism: 4}); err != nil {
		println("invalid limit rule:", err.Error())
		return
	}

	// Print every script's text and follow external scripts.
	c.OnHTML("script", func(e *colly.HTMLElement) {
		// Inline scripts (including type="application/ld+json") carry their
		// payload in e.Text; external scripts have empty text.
		println(e.Text)

		// Inline scripts have no src attribute; visiting "" would only
		// produce an error, so skip them.
		src := e.Attr("src")
		if src == "" {
			return
		}
		if err := e.Request.Visit(src); err != nil {
			println("skipping", src, "-", err.Error())
		}
	})

	// Log each URL just before it is requested.
	c.OnRequest(func(r *colly.Request) {
		println(r.URL.String())
	})

	// Report the HTTP status and error text of every failed request.
	c.OnError(func(r *colly.Response, e error) {
		println(r.StatusCode)
		println(e.Error())
	})

	// Only page 1 is requested; raise the upper bound to crawl more pages.
	for i := 1; i < 2; i++ {
		pageURL := "https://pixabay.com/zh/photos/search/?order=ec&pagi=" + strconv.Itoa(i)
		if err := c.Visit(pageURL); err != nil {
			println("cannot visit", pageURL, "-", err.Error())
		}
	}

	// Block until all asynchronous requests have completed.
	c.Wait()
}
[Running] go run "~/pixabay_spider/main.go"
https://pixabay.com/zh/photos/search/?order=ec&pagi=1
403
Forbidden

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions