@@ -44,13 +44,15 @@ Disallow: /disallowed
44
44
Disallow: /allowed*q=
45
45
`
46
46
47
- const testXml = `<?xml version="1.0" encoding="UTF-8"?>
47
+ const testXML = `<?xml version="1.0" encoding="UTF-8"?>
48
48
<page>
49
49
<title>Test Page</title>
50
50
<paragraph type="description">This is a test page</paragraph>
51
51
<paragraph type="description">This is a test paragraph</paragraph>
52
52
</page>`
53
53
54
+ const custom404 = `404 not found`
55
+
54
56
func newTestServer () * httptest.Server {
55
57
mux := http .NewServeMux ()
56
58
@@ -77,13 +79,17 @@ func newTestServer() *httptest.Server {
77
79
78
80
mux .HandleFunc ("/xml" , func (w http.ResponseWriter , r * http.Request ) {
79
81
w .Header ().Set ("Content-Type" , "application/xml" )
80
- w .Write ([]byte (testXml ))
82
+ w .Write ([]byte (testXML ))
81
83
})
82
84
83
85
mux .HandleFunc ("/test.xml.gz" , func (w http.ResponseWriter , r * http.Request ) {
84
86
ww := gzip .NewWriter (w )
85
87
defer ww .Close ()
86
- ww .Write ([]byte (testXml ))
88
+ ww .Write ([]byte (testXML ))
89
+ })
90
+
91
+ mux .HandleFunc ("/nonexistent.xml.gz" , func (w http.ResponseWriter , r * http.Request ) {
92
+ http .Error (w , custom404 , http .StatusNotFound )
87
93
})
88
94
89
95
mux .HandleFunc ("/login" , func (w http.ResponseWriter , r * http.Request ) {
@@ -1431,6 +1437,35 @@ func TestCollectorOnXMLWithXMLCompressed(t *testing.T) {
1431
1437
testCollectorOnXMLWithXML (t , "/test.xml.gz" )
1432
1438
}
1433
1439
1440
+ func TestCollectorNonexistentXMLGZ (t * testing.T ) {
1441
+ // This is a regression test for colly
1442
+ // attempting to decompress all .xml.gz URLs
1443
+ // even if they're not compressed.
1444
+ ts := newTestServer ()
1445
+ defer ts .Close ()
1446
+
1447
+ c := NewCollector (ParseHTTPErrorResponse ())
1448
+
1449
+ onResponseCalled := false
1450
+
1451
+ c .OnResponse (func (resp * Response ) {
1452
+ onResponseCalled = true
1453
+ if got , want := strings .TrimSpace (string (resp .Body )), custom404 ; got != want {
1454
+ t .Errorf ("wrong response body got=%q want=%q" , got , want )
1455
+ }
1456
+ })
1457
+
1458
+ c .OnError (func (resp * Response , err error ) {
1459
+ t .Errorf ("called on OnError: err=%v" , err )
1460
+ })
1461
+
1462
+ c .Visit (ts .URL + "/nonexistent.xml.gz" )
1463
+
1464
+ if ! onResponseCalled {
1465
+ t .Error ("OnResponse was not called" )
1466
+ }
1467
+ }
1468
+
1434
1469
func TestCollectorVisitWithTrace (t * testing.T ) {
1435
1470
ts := newTestServer ()
1436
1471
defer ts .Close ()
0 commit comments