@@ -5,44 +5,48 @@ module Squirm
55 getter spiders : Synchronized (Array (Spider )) = Synchronized (Array (Spider )).new
66
77 def add_spider (spider : Spider )
8- RequestStorage .instance.store(spider, spider.start_urls) if spider.start_requests.empty?
9- RequestStorage .instance.store(spider, spider.start_requests)
8+ RequestStorage .instance.store(spider.id , spider.start_urls) if spider.start_requests.empty?
9+ RequestStorage .instance.store(spider.id , spider.start_requests)
1010
1111 @spiders .push(spider)
12+ end
1213
13- spawn do
14- pool = Pool .new(spider.concurrent_requests_per_domain)
15- fetcher = spider.fetcher
14+ def run
15+ spiders.each do |spider |
16+ spawn do
17+ pool = Pool .new(spider.concurrent_requests_per_domain)
18+ fetcher = spider.fetcher
1619
17- loop do
18- unless RequestStorage .instance.empty?(spider)
19- request = RequestStorage .instance.pop!(spider)
20- request.spider = spider
20+ loop do
21+ unless RequestStorage .instance.empty?(spider.id )
22+ request = RequestStorage .instance.pop!(spider.id )
23+ request.spider = spider
2124
22- pool.spawn do
23- begin
24- response = fetcher.fetch(request)
25+ pool.spawn do
26+ begin
27+ response = fetcher.fetch(request)
2528
26- parsed_item = spider.parse_item(request, response)
27- parse(spider, parsed_item)
29+ parsed_item = spider.parse_item(request, response)
30+ parse(spider, parsed_item)
2831
29- sleep(spider.timeout )
30- rescue exception : Crest ::RequestFailed
31- status_code = exception.response.status_code.to_i
32+ sleep(spider.request_timeout )
33+ rescue exception : Crest ::RequestFailed
34+ status_code = exception.response.status_code.to_i
3235
33- case status_code
34- when 429 , 500 ..511
35- Log .error(exception: exception) { exception.message }
36+ case status_code
37+ when 429 , 500 ..511
38+ Log .error(exception: exception) { exception.message }
3639
37- if request.retriable?
38- request.retry
39- RequestStorage .instance.store(spider, request)
40+ if request.retriable?
41+ request.retry
42+ RequestStorage .instance.store(spider.id, request)
43+ end
44+ else
45+ Log .error(exception: exception) { " Dropping the request, failed to get a response status code which could be used to recover a request." }
4046 end
41- else
42- Log .error(exception: exception) { " Dropping the request, failed to get a response status code which could be used to recover a request ." }
47+ rescue exception : Exception
48+ Log .error(exception: exception) { " Dropping the request, a non HTTP error occured ." }
4349 end
44- rescue exception : Exception
45- Log .error(exception: exception) { " Dropping the request, a non HTTP error occured." }
4650 end
4751 end
4852 end
@@ -52,7 +56,7 @@ module Squirm
5256
5357 def remove_spider (spider : Spider )
5458 spider.cache.flush
55- RequestStorage .instance.flush(spider)
59+ RequestStorage .instance.flush(spider.id )
5660 @spiders .delete(spider)
5761 end
5862
@@ -62,7 +66,7 @@ module Squirm
6266 end
6367
6468 private def parse_requests (spider : Spider , parsed_item : ParsedItem )
65- RequestStorage .instance.store(spider, parsed_item.requests)
69+ RequestStorage .instance.store(spider.id , parsed_item.requests)
6670 end
6771
6872 private def parse_items (spider : Spider , parsed_item : ParsedItem )
0 commit comments