Made all Python files PEP-8 Compatible
This commit is contained in:
parent
5b17627504
commit
87d1041517
24
Fourmi.py
24
Fourmi.py
@ -1,7 +1,7 @@
|
||||
#!/usr/bin/env python
|
||||
"""
|
||||
Fourmi - An internet webcrawler searching for information on chemical compounds.
|
||||
[todo] - Add some more useful text here.
|
||||
Fourmi - An internet webcrawler searching for information on chemical
|
||||
compounds. [todo] - Add some more useful text here.
|
||||
"""
|
||||
|
||||
from twisted.internet import reactor
|
||||
@ -10,17 +10,19 @@ from scrapy import log, signals
|
||||
from FourmiCrawler.spiders.Fourmispider import FourmiSpider
|
||||
from scrapy.utils.project import get_project_settings
|
||||
|
||||
|
||||
def setup_crawler(searchable):
    """Build and launch a Scrapy crawler that searches for one compound.

    :param searchable: name of the chemical compound to search for
    """
    # [todo] - Do something smart to get the different spiders to work here.
    compound_spider = FourmiSpider(compound=searchable)
    crawler = Crawler(get_project_settings())
    crawler.configure()
    crawler.crawl(compound_spider)
    crawler.start()
|
||||
|
||||
|
||||
def start():
    """Crawl for "Methane", enable Scrapy logging, and run the reactor.

    Blocks until the Twisted reactor is stopped.
    """
    setup_crawler("Methane")
    log.start()
    reactor.run()
|
||||
|
||||
# Only launch the crawl when executed as a script; a bare start() call
# would also fire the crawler (and block on the reactor) on a mere import.
if __name__ == '__main__':
    start()
|
||||
|
@ -5,9 +5,10 @@
|
||||
|
||||
from scrapy.item import Item, Field
|
||||
|
||||
|
||||
class Result(Item):
    """Scrapy item describing a single scraped fact about a compound.

    Field meanings are inferred from their names — confirm against the
    pipelines that consume this item.
    """

    attribute = Field()
    value = Field()
    source = Field()
    reliability = Field()
    conditions = Field()
|
||||
|
@ -14,5 +14,7 @@ ITEM_PIPELINES = {
|
||||
'FourmiCrawler.pipelines.FourmiPipeline': 100
|
||||
}
|
||||
|
||||
# Crawl responsibly by identifying yourself (and your website) on the user-agent
|
||||
#USER_AGENT = 'FourmiCrawler (+http://www.yourdomain.com)'
|
||||
# Crawl responsibly by identifying yourself (and your website) on the
|
||||
# user-agent
|
||||
|
||||
# USER_AGENT = 'FourmiCrawler (+http://www.yourdomain.com)'
|
||||
|
@ -1,12 +1,15 @@
|
||||
from scrapy.spider import Spider
|
||||
|
||||
class FourmiSpider(Spider):
    """Spider that gathers information on a given chemical compound."""

    name = "FourmiSpider"

    def __init__(self, compound=None, *args, **kwargs):
        """Create a spider for one compound.

        :param compound: the chemical compound to search for
        """
        super(FourmiSpider, self).__init__(*args, **kwargs)
        # [TODO] - Initiate all parsers for the different websites and get
        # allowed URLs.

    def parse(self, response):
        # Parameter renamed from the misspelled 'reponse'; Scrapy invokes
        # parse() with the response passed positionally, so callers are
        # unaffected.
        # [TODO] - This function should delegate its functionality to other
        # parsers.
        pass
|
||||
|
Reference in New Issue
Block a user