Archived
1
0

Added documentation for the sourceloader

This commit is contained in:
Jip J. Dekker 2014-06-01 20:01:19 +02:00
parent e272c9f342
commit a040bc7a02

View File

@ -1,6 +1,7 @@
import inspect import inspect
import os import os
import re import re
from FourmiCrawler.sources.source import Source from FourmiCrawler.sources.source import Source
@ -8,6 +9,10 @@ class SourceLoader:
sources = [] sources = []
def __init__(self, rel_dir="FourmiCrawler/sources"): def __init__(self, rel_dir="FourmiCrawler/sources"):
"""
Initializes a SourceLoader by selecting and indexing a directory of usable sources.
:param rel_dir: A relative path to a directory.
"""
path = os.path.dirname(os.path.abspath(__file__)) path = os.path.dirname(os.path.abspath(__file__))
path += "/" + rel_dir path += "/" + rel_dir
known_parser = set() known_parser = set()
@ -21,18 +26,30 @@ class SourceLoader:
known_parser.add(cls) known_parser.add(cls)
def include(self, source_names):
    """
    Keep only the sources whose class name matches at least one of the given
    regular expressions; every other source is dropped from ``self.sources``.

    Matching uses ``re.match``, so each pattern is anchored at the start of
    the class name. The surviving sources keep their original relative order
    and duplicates are removed, so the result is deterministic.

    :param source_names: An iterable of regular expressions (strings).
    """
    # Materialise once: the patterns are re-used for every source, and the
    # caller may have passed a one-shot iterator.
    patterns = list(source_names)
    kept = []
    for src in self.sources:
        class_name = src.__class__.__name__
        matches = any(re.match(pattern, class_name) for pattern in patterns)
        # `src not in kept` preserves the de-duplication that the previous
        # set-based implementation provided.
        if matches and src not in kept:
            kept.append(src)
    self.sources = kept
def exclude(self, source_names):
    """
    Drop every source whose class name matches at least one of the given
    regular expressions; all other sources stay in ``self.sources`` in their
    current order.

    Matching uses ``re.match``, so each pattern is anchored at the start of
    the class name.

    :param source_names: A list of regular expressions (strings).
    """
    # Collect every source hit by any pattern (pattern-major order).
    rejected = [
        candidate
        for pattern in source_names
        for candidate in self.sources
        if re.match(pattern, candidate.__class__.__name__)
    ]
    remaining = []
    for candidate in self.sources:
        if candidate in rejected:
            continue
        remaining.append(candidate)
    self.sources = remaining
def __str__(self): def __str__(self):
"""
This function returns a string with all sources currently available in the SourceLoader.
:return: a string with all available sources.
"""
string = "" string = ""
for src in self.sources: for src in self.sources:
string += "Source: " + src.__class__.__name__ string += "Source: " + src.__class__.__name__