Archived
1
0
This repository has been archived on 2025-03-03. You can view files and clone it, but cannot push or open issues or pull requests.
Fourmi/sourceloader.py
2014-04-16 10:48:29 +02:00

38 lines
1.3 KiB
Python

import inspect
import os
import re
from FourmiCrawler.parsers.parser import Parser
class SourceLoader:
    """Discover Parser subclasses in a directory and keep one instance of each.

    The loaded instances are stored in ``self.sources``; ``include`` and
    ``exclude`` narrow that list down by matching regular expressions against
    the class names of the loaded instances.
    """

    def __init__(self, rel_dir="FourmiCrawler/parsers"):
        """Import every module in *rel_dir* and instantiate its Parser classes.

        :param rel_dir: directory, relative to the directory of this file and
            written with "/" separators, that holds the parser modules. It is
            also used (with "/" replaced by ".") as the dotted package name
            for importing those modules.
        """
        # The list is created per instance on purpose: a class-level list
        # would be shared by all loaders, so every new loader would append its
        # parsers to (and every include() would diverge from) the same list.
        self.sources = []

        base_dir = os.path.dirname(os.path.abspath(__file__))
        path = os.path.join(base_dir, *rel_dir.split("/"))
        package = rel_dir.replace("/", ".")
        known_parser = set()

        for filename in os.listdir(path):
            if not filename.endswith('.py') or filename == '__init__.py':
                continue
            py = filename[:-3]
            mod = __import__('.'.join([package, py]), fromlist=[py])
            for attr in dir(mod):
                cls = getattr(mod, attr)
                if not inspect.isclass(cls):
                    continue
                # NOTE(review): issubclass(Parser, Parser) is True, so the
                # base class itself is instantiated whenever a scanned module
                # exposes it (e.g. through its own import) - confirm that this
                # is intended.
                if issubclass(cls, Parser) and cls not in known_parser:
                    self.sources.append(cls())  # [review] - Would we ever need arguments for the parsers?
                    known_parser.add(cls)

    def include(self, source_names):
        """Keep only the sources whose class name matches one of the patterns.

        :param source_names: iterable of regular expressions; each is applied
            with ``re.match`` (anchored at the start) to the class name of
            every loaded source.

        Sources are kept in the order in which the patterns select them, and a
        source matched by several patterns is kept only once.
        """
        selected = []
        seen = set()  # ids of the sources that have already been selected
        for name in source_names:
            for src in self.sources:
                if id(src) in seen:
                    continue
                if re.match(name, src.__class__.__name__):
                    seen.add(id(src))
                    selected.append(src)
        self.sources = selected

    def exclude(self, source_names):
        """Drop every source whose class name matches one of the patterns.

        :param source_names: iterable of regular expressions; each is applied
            with ``re.match`` (anchored at the start) to the class name of
            every loaded source.

        The relative order of the remaining sources is unchanged; an empty
        *source_names* leaves the list as it is.
        """
        # Materialise first: the patterns are re-used for every source, which
        # would exhaust a one-shot iterator after the first source.
        patterns = list(source_names)
        self.sources = [
            src for src in self.sources
            if not any(re.match(name, src.__class__.__name__) for name in patterns)
        ]

    def __str__(self):
        """Return one "Source: <class name> - URI: <website>" line per source."""
        return "".join(
            "Source: " + src.__class__.__name__ + " - " + "URI: " + src.website + "\n"
            for src in self.sources
        )