diff --git a/dexter.core b/dexter.core new file mode 100644 index 00000000..488691b8 --- /dev/null +++ b/dexter.core @@ -0,0 +1,283 @@ +%!PS-Adobe-3.0 +%%Creator: (ImageMagick) +%%Title: (dexter.core) +%%CreationDate: (2018-02-13T12:57:01+02:00) +%%BoundingBox: 2462 297 2496 298 +%%HiResBoundingBox: 2462 297 2496 298 +%%DocumentData: Clean7Bit +%%LanguageLevel: 1 +%%Orientation: Portrait +%%PageOrder: Ascend +%%Pages: 1 +%%EndComments + +%%BeginDefaults +%%EndDefaults + +%%BeginProlog +% +% Display a color image. The image is displayed in color on +% Postscript viewers or printers that support color, otherwise +% it is displayed as grayscale. +% +/DirectClassPacket +{ + % + % Get a DirectClass packet. + % + % Parameters: + % red. + % green. + % blue. + % length: number of pixels minus one of this color (optional). + % + currentfile color_packet readhexstring pop pop + compression 0 eq + { + /number_pixels 3 def + } + { + currentfile byte readhexstring pop 0 get + /number_pixels exch 1 add 3 mul def + } ifelse + 0 3 number_pixels 1 sub + { + pixels exch color_packet putinterval + } for + pixels 0 number_pixels getinterval +} bind def + +/DirectClassImage +{ + % + % Display a DirectClass image. + % + systemdict /colorimage known + { + columns rows 8 + [ + columns 0 0 + rows neg 0 rows + ] + { DirectClassPacket } false 3 colorimage + } + { + % + % No colorimage operator; convert to grayscale. + % + columns rows 8 + [ + columns 0 0 + rows neg 0 rows + ] + { GrayDirectClassPacket } image + } ifelse +} bind def + +/GrayDirectClassPacket +{ + % + % Get a DirectClass packet; convert to grayscale. + % + % Parameters: + % red + % green + % blue + % length: number of pixels minus one of this color (optional). 
+ % + currentfile color_packet readhexstring pop pop + color_packet 0 get 0.299 mul + color_packet 1 get 0.587 mul add + color_packet 2 get 0.114 mul add + cvi + /gray_packet exch def + compression 0 eq + { + /number_pixels 1 def + } + { + currentfile byte readhexstring pop 0 get + /number_pixels exch 1 add def + } ifelse + 0 1 number_pixels 1 sub + { + pixels exch gray_packet put + } for + pixels 0 number_pixels getinterval +} bind def + +/GrayPseudoClassPacket +{ + % + % Get a PseudoClass packet; convert to grayscale. + % + % Parameters: + % index: index into the colormap. + % length: number of pixels minus one of this color (optional). + % + currentfile byte readhexstring pop 0 get + /offset exch 3 mul def + /color_packet colormap offset 3 getinterval def + color_packet 0 get 0.299 mul + color_packet 1 get 0.587 mul add + color_packet 2 get 0.114 mul add + cvi + /gray_packet exch def + compression 0 eq + { + /number_pixels 1 def + } + { + currentfile byte readhexstring pop 0 get + /number_pixels exch 1 add def + } ifelse + 0 1 number_pixels 1 sub + { + pixels exch gray_packet put + } for + pixels 0 number_pixels getinterval +} bind def + +/PseudoClassPacket +{ + % + % Get a PseudoClass packet. + % + % Parameters: + % index: index into the colormap. + % length: number of pixels minus one of this color (optional). + % + currentfile byte readhexstring pop 0 get + /offset exch 3 mul def + /color_packet colormap offset 3 getinterval def + compression 0 eq + { + /number_pixels 3 def + } + { + currentfile byte readhexstring pop 0 get + /number_pixels exch 1 add 3 mul def + } ifelse + 0 3 number_pixels 1 sub + { + pixels exch color_packet putinterval + } for + pixels 0 number_pixels getinterval +} bind def + +/PseudoClassImage +{ + % + % Display a PseudoClass image. + % + % Parameters: + % class: 0-PseudoClass or 1-Grayscale. 
+ % + currentfile buffer readline pop + token pop /class exch def pop + class 0 gt + { + currentfile buffer readline pop + token pop /depth exch def pop + /grays columns 8 add depth sub depth mul 8 idiv string def + columns rows depth + [ + columns 0 0 + rows neg 0 rows + ] + { currentfile grays readhexstring pop } image + } + { + % + % Parameters: + % colors: number of colors in the colormap. + % colormap: red, green, blue color packets. + % + currentfile buffer readline pop + token pop /colors exch def pop + /colors colors 3 mul def + /colormap colors string def + currentfile colormap readhexstring pop pop + systemdict /colorimage known + { + columns rows 8 + [ + columns 0 0 + rows neg 0 rows + ] + { PseudoClassPacket } false 3 colorimage + } + { + % + % No colorimage operator; convert to grayscale. + % + columns rows 8 + [ + columns 0 0 + rows neg 0 rows + ] + { GrayPseudoClassPacket } image + } ifelse + } ifelse +} bind def + +/DisplayImage +{ + % + % Display a DirectClass or PseudoClass image. + % + % Parameters: + % x & y translation. + % x & y scale. + % label pointsize. + % image label. + % image columns & rows. + % class: 0-DirectClass or 1-PseudoClass. + % compression: 0-none or 1-RunlengthEncoded. + % hex color packets. 
+ % + gsave + /buffer 512 string def + /byte 1 string def + /color_packet 3 string def + /pixels 768 string def + + currentfile buffer readline pop + token pop /x exch def + token pop /y exch def pop + x y translate + currentfile buffer readline pop + token pop /x exch def + token pop /y exch def pop + currentfile buffer readline pop + token pop /pointsize exch def pop + /Times-Roman findfont pointsize scalefont setfont + x y scale + currentfile buffer readline pop + token pop /columns exch def + token pop /rows exch def pop + currentfile buffer readline pop + token pop /class exch def pop + currentfile buffer readline pop + token pop /compression exch def pop + class 0 gt { PseudoClassImage } { DirectClassImage } ifelse + grestore + showpage +} bind def +%%EndProlog +%%Page: 1 1 +%%PageBoundingBox: 2462 297 2496 298 +DisplayImage +2462 297 +34 1 +12 +34 1 +0 +0 +B2B4BBB2B4BBB2B4BBB2B4BBB2B4BBB2B4BBB2B4BBB2B4BBB2B4BBB2B4BBB2B4BBB2B4BBB2B4BB +B2B4BBB2B4BBB2B4BBB2B4BBB2B4BBB2B4BBB2B4BBB2B4BBB2B4BBB2B4BBB2B4BBB2B4BBB2B4BB +B2B4BBB2B4BBB2B4BBB2B4BBB2B4BBB2B4BBB2B4BBB2B4BB + +%%PageTrailer +%%Trailer +%%EOF diff --git a/dexter/models/country.py b/dexter/models/country.py index 06969d77..84f88ae5 100644 --- a/dexter/models/country.py +++ b/dexter/models/country.py @@ -50,6 +50,10 @@ def create_defaults(cls): Germany|de United Kingdom (Great Britain)|gb Kenya|ke +Nigeria|ng +France|fr +United States of America|us +China|cn """ countries = [] diff --git a/dexter/models/fdi.py b/dexter/models/fdi.py index f18fccbd..beeea7f0 100644 --- a/dexter/models/fdi.py +++ b/dexter/models/fdi.py @@ -775,7 +775,7 @@ class Involvements2(db.Model): __tablename__ = "involvements2" id = Column(Integer, primary_key=True) - name = Column(String(50), index=True, nullable=False, unique=True) + name = Column(String(128), index=True, nullable=False, unique=True) def __repr__(self): return "" % (self.name) @@ -878,7 +878,7 @@ class Involvements3(db.Model): __tablename__ = "involvements3" id = 
Column(Integer, primary_key=True) - name = Column(String(50), index=True, nullable=False, unique=True) + name = Column(String(128), index=True, nullable=False, unique=True) def __repr__(self): return "" % (self.name) diff --git a/dexter/models/medium.py b/dexter/models/medium.py index 87f34556..a5876249 100644 --- a/dexter/models/medium.py +++ b/dexter/models/medium.py @@ -40,7 +40,8 @@ def is_tld_exception(cls, url): """ url_exceptions = [ 'thecitizen.co.tz', - 'dailynews.co.tz' + 'dailynews.co.tz', + 'mathewnyaungwa.blogspot.co.za' ] for ex in url_exceptions: # check if it exists in the url add buffer for [https://www.] characters at start @@ -51,10 +52,12 @@ def is_tld_exception(cls, url): @classmethod def for_url(cls, url): + sub_domain_exception_list = [ + 'blogspot.co.za' + ] domain = get_tld(url, fail_silently=True) # fail silently - - if domain is None: + if domain is None or domain in sub_domain_exception_list: domain = cls.is_tld_exception(url) if domain is None: @@ -175,6 +178,56 @@ def create_defaults(cls): The East African|online|theeastafrican.co.ke||ke Daily News (Tanzania)|online|dailynews.co.tz||tz Daily News (Zimbabwe)|online|dailynews.co.zw||tz +SAVCA|online|savca.co.za||za +How We Made It In Africa|online|howwemadeitinafrica.com||za +Rhodes University (MathewYaungwaBlog)|online|mathewnyaungwa.blogspot.co.za||za +World Stage|online|worldstagegroup.com||ng +Classic FM|online|classic97.net||ng +Agence France Presse|online|afp.com||fr +Naija News Agency|online|naijanewsagency.com||ng +Daily Trust Newspaper|online|dailytrust.com.ng||ng +Daily Telegraph New Telegraph Online|online|newtelegraphonline.com||ng +The Point|online|thepointng.com||ng +The Daily Times|online|dailytimes.ng||ng +The Nation Online|online|thenationonlineng.net||ng +Media Max Network|online|mediamaxnetwork.co.ke||ke +Leadership|online|leadership.ng||ng +The Interview|online|theinterview.com.ng||ng +RSA Parliament|online|parliament.gov.za||za +Guardian|online|guardian.ng||ng 
+Naitional Daily Nigeria|online|nationaldailyng.com||ng +Nigerian Television Authority|online|nta.ng||ng +ACDIVOCA|online|acdivoca.org||us +This Day Live|online|thisdaylive.com||ng +Channel Africa|online|channelafrica.co.za||za +News Agency Of Nigeria|online|nan.ng||ng +Nigeria Today|online|nigeriatoday.ng||ng +Business Day Online|online|businessdayonline.com||ng +Standard Media KTN News|online|standardmedia.co.ke/ktnnews||ke +Global Times China|online|globaltimes.cn||cn +National Mirror|online|nationalmirroronline.net||ng +Monitor Kenya|online|monitor.co.ke||ke +Newsverge|online|newsverge.com||ng +Sundiata Post|online|sundiatapost.com||ng +Agrilinks|online|agrilinks.org||us +Business Daily Africa|online|businessdailyafrica.com||ke +The Business Post|online|thebusinesspost.ng||ng +The Guardian UK|online|theguardian.com||gb +Independent NG|online|independent.ng||ng +The Nerve Africa|online|thenerveafrica.com||ng +Ameh News|online|amehnews.com||ng +Sun News Online|online|sunnewsonline.com||ng +Seed Magazine|online|seedmagazine.co.ke||ke +Business Hallmark News|online|hallmarknews.com||ng +Destiny Connect|online|destinyconnect.com||za +The Economist|online|economist.com||us +Washington Post|online|washingtonpost.com||us +Ama Bhungane|online|amabhungane.co.za||za +Africa Investor|online|africainvestor.com||za +Outrepreneurs|online|outrepreneurs.com||ng +CNBC Africa|online|cnbcafrica.com||za +Plan International|online|plan-international.org||gb +Bloomberg|online|bloomberg.com||za """ mediums = [] diff --git a/dexter/models/seeds.py b/dexter/models/seeds.py index 03157157..91c62851 100644 --- a/dexter/models/seeds.py +++ b/dexter/models/seeds.py @@ -4,80 +4,80 @@ def seed_db(db): """ Add seed entities to the database. 
""" with app.app_context(): - # for x in AnalysisNature.create_defaults(): - # db.session.add(x) - # - # for x in Country.create_defaults(): - # db.session.add(x) - # db.session.flush() - # - # for x in User.create_defaults(): - # db.session.add(x) - # - # for x in Medium.create_defaults(): - # db.session.add(x) - # - # for x in Gender.create_defaults(): - # db.session.add(x) - # - # for x in Race.create_defaults(): - # db.session.add(x) - # - # for x in SourceFunction.create_defaults(): - # db.session.add(x) - # - # for x in Topic.create_defaults(): - # db.session.add(x) - # - # for x in DocumentType.create_defaults(): - # db.session.add(x) - # - # for x in AuthorType.create_defaults(): - # db.session.add(x) - # - # for x in Issue.create_defaults(): - # db.session.add(x) - # - # for x in Fairness.create_defaults(): - # db.session.add(x) - # - # for x in Affiliation.create_defaults(): - # db.session.add(x) - # - # for x in SourceRole.create_defaults(): - # db.session.add(x) - # - # for x in InvestmentType.create_defaults(): - # db.session.add(x) - # - # for x in InvestmentOrigins.create_defaults(): - # db.session.add(x) - # - # for x in Sectors.create_defaults(): - # db.session.add(x) - # - # for x in Phases.create_defaults(): - # db.session.add(x) - # - # for x in Currencies.create_defaults(): - # db.session.add(x) - # - # for x in Industries.create_defaults(): - # db.session.add(x) - # + for x in AnalysisNature.create_defaults(): + db.session.add(x) + + for x in Country.create_defaults(): + db.session.add(x) + db.session.flush() + + for x in User.create_defaults(): + db.session.add(x) + + for x in Medium.create_defaults(): + db.session.add(x) + + for x in Gender.create_defaults(): + db.session.add(x) + + for x in Race.create_defaults(): + db.session.add(x) + + for x in SourceFunction.create_defaults(): + db.session.add(x) + + for x in Topic.create_defaults(): + db.session.add(x) + + for x in DocumentType.create_defaults(): + db.session.add(x) + + for x in 
AuthorType.create_defaults(): + db.session.add(x) + + for x in Issue.create_defaults(): + db.session.add(x) + + for x in Fairness.create_defaults(): + db.session.add(x) + + for x in Affiliation.create_defaults(): + db.session.add(x) + + for x in SourceRole.create_defaults(): + db.session.add(x) + + for x in InvestmentType.create_defaults(): + db.session.add(x) + + for x in InvestmentOrigins.create_defaults(): + db.session.add(x) + + for x in Sectors.create_defaults(): + db.session.add(x) + + for x in Phases.create_defaults(): + db.session.add(x) + + for x in Currencies.create_defaults(): + db.session.add(x) + + for x in Industries.create_defaults(): + db.session.add(x) + # for x in Involvements.create_defaults(): # db.session.add(x) - # - # for x in ValueUnits.create_defaults(): - # db.session.add(x) - # - # db.session.flush() - # - # for x in Principle.create_defaults(): - # db.session.add(x) - # - # for x in Role.create_defaults(): - # db.session.add(x) + + for x in ValueUnits.create_defaults(): + db.session.add(x) + + db.session.flush() + + for x in Principle.create_defaults(): + db.session.add(x) + + for x in Role.create_defaults(): + db.session.add(x) for x in Provinces.create_defaults(): db.session.add(x) diff --git a/dexter/processing/crawlers/__init__.py b/dexter/processing/crawlers/__init__.py index 878dd9f3..a1e92fed 100644 --- a/dexter/processing/crawlers/__init__.py +++ b/dexter/processing/crawlers/__init__.py @@ -18,4 +18,54 @@ from .newsdayzw import NewsDayZWCrawler from .dwcom import DWCrawler from .chroniclezw import ChronicleZWCrawler -from .bbc import BBCCrawler \ No newline at end of file +from .bbc import BBCCrawler +from .howwemadeitinafrica import HowWeMadeItInAfricaCrawler +from .savca import SAVCACrawler +from .rhodesunimathewblog import RhodesUniMathewBlogCrawler +from .worldstage import WorldStageCrawler +from .classicfm import ClassicFMCrawler +from .afp import AFPCrawler +from .naijanews import NaijaNewsCrawler +from .dailytrustnp import 
DailyTrustNPCrawler +from .newteleonline import NewTeleOnlineCrawler +from .thepoint import ThePointCrawler +from .dailytimes import DailyTimesCrawler +from .thenation import TheNationCrawler +from .mediamaxnet import MediaMaxNetCrawler +from .leadership import LeadershipCrawler +from .theinterview import TheInterviewCrawler +from .rsaparliament import RSAParliamentCrawler +from .guardian import GuardianCrawler +from .nationaldailyng import NationalDailyNgCrawler +from .nta import NTACrawler +from .acdivoca import ACDIVOCACrawler +from .thisdaylive import ThisDayLiveCrawler +from .channelafrica import ChannelAfricaCrawler +from .nan import NANCrawler +from .nigeriatoday import NigeriaTodayCrawler +from .businessdayonline import BusinessDayOnlineCrawler +from .standardmediaktnnews import StandardMediaKTNCrawler +from .globaltimescn import GlobalTimesCN +from .nationalmirror import NationalMirrorCrawler +from .monitorke import MonitorKECrawler +from .newsverge import NewsvergeCrawler +from .sundiatapost import SundiataPostCrawler +from .agrilinks import AgrilinksCrawler +from .businessdailyafrica import BusinessDailyAfricaCrawler +from .thebusinesspost import TheBusinessPostCrawler +from .theguardianuk import TheGuardianUKCrawler +from .independentng import IndependentNGCrawler +from .thenerveafrica import TheNerveAfricaCrawler +from .amehnews import AmehNewsCrawler +from .sunnewsonline import SunNewsOnlineCrawler +from .seedmagazine import SeedMagazineCrawler +from .hallmarknews import HallmarkNewsCrawler +from .destinyconnect import DestinyConnectCrawler +from .economist import EconomistCrawler +from .washingtonpost import WashingtonPostCrawler +from .amabhungane import AmaBhunganeCrawler +from .africainvestor import AfricaInvestorCrawler +from .outrepreneurs import OutrepreneursCrawler +from .cnbcafrica import CNBCAfricaCrawler +from .planintl import PlanIntlCrawler +from .bloomberg import BloombergCrawler \ No newline at end of file diff --git 
a/dexter/processing/crawlers/acdivoca.py b/dexter/processing/crawlers/acdivoca.py new file mode 100644 index 00000000..03cc080c --- /dev/null +++ b/dexter/processing/crawlers/acdivoca.py @@ -0,0 +1,57 @@ +from urlparse import urlparse, urlunparse +import re + +from bs4 import BeautifulSoup +import requests + +from .base import BaseCrawler +from ...models import Entity, Author, AuthorType + +class ACDIVOCACrawler(BaseCrawler): + ACDIVO_RE = re.compile('(www\.)?acdivoca.org') + + def offer(self, url): + """ Can this crawler process this URL? """ + parts = urlparse(url) + return bool(self.ACDIVO_RE.match(parts.netloc)) + + def fetch(self, url): + """ + Fetch and return the raw HTML for this url. + The return content is a unicode string. + """ + self.log.info("Fetching URL: " + url) + + headers = {'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.91 Safari/537.36'} + + r = requests.get(url, headers=headers, timeout=10) + # raise an HTTPError on badness + r.raise_for_status() + + # this decodes r.content using a guessed encoding + return r.text + + def extract(self, doc, raw_html): + """ Extract text and other things from the raw_html for this document. 
""" + super(ACDIVOCACrawler, self).extract(doc, raw_html) + + soup = BeautifulSoup(raw_html) + + # gather title + doc.title = self.extract_plaintext(soup.select('#main #page-title')) + + #gather publish date + date = self.extract_plaintext(soup.select('#main .meta-top .date')) + doc.published_at = self.parse_timestamp(date) + + #gather text and summary + nodes = soup.select('#main .main-content p') + doc.summary = "\n\n".join(p.text.strip() for p in nodes[:1]) + doc.text = "\n\n".join(p.text.strip() for p in nodes) + + # gather author + author = self.extract_plaintext(soup.select('#main .meta-author h3 a')) + if author: + doc.author = Author.get_or_create(author.strip(), AuthorType.journalist()) + else: + doc.author = Author.unknown() diff --git a/dexter/processing/crawlers/afp.py b/dexter/processing/crawlers/afp.py new file mode 100644 index 00000000..1d4afe10 --- /dev/null +++ b/dexter/processing/crawlers/afp.py @@ -0,0 +1,52 @@ +from urlparse import urlparse, urlunparse +import re + +from bs4 import BeautifulSoup +import requests + +from .base import BaseCrawler +from ...models import Entity, Author, AuthorType + +class AFPCrawler(BaseCrawler): + AFP_RE = re.compile('(www\.)?afp.com') + + def offer(self, url): + """ Can this crawler process this URL? """ + parts = urlparse(url) + return bool(self.AFP_RE.match(parts.netloc)) + + def canonicalise_url(self, url): + """ Strip anchors, etc.""" + + # Needed to handle urls being recieved without protocol (http[s]://), check if it can be parsed first, then handle and re parse if there is no netloc found + if '//' not in url: + url = '%s%s' % ('https://', url) + + parts = urlparse(url) + + netloc = parts.netloc.strip(':80') + + # force http, strip trailing slash, anchors etc. + return urlunparse(['https', netloc, parts.path.rstrip('/') or '/', parts.params, parts.query, None]) + + def extract(self, doc, raw_html): + """ Extract text and other things from the raw_html for this document. 
""" + super(AFPCrawler, self).extract(doc, raw_html) + + soup = BeautifulSoup(raw_html) + + # gather title + doc.title = self.extract_plaintext(soup.select('.container .article_content h3.htitle')) + + #gather publish date + #date_nodes = soup.select('.container .article_content .article_content_meta .article_content_date span') + date = self.extract_plaintext(soup.select('.container .article_content .article_content_meta .article_content_date span')) + doc.published_at = self.parse_timestamp(date) + + #gather text and summary + nodes = soup.select('.container .article_content .textcontent p') + doc.summary = "\n\n".join(p.text.strip() for p in nodes[:1]) + doc.text = "\n\n".join(p.text.strip() for p in nodes) + + # gather author + doc.author = Author.unknown() \ No newline at end of file diff --git a/dexter/processing/crawlers/africainvestor.py b/dexter/processing/crawlers/africainvestor.py new file mode 100644 index 00000000..278c1404 --- /dev/null +++ b/dexter/processing/crawlers/africainvestor.py @@ -0,0 +1,62 @@ +from urlparse import urlparse, urlunparse +import re + +from bs4 import BeautifulSoup +import requests + +from .base import BaseCrawler +from ...models import Entity, Author, AuthorType + +class AfricaInvestorCrawler(BaseCrawler): + AI_RE = re.compile('(www\.)?africainvestor.com') + + def offer(self, url): + """ Can this crawler process this URL? """ + parts = urlparse(url) + return bool(self.AI_RE.match(parts.netloc)) + + + def fetch(self, url): + """ + Fetch and return the raw HTML for this url. + The return content is a unicode string. 
+ """ + self.log.info("Fetching URL: " + url) + + headers = {'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.91 Safari/537.36'} + + r = requests.get(url, headers=headers, timeout=10) + # raise an HTTPError on badness + r.raise_for_status() + + # this decodes r.content using a guessed encoding + return r.text + + def extract(self, doc, raw_html): + """ Extract text and other things from the raw_html for this document. """ + super(AfricaInvestorCrawler, self).extract(doc, raw_html) + + soup = BeautifulSoup(raw_html) + + # gather title + doc.title = self.extract_plaintext(soup.select('article.post .td-post-title .entry-title')) + + #gather publish date + date = self.extract_plaintext(soup.select('article.post .td-post-title .td-module-meta-info .td-post-date time.entry-date')) + doc.published_at = self.parse_timestamp(date) + + #gather text and summary + nodes = soup.select('article.post .td-post-content') + text_list = [] + for node in nodes[0].children: + if node.name in ['h5','p']: + text_list = text_list + [node] + doc.summary = "\n\n".join(p.text.strip() for p in text_list[:1]) + doc.text = "\n\n".join(p.text.strip() for p in text_list) + + # gather author + author = self.extract_plaintext(soup.select('article.post .td-post-title .td-module-meta-info .td-post-author-name a')) + if author: + doc.author = Author.get_or_create(author.strip(), AuthorType.journalist()) + else: + doc.author = Author.unknown() diff --git a/dexter/processing/crawlers/agrilinks.py b/dexter/processing/crawlers/agrilinks.py new file mode 100644 index 00000000..fb8540f3 --- /dev/null +++ b/dexter/processing/crawlers/agrilinks.py @@ -0,0 +1,62 @@ +from urlparse import urlparse, urlunparse +import re + +from bs4 import BeautifulSoup +import requests + +from .base import BaseCrawler +from ...models import Entity, Author, AuthorType + +class AgrilinksCrawler(BaseCrawler): + A_RE = re.compile('agrilinks.org') + + def offer(self, url): + """ 
Can this crawler process this URL? """ + parts = urlparse(url) + return bool(self.A_RE.match(parts.netloc)) + + def canonicalise_url(self, url): + """ Strip anchors, etc.""" + + # Needed to handle urls being recieved without protocol (http[s]://), check if it can be parsed first, then handle and re parse if there is no netloc found + if '//' not in url: + url = '%s%s' % ('https://', url) + + parts = urlparse(url) + + netloc = parts.netloc.strip(':80') + + # force http, strip trailing slash, anchors etc. + return urlunparse(['https', netloc, parts.path.rstrip('/') or '/', parts.params, parts.query, None]) + + def extract(self, doc, raw_html): + """ Extract text and other things from the raw_html for this document. """ + super(AgrilinksCrawler, self).extract(doc, raw_html) + + soup = BeautifulSoup(raw_html) + + # gather title + doc.title = self.extract_plaintext(soup.select('#content-main .node-post .field-name-title .page-header')) + + #gather publish date + date = self.extract_plaintext(soup.select('#content-main .node-post .group-post-info .field-name-post-date .field-item')) + doc.published_at = self.parse_timestamp(date) + + #gather text and summary + nodes = soup.select('#content-main .node-post .field-name-body .field-item') + child_list = [] + for child in nodes[0].descendants: + if isinstance(child, basestring): + child_list.append(child) + doc.summary = " ".join(p.strip() for p in child_list[:3]) + doc.text = " ".join(p for p in child_list) + + # gather author + author = '' + author_nodes = soup.select('#content-main .node-post .group-post-info .group-author-name a') + if author_nodes: + author = ''.join(a.text.strip() for a in author_nodes) + if author: + doc.author = Author.get_or_create(author.strip(), AuthorType.journalist()) + else: + doc.author = Author.unknown() \ No newline at end of file diff --git a/dexter/processing/crawlers/amabhungane.py b/dexter/processing/crawlers/amabhungane.py new file mode 100644 index 00000000..bb043673 --- /dev/null +++ 
b/dexter/processing/crawlers/amabhungane.py @@ -0,0 +1,46 @@ +from urlparse import urlparse, urlunparse +import re + +from bs4 import BeautifulSoup +import requests + +from .base import BaseCrawler +from ...models import Entity, Author, AuthorType + +class AmaBhunganeCrawler(BaseCrawler): + AB_RE = re.compile('amabhungane.co.za') + + def offer(self, url): + """ Can this crawler process this URL? """ + parts = urlparse(url) + return bool(self.AB_RE.match(parts.netloc)) + + def extract(self, doc, raw_html): + """ Extract text and other things from the raw_html for this document. """ + raw_html = raw_html.encode("utf-8") + raw_html = unicode(raw_html, errors='ignore') + + super(AmaBhunganeCrawler, self).extract(doc, raw_html) + + soup = BeautifulSoup(raw_html) + + # gather title + doc.title = self.extract_plaintext(soup.select('.large-12 .orbit-caption h5')) + + #gather text and summary + article_nodes = soup.select('.large-8 > .row') + body_nodes = article_nodes[1].select('.large-12 > p') + doc.summary = "\n\n".join(p.text.strip() for p in body_nodes[:2]) + doc.text = "\n\n".join(p.text.strip() for p in body_nodes) + + date_author = self.extract_plaintext(soup.select('.large-12 .orbit-caption time')) + #gather publish date + date = date_author[:date_author.index('-') - 1].strip() + doc.published_at = self.parse_timestamp(date) + + # gather author + author = date_author[date_author.index('-') + 1:] + if author: + doc.author = Author.get_or_create(author.strip(), AuthorType.journalist()) + else: + doc.author = Author.unknown() diff --git a/dexter/processing/crawlers/amehnews.py b/dexter/processing/crawlers/amehnews.py new file mode 100644 index 00000000..8111e7c7 --- /dev/null +++ b/dexter/processing/crawlers/amehnews.py @@ -0,0 +1,41 @@ +from urlparse import urlparse, urlunparse +import re + +from bs4 import BeautifulSoup +import requests + +from .base import BaseCrawler +from ...models import Entity, Author, AuthorType + +class AmehNewsCrawler(BaseCrawler): + AN_RE = 
re.compile('amehnews.com') + + def offer(self, url): + """ Can this crawler process this URL? """ + parts = urlparse(url) + return bool(self.AN_RE.match(parts.netloc)) + + def extract(self, doc, raw_html): + """ Extract text and other things from the raw_html for this document. """ + super(AmehNewsCrawler, self).extract(doc, raw_html) + + soup = BeautifulSoup(raw_html) + + # gather title + doc.title = self.extract_plaintext(soup.select('article.post h1.entry-title')) + + #gather publish date + date = self.extract_plaintext(soup.select('article.post .entry-meta .entry-date time.entry-date')) + doc.published_at = self.parse_timestamp(date) + + #gather text and summary + nodes = soup.select('article.post .entry-content > p') + doc.summary = "\n\n".join(p.text.strip() for p in nodes[:2]) + doc.text = "\n\n".join(p.text.strip() for p in nodes) + + # gather author + author = self.extract_plaintext(soup.select('article.post .entry-meta .entry-author a')) + if author: + doc.author = Author.get_or_create(author.strip(), AuthorType.journalist()) + else: + doc.author = Author.unknown() diff --git a/dexter/processing/crawlers/bloomberg.py b/dexter/processing/crawlers/bloomberg.py new file mode 100644 index 00000000..f70df06d --- /dev/null +++ b/dexter/processing/crawlers/bloomberg.py @@ -0,0 +1,70 @@ +from urlparse import urlparse, urlunparse +import re + +from bs4 import BeautifulSoup +import requests + +from .base import BaseCrawler +from ...models import Entity, Author, AuthorType + +class BloombergCrawler(BaseCrawler): + B_RE = re.compile('(www\.)?bloomberg.com') + + def offer(self, url): + """ Can this crawler process this URL? 
""" + parts = urlparse(url) + return bool(self.B_RE.match(parts.netloc)) + + def canonicalise_url(self, url): + """ Strip anchors, etc.""" + + # Needed to handle urls being recieved without protocol (http[s]://), check if it can be parsed first, then handle and re parse if there is no netloc found + if '//' not in url: + url = '%s%s' % ('https://', url) + + parts = urlparse(url) + + netloc = parts.netloc.strip(':80') + + # force http, strip trailing slash, anchors etc. + return urlunparse(['https', netloc, parts.path.rstrip('/') or '/', parts.params, parts.query, None]) + + def extract(self, doc, raw_html): + """ Extract text and other things from the raw_html for this document. """ + super(BloombergCrawler, self).extract(doc, raw_html) + + soup = BeautifulSoup(raw_html) + + # gather title + doc.title = self.extract_plaintext(soup.select('article .lede-text-only .lede-text-only__content .lede-text-only__hed .lede-text-only__highlight')) + + #gather publish date + date_node = soup.select('article .lede-text-only .lede-text-only__content time.article-timestamp') + date = '' + for node in date_node[0].children: + if node.name == 'noscript': + date = node.text.strip() + doc.published_at = self.parse_timestamp(date) + + + + #gather text and summary + summary_nodes = soup.select('article .content-well .abstract li') + doc.summary = "\n\n".join(p.text.strip() for p in summary_nodes) + + nodes = soup.select('article .content-well .body-copy') + text_list = [] + for node in nodes[0].children: + if node.name in ['h3','p']: + text_list = text_list + [node] + doc.text = "\n\n".join(p.text.strip() for p in text_list) + + # gather author + author = [] + author_nodes = soup.select('article .lede-text-only .lede-text-only__content .author') + for node in author_nodes: + author += [node.find(text=True).strip()] + if author: + doc.author = Author.get_or_create(','.join(author), AuthorType.journalist()) + else: + doc.author = Author.unknown() diff --git 
a/dexter/processing/crawlers/businessdailyafrica.py b/dexter/processing/crawlers/businessdailyafrica.py new file mode 100644 index 00000000..5ebb3db5 --- /dev/null +++ b/dexter/processing/crawlers/businessdailyafrica.py @@ -0,0 +1,60 @@ +from urlparse import urlparse, urlunparse +import re + +from bs4 import BeautifulSoup +import requests + +from .base import BaseCrawler +from ...models import Entity, Author, AuthorType + +class BusinessDailyAfricaCrawler(BaseCrawler): + BDA_RE = re.compile('(www\.)?businessdailyafrica.com') + + def offer(self, url): + """ Can this crawler process this URL? """ + parts = urlparse(url) + return bool(self.BDA_RE.match(parts.netloc)) + + def canonicalise_url(self, url): + """ Strip anchors, etc.""" + + # Needed to handle urls being recieved without protocol (http[s]://), check if it can be parsed first, then handle and re parse if there is no netloc found + if '//' not in url: + url = '%s%s' % ('https://', url) + + parts = urlparse(url) + + netloc = parts.netloc.strip(':80') + + # force http, strip trailing slash, anchors etc. + return urlunparse(['https', netloc, parts.path.rstrip('/') or '/', parts.params, parts.query, None]) + + def extract(self, doc, raw_html): + """ Extract text and other things from the raw_html for this document. 
""" + super(BusinessDailyAfricaCrawler, self).extract(doc, raw_html) + + soup = BeautifulSoup(raw_html) + + # gather title + doc.title = self.extract_plaintext(soup.select('article.article-story .page-box-inner header .article-title')) + + #gather publish date + date = self.extract_plaintext(soup.select('article.article-story .page-box-inner header .byline')) + doc.published_at = self.parse_timestamp(date) + + #gather text and summary + nodes = soup.select('article.article-story .page-box-inner p') + doc.summary = "\n\n".join(p.text.strip() for p in nodes[:2]) + doc.text = "\n\n".join(p.text.strip() for p in nodes) + + # gather author + author = '' + byline = self.extract_plaintext(soup.select('article.article-story .page-box-inner header .mobileShow .byline')) + if 'BY ' in byline: + author = byline[byline.index('BY ') + 3:] + else: + author = byline + if author: + doc.author = Author.get_or_create(author.strip(), AuthorType.journalist()) + else: + doc.author = Author.unknown() diff --git a/dexter/processing/crawlers/businessdayonline.py b/dexter/processing/crawlers/businessdayonline.py new file mode 100644 index 00000000..759083ae --- /dev/null +++ b/dexter/processing/crawlers/businessdayonline.py @@ -0,0 +1,41 @@ +from urlparse import urlparse, urlunparse +import re + +from bs4 import BeautifulSoup +import requests + +from .base import BaseCrawler +from ...models import Entity, Author, AuthorType + +class BusinessDayOnlineCrawler(BaseCrawler): + BDO_RE = re.compile('(www\.)?businessdayonline.com') + + def offer(self, url): + """ Can this crawler process this URL? """ + parts = urlparse(url) + return bool(self.BDO_RE.match(parts.netloc)) + + def extract(self, doc, raw_html): + """ Extract text and other things from the raw_html for this document. 
""" + super(BusinessDayOnlineCrawler, self).extract(doc, raw_html) + + soup = BeautifulSoup(raw_html) + + # gather title + doc.title = self.extract_plaintext(soup.select('.container article h1 a')) + + #gather publish date + date = self.extract_plaintext(soup.select('.container article .date')) + doc.published_at = self.parse_timestamp(date) + + #gather text and summary + nodes = soup.select('.container article p') + doc.summary = "\n\n".join(p.text.strip() for p in nodes[:2]) + doc.text = "\n\n".join(p.text.strip() for p in nodes) + + # gather author + author = self.extract_plaintext(soup.select('.container article .author a')) + if author: + doc.author = Author.get_or_create(author.strip(), AuthorType.journalist()) + else: + doc.author = Author.unknown() \ No newline at end of file diff --git a/dexter/processing/crawlers/channelafrica.py b/dexter/processing/crawlers/channelafrica.py new file mode 100644 index 00000000..96d16bf9 --- /dev/null +++ b/dexter/processing/crawlers/channelafrica.py @@ -0,0 +1,38 @@ +from urlparse import urlparse, urlunparse +import re + +from bs4 import BeautifulSoup +import requests + +from .base import BaseCrawler +from ...models import Entity, Author, AuthorType + +class ChannelAfricaCrawler(BaseCrawler): + CA_RE = re.compile('(www\.)?channelafrica.co.za') + + def offer(self, url): + """ Can this crawler process this URL? """ + parts = urlparse(url) + return bool(self.CA_RE.match(parts.netloc)) + + def extract(self, doc, raw_html): + """ Extract text and other things from the raw_html for this document. 
""" + super(ChannelAfricaCrawler, self).extract(doc, raw_html) + + soup = BeautifulSoup(raw_html) + + # gather title + doc.title = self.extract_plaintext(soup.select('.MainContentInner .inPageHeader')) + + #gather publish date + date = self.extract_plaintext(soup.select('.MainContentInner .datesContainer .dates')) + doc.published_at = self.parse_timestamp(date) + + #gather text and summary + summary = self.extract_plaintext(soup.select('.MainContentInner .ArticleInner .excerpt p')) + doc.summary = summary + nodes = soup.select('.MainContentInner .ArticleInner .articleBody p') + doc.text = summary + '\n\n' + "\n\n".join(p.text.strip() for p in nodes) + + # gather author + doc.author = Author.unknown() \ No newline at end of file diff --git a/dexter/processing/crawlers/classicfm.py b/dexter/processing/crawlers/classicfm.py new file mode 100644 index 00000000..6776329e --- /dev/null +++ b/dexter/processing/crawlers/classicfm.py @@ -0,0 +1,37 @@ +from urlparse import urlparse, urlunparse +import re + +from bs4 import BeautifulSoup +import requests + +from .base import BaseCrawler +from ...models import Entity, Author, AuthorType + +class ClassicFMCrawler(BaseCrawler): + CFM_RE = re.compile('www.classic97.net') + + def offer(self, url): + """ Can this crawler process this URL? """ + parts = urlparse(url) + return bool(self.CFM_RE.match(parts.netloc)) + + def extract(self, doc, raw_html): + """ Extract text and other things from the raw_html for this document. 
""" + super(ClassicFMCrawler, self).extract(doc, raw_html) + + soup = BeautifulSoup(raw_html) + + # gather title + doc.title = self.extract_plaintext(soup.select('#maincontent #page-title')) + + #gather publish date + date = doc.url[-10:] + doc.published_at = self.parse_timestamp(date) + + #gather text and summary + nodes = soup.select('#maincontent .field-item p') + doc.summary = "\n\n".join(p.text.strip() for p in nodes[:1]) + doc.text = "\n\n".join(p.text.strip() for p in nodes) + + # gather author + doc.author = Author.unknown() \ No newline at end of file diff --git a/dexter/processing/crawlers/cnbcafrica.py b/dexter/processing/crawlers/cnbcafrica.py new file mode 100644 index 00000000..40a4d90c --- /dev/null +++ b/dexter/processing/crawlers/cnbcafrica.py @@ -0,0 +1,60 @@ +from urlparse import urlparse, urlunparse +import re + +from bs4 import BeautifulSoup +import requests + +from .base import BaseCrawler +from ...models import Entity, Author, AuthorType + +class CNBCAfricaCrawler(BaseCrawler): + CBNCA_RE = re.compile('(www\.)?cnbcafrica.com') + + def offer(self, url): + """ Can this crawler process this URL? """ + parts = urlparse(url) + return bool(self.CBNCA_RE.match(parts.netloc)) + + def canonicalise_url(self, url): + """ Strip anchors, etc.""" + + # Needed to handle urls being recieved without protocol (http[s]://), check if it can be parsed first, then handle and re parse if there is no netloc found + if '//' not in url: + url = '%s%s' % ('https://', url) + + parts = urlparse(url) + + netloc = parts.netloc.strip(':80') + + # force http, strip trailing slash, anchors etc. + return urlunparse(['https', netloc, parts.path.rstrip('/') or '/', parts.params, parts.query, None]) + + def extract(self, doc, raw_html): + """ Extract text and other things from the raw_html for this document. 
""" + super(CNBCAfricaCrawler, self).extract(doc, raw_html) + + soup = BeautifulSoup(raw_html) + + # gather title + doc.title = self.extract_plaintext(soup.select('article.post .td-post-header .td-post-title .entry-title')) + + #gather publish date + date = self.extract_plaintext(soup.select('article.post .td-post-header .td-module-meta-info .td-post-date time.entry-date')) + doc.published_at = self.parse_timestamp(date) + + #gather text and summary + nodes = soup.select('article.post .td-post-content') + text_list = [] + for node in nodes[0].children: + if node.name in ['h3','h4','h5','p']: + text_list = text_list + [node] + doc.summary = "\n\n".join(p.text.strip() for p in text_list[:3]) + doc.text = "\n\n".join(p.text.strip() for p in text_list) + + + # gather author + author = self.extract_plaintext(soup.select('article.post .td-post-header .td-module-meta-info .td-post-author-name a')) + if author: + doc.author = Author.get_or_create(author.strip(), AuthorType.journalist()) + else: + doc.author = Author.unknown() diff --git a/dexter/processing/crawlers/dailytimes.py b/dexter/processing/crawlers/dailytimes.py new file mode 100644 index 00000000..72c8e1a7 --- /dev/null +++ b/dexter/processing/crawlers/dailytimes.py @@ -0,0 +1,55 @@ +from urlparse import urlparse, urlunparse +import re + +from bs4 import BeautifulSoup +import requests + +from .base import BaseCrawler +from ...models import Entity, Author, AuthorType + +class DailyTimesCrawler(BaseCrawler): + DT_RE = re.compile('dailytimes.ng') + + def offer(self, url): + """ Can this crawler process this URL? 
""" + parts = urlparse(url) + return bool(self.DT_RE.match(parts.netloc)) + + def canonicalise_url(self, url): + """ Strip anchors, etc.""" + + # Needed to handle urls being recieved without protocol (http[s]://), check if it can be parsed first, then handle and re parse if there is no netloc found + if '//' not in url: + url = '%s%s' % ('https://', url) + + parts = urlparse(url) + + netloc = parts.netloc.strip(':80') + + # force http, strip trailing slash, anchors etc. + return urlunparse(['https', netloc, parts.path.rstrip('/') or '/', parts.params, parts.query, None]) + + def extract(self, doc, raw_html): + """ Extract text and other things from the raw_html for this document. """ + super(DailyTimesCrawler, self).extract(doc, raw_html) + + soup = BeautifulSoup(raw_html) + + # gather title + doc.title = self.extract_plaintext(soup.select('.post .post-header h1.post-title')) + + #gather publish date + date = self.extract_plaintext(soup.select('.post .post-header .post-byline')) + doc.published_at = self.parse_timestamp(date) + + #gather text and summary + nodes = soup.select('.post .post-content p') + doc.summary = "\n\n".join(p.text.strip() for p in nodes[:1]) + doc.text = "\n\n".join(p.text.strip() for p in nodes) + + # gather author + author = self.extract_plaintext(soup.select('.post .post-author h3')) + if author: + doc.author = Author.get_or_create(author.strip(), AuthorType.journalist()) + else: + doc.author = Author.unknown() \ No newline at end of file diff --git a/dexter/processing/crawlers/dailytrustnp.py b/dexter/processing/crawlers/dailytrustnp.py new file mode 100644 index 00000000..5cfbe856 --- /dev/null +++ b/dexter/processing/crawlers/dailytrustnp.py @@ -0,0 +1,62 @@ +from urlparse import urlparse, urlunparse +import re + +from bs4 import BeautifulSoup +import requests + +from .base import BaseCrawler +from ...models import Entity, Author, AuthorType + +class DailyTrustNPCrawler(BaseCrawler): + DTNP_RE = re.compile('(www\.)?dailytrust.com.ng') + + 
def offer(self, url): + """ Can this crawler process this URL? """ + parts = urlparse(url) + return bool(self.DTNP_RE.match(parts.netloc)) + + def canonicalise_url(self, url): + """ Strip anchors, etc.""" + + # Needed to handle urls being recieved without protocol (http[s]://), check if it can be parsed first, then handle and re parse if there is no netloc found + if '//' not in url: + url = '%s%s' % ('https://', url) + + parts = urlparse(url) + + netloc = parts.netloc.strip(':80') + + # force http, strip trailing slash, anchors etc. + return urlunparse(['https', netloc, parts.path.rstrip('/') or '/', parts.params, parts.query, None]) + + def extract(self, doc, raw_html): + """ Extract text and other things from the raw_html for this document. """ + super(DailyTrustNPCrawler, self).extract(doc, raw_html) + + soup = BeautifulSoup(raw_html) + + # gather title + doc.title = self.extract_plaintext(soup.select('.container .story h1')) + + author_date_str = self.extract_plaintext(soup.select('.container .story span.storydate')) + + #gather publish date + date = author_date_str[author_date_str.index('Publish Date:') + 13:].strip() + print "This date %s" % (date) + doc.published_at = self.parse_timestamp(date) + + #gather text and summary + nodes = soup.select('.container .fullstory p') + doc.summary = "\n\n".join(p.text.strip() for p in nodes[:1]) + doc.text = "\n\n".join(p.text.strip() for p in nodes) + + # gather author + if '-' not in author_date_str: + author = author_date_str[author_date_str.index('By') + 2 : author_date_str.index('|')].strip() + + if author: + doc.author = Author.get_or_create(author.strip(), AuthorType.journalist()) + else: + doc.author = Author.unknown() + else: + doc.author = Author.unknown() \ No newline at end of file diff --git a/dexter/processing/crawlers/destinyconnect.py b/dexter/processing/crawlers/destinyconnect.py new file mode 100644 index 00000000..12e853bb --- /dev/null +++ b/dexter/processing/crawlers/destinyconnect.py @@ -0,0 +1,41 
@@ +from urlparse import urlparse, urlunparse +import re + +from bs4 import BeautifulSoup +import requests + +from .base import BaseCrawler +from ...models import Entity, Author, AuthorType + +class DestinyConnectCrawler(BaseCrawler): + DC_RE = re.compile('(www\.)?destinyconnect.com') + + def offer(self, url): + """ Can this crawler process this URL? """ + parts = urlparse(url) + return bool(self.DC_RE.match(parts.netloc)) + + def extract(self, doc, raw_html): + """ Extract text and other things from the raw_html for this document. """ + super(DestinyConnectCrawler, self).extract(doc, raw_html) + + soup = BeautifulSoup(raw_html) + + # gather title + doc.title = self.extract_plaintext(soup.select('article.post .entry-header .entry-title')) + + #gather publish date + date = self.extract_plaintext(soup.select('article.post .entry-header .entry-meta time.entry-date')) + doc.published_at = self.parse_timestamp(date) + + #gather text and summary + nodes = soup.select('article.post .entry-content > p') + doc.summary = "\n\n".join(p.text.strip() for p in nodes[:2]) + doc.text = "\n\n".join(p.text.strip() for p in nodes) + + # gather author + author = self.extract_plaintext(soup.select('article.post .entry-header .entry-meta .byline .author')) + if author: + doc.author = Author.get_or_create(author.strip(), AuthorType.journalist()) + else: + doc.author = Author.unknown() diff --git a/dexter/processing/crawlers/economist.py b/dexter/processing/crawlers/economist.py new file mode 100644 index 00000000..944c3a1a --- /dev/null +++ b/dexter/processing/crawlers/economist.py @@ -0,0 +1,61 @@ +from urlparse import urlparse, urlunparse +import re + +from bs4 import BeautifulSoup +import requests + +from .base import BaseCrawler +from ...models import Entity, Author, AuthorType + +class EconomistCrawler(BaseCrawler): + E_RE = re.compile('(www\.)?economist.com') + + def offer(self, url): + """ Can this crawler process this URL? 
""" + parts = urlparse(url) + return bool(self.E_RE.match(parts.netloc)) + + def canonicalise_url(self, url): + """ Strip anchors, etc.""" + + # Needed to handle urls being recieved without protocol (http[s]://), check if it can be parsed first, then handle and re parse if there is no netloc found + if '//' not in url: + url = '%s%s' % ('https://', url) + + parts = urlparse(url) + + netloc = parts.netloc.strip(':80') + + # force http, strip trailing slash, anchors etc. + return urlunparse(['https', netloc, parts.path.rstrip('/') or '/', parts.params, parts.query, None]) + + def extract(self, doc, raw_html): + """ Extract text and other things from the raw_html for this document. """ + super(EconomistCrawler, self).extract(doc, raw_html) + + soup = BeautifulSoup(raw_html) + + # gather title + doc.title = self.extract_plaintext(soup.select('article.blog-post .flytitle-and-title__body .flytitle-and-title__title')) + + #gather publish date + date = self.extract_plaintext(soup.select('article.blog-post .blog-post__section-date-author time.blog-post__datetime')) + doc.published_at = self.parse_timestamp(date) + + #gather text and summary + nodes = soup.select('article.blog-post .blog-post__text > p') + doc.summary = "\n\n".join(p.text.strip() for p in nodes[:1]) + doc.text = "\n\n".join(p.text.strip() for p in nodes) + + # gather author + author_byline = self.extract_plaintext(soup.select('article.blog-post .blog-post__section-date-author .blog-post__byline-container .blog-post__byline')) + author = '' + if '|' in author_byline: + author = author_byline[:author_byline.index('|') -1] + else: + author = author_byline + if author: + doc.author = Author.get_or_create(author.strip(), AuthorType.journalist()) + else: + doc.author = Author.unknown() + diff --git a/dexter/processing/crawlers/globaltimescn.py b/dexter/processing/crawlers/globaltimescn.py new file mode 100644 index 00000000..0c8fca6a --- /dev/null +++ b/dexter/processing/crawlers/globaltimescn.py @@ -0,0 +1,50 @@ 
+from urlparse import urlparse, urlunparse +import re + +from bs4 import BeautifulSoup +import requests + +from .base import BaseCrawler +from ...models import Entity, Author, AuthorType + +class GlobalTimesCN(BaseCrawler): + GTCN_RE = re.compile('(www\.)?globaltimes.cn') + + def offer(self, url): + """ Can this crawler process this URL? """ + parts = urlparse(url) + return bool(self.GTCN_RE.match(parts.netloc)) + + def extract(self, doc, raw_html): + """ Extract text and other things from the raw_html for this document. """ + super(GlobalTimesCN, self).extract(doc, raw_html) + + soup = BeautifulSoup(raw_html) + + # gather title + doc.title = self.extract_plaintext(soup.select('#contents #left .article-title h3')) + + #gather publish date + date = '' + source_string = self.extract_plaintext(soup.select('#contents #left .article-source .text-left')) + if 'Published:' in source_string: + date = source_string[source_string.index('Published:') + 10:].strip() + doc.published_at = self.parse_timestamp(date) + + #gather text and summary + nodes = soup.select('#contents #left .row-content') + child_list = [] + for child in nodes[0].descendants: + if isinstance(child, basestring): + child_list.append(child) + doc.summary = "\n".join(p for p in child_list[:1]) + doc.text = "\n".join(p for p in child_list) + + # gather author + author = '' + if 'By ' in source_string: + author = source_string[source_string.index('By ') + 3:source_string.index('Source:')-1].strip() + if author: + doc.author = Author.get_or_create(author.strip(), AuthorType.journalist()) + else: + doc.author = Author.unknown() \ No newline at end of file diff --git a/dexter/processing/crawlers/guardian.py b/dexter/processing/crawlers/guardian.py new file mode 100644 index 00000000..a45cd1fc --- /dev/null +++ b/dexter/processing/crawlers/guardian.py @@ -0,0 +1,66 @@ +from urlparse import urlparse, urlunparse +import re + +from bs4 import BeautifulSoup +import requests + +from .base import BaseCrawler +from 
...models import Entity, Author, AuthorType + +class GuardianCrawler(BaseCrawler): + G_RE = re.compile('guardian.ng') + + def offer(self, url): + """ Can this crawler process this URL? """ + parts = urlparse(url) + return bool(self.G_RE.match(parts.netloc)) + + def canonicalise_url(self, url): + """ Strip anchors, etc.""" + + # Needed to handle urls being recieved without protocol (http[s]://), check if it can be parsed first, then handle and re parse if there is no netloc found + if '//' not in url: + url = '%s%s' % ('https://', url) + + parts = urlparse(url) + + netloc = parts.netloc.strip(':80') + + # force http, strip trailing slash, anchors etc. + return urlunparse(['https', netloc, parts.path.rstrip('/') or '/', parts.params, parts.query, None]) + + def extract(self, doc, raw_html): + """ Extract text and other things from the raw_html for this document. """ + super(GuardianCrawler, self).extract(doc, raw_html) + + soup = BeautifulSoup(raw_html) + # gather title + doc.title = self.extract_plaintext(soup.select('.single-post-header h1.single-article-title')) + + #gather publish date + date = self.extract_plaintext(soup.select('.page-main .single-article-aside .single-article-datetime')) + doc.published_at = self.parse_timestamp(date[:date.index('|') - 1].strip() + ' ' + date[date.index('|') + 1:].strip()) + + #gather text and summary + summary_list = [] + summary_nodes = soup.select('.page-main .single-article-content article') + for item in summary_nodes[0].find('br').next_siblings: + if item != u'\n': + if isinstance(item, basestring): + summary_list.append(item) + else: + summary_list.append(item.text) + summary = summary_list[0].strip() + doc.summary = summary + text_nodes = soup.select('.page-main .single-article-content article p') + doc.text = summary + "\n\n" + "\n\n".join(p.text.strip() for p in text_nodes[2:]) + + # gather author + author = self.extract_plaintext(soup.select('.page-main .single-article-aside .single-article-author strong')) + if 
author: + if ',' in author: + doc.author = Author.get_or_create(author[:author.index(',')].strip(), AuthorType.journalist()) + else: + doc.author = Author.get_or_create(author.strip(), AuthorType.journalist()) + else: + doc.author = Author.unknown() diff --git a/dexter/processing/crawlers/hallmarknews.py b/dexter/processing/crawlers/hallmarknews.py new file mode 100644 index 00000000..3a3c4799 --- /dev/null +++ b/dexter/processing/crawlers/hallmarknews.py @@ -0,0 +1,43 @@ +from urlparse import urlparse, urlunparse +import re + +from bs4 import BeautifulSoup +import requests + +from .base import BaseCrawler +from ...models import Entity, Author, AuthorType + +class HallmarkNewsCrawler(BaseCrawler): + HN_RE = re.compile('hallmarknews.com') + + def offer(self, url): + """ Can this crawler process this URL? """ + parts = urlparse(url) + return bool(self.HN_RE.match(parts.netloc)) + + def extract(self, doc, raw_html): + """ Extract text and other things from the raw_html for this document. """ + super(HallmarkNewsCrawler, self).extract(doc, raw_html) + + soup = BeautifulSoup(raw_html) + + # gather title + doc.title = self.extract_plaintext(soup.select('#content .post h1.posttitle')) + + #gather publish date + meta_date = self.extract_plaintext(soup.select('#content #datemeta #datemeta_l')) + date = meta_date[meta_date.index('Published On:') + 13:].strip() + doc.published_at = self.parse_timestamp(date) + + #gather text and summary + nodes = soup.select('#content .post .entry > p') + doc.summary = "\n\n".join(p.text.strip() for p in nodes[:2]) + doc.text = "\n\n".join(p.text.strip() for p in nodes) + + # gather author + author_nodes = soup.select('#content #datemeta #datemeta_r a') + author = author_nodes[-1].text + if author: + doc.author = Author.get_or_create(author.strip(), AuthorType.journalist()) + else: + doc.author = Author.unknown() diff --git a/dexter/processing/crawlers/howwemadeitinafrica.py b/dexter/processing/crawlers/howwemadeitinafrica.py new file mode 
100644 index 00000000..61903fdc --- /dev/null +++ b/dexter/processing/crawlers/howwemadeitinafrica.py @@ -0,0 +1,71 @@ +from urlparse import urlparse, urlunparse +import re + +from bs4 import BeautifulSoup +import requests + +from .base import BaseCrawler +from ...models import Entity, Author, AuthorType + +class HowWeMadeItInAfricaCrawler(BaseCrawler): + HWMIIA_RE = re.compile('(www\.)?howwemadeitinafrica.com') + + def offer(self, url): + """ Can this crawler process this URL? """ + parts = urlparse(url) + return bool(self.HWMIIA_RE.match(parts.netloc)) + + def canonicalise_url(self, url): + """ Strip anchors, etc.""" + + # Needed to handle urls being recieved without protocol (http[s]://), check if it can be parsed first, then handle and re parse if there is no netloc found + if '//' not in url: + url = '%s%s' % ('https://', url) + + parts = urlparse(url) + + netloc = parts.netloc.strip(':80') + + # force http, strip trailing slash, anchors etc. + return urlunparse(['https', netloc, parts.path.rstrip('/') or '/', parts.params, parts.query, None]) + + def fetch(self, url): + """ + Fetch and return the raw HTML for this url. + The return content is a unicode string. + """ + self.log.info("Fetching URL: " + url) + + headers = {'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.91 Safari/537.36'} + + r = requests.get(url, headers=headers, timeout=10) + # raise an HTTPError on badness + r.raise_for_status() + + # this decodes r.content using a guessed encoding + return r.text + + def extract(self, doc, raw_html): + """ Extract text and other things from the raw_html for this document. 
""" + super(HowWeMadeItInAfricaCrawler, self).extract(doc, raw_html) + + soup = BeautifulSoup(raw_html) + + # gather title + doc.title = self.extract_plaintext(soup.select('.content-wrapper .post header h1')) + + #gather publish date + date = self.extract_plaintext(soup.select('.content-wrapper .post header p time')) + doc.published_at = self.parse_timestamp(date) + + #gather text and summary + nodes = soup.select(".content-wrapper .post .content p") + doc.summary = "\n\n".join(p.text.strip() for p in nodes[:2]) + doc.text = "\n\n".join(p.text.strip() for p in nodes) + + # gather author + author = self.extract_plaintext(soup.select(".content-wrapper .post p a")) + if author: + doc.author = Author.get_or_create(author.strip(), AuthorType.journalist()) + else: + doc.author = Author.unknown() \ No newline at end of file diff --git a/dexter/processing/crawlers/independentng.py b/dexter/processing/crawlers/independentng.py new file mode 100644 index 00000000..c280edb3 --- /dev/null +++ b/dexter/processing/crawlers/independentng.py @@ -0,0 +1,55 @@ +from urlparse import urlparse, urlunparse +import re + +from bs4 import BeautifulSoup +import requests + +from .base import BaseCrawler +from ...models import Entity, Author, AuthorType + +class IndependentNGCrawler(BaseCrawler): + ING_RE = re.compile('independent.ng') + + def offer(self, url): + """ Can this crawler process this URL? """ + parts = urlparse(url) + return bool(self.ING_RE.match(parts.netloc)) + + def canonicalise_url(self, url): + """ Strip anchors, etc.""" + + # Needed to handle urls being recieved without protocol (http[s]://), check if it can be parsed first, then handle and re parse if there is no netloc found + if '//' not in url: + url = '%s%s' % ('https://', url) + + parts = urlparse(url) + + netloc = parts.netloc.strip(':80') + + # force http, strip trailing slash, anchors etc. 
+ return urlunparse(['https', netloc, parts.path.rstrip('/') or '/', parts.params, parts.query, None]) + + def extract(self, doc, raw_html): + """ Extract text and other things from the raw_html for this document. """ + super(IndependentNGCrawler, self).extract(doc, raw_html) + + soup = BeautifulSoup(raw_html) + + # gather title + doc.title = self.extract_plaintext(soup.select('article.post .td-post-header .td-post-title .entry-title')) + + #gather publish date + date = self.extract_plaintext(soup.select('article.post .td-post-header .td-post-title .td-module-meta-info .td-post-date time.entry-date')) + doc.published_at = self.parse_timestamp(date) + + #gather text and summary + nodes = soup.select('article.post .td-post-content > p') + doc.summary = "\n\n".join(p.text.strip() for p in nodes[:1]) + doc.text = "\n\n".join(p.text.strip() for p in nodes[:-1]) + + # gather author + author = self.extract_plaintext(soup.select('article.post .td-post-header .td-post-title .td-module-meta-info .td-post-author-name a')) + if author: + doc.author = Author.get_or_create(author.strip(), AuthorType.journalist()) + else: + doc.author = Author.unknown() \ No newline at end of file diff --git a/dexter/processing/crawlers/leadership.py b/dexter/processing/crawlers/leadership.py new file mode 100644 index 00000000..fb5a2eaf --- /dev/null +++ b/dexter/processing/crawlers/leadership.py @@ -0,0 +1,59 @@ +from urlparse import urlparse, urlunparse +import re + +from bs4 import BeautifulSoup +import requests + +from .base import BaseCrawler +from ...models import Entity, Author, AuthorType + +class LeadershipCrawler(BaseCrawler): + L_RE = re.compile('leadership.ng') + + def offer(self, url): + """ Can this crawler process this URL? 
""" + parts = urlparse(url) + return bool(self.L_RE.match(parts.netloc)) + + def canonicalise_url(self, url): + """ Strip anchors, etc.""" + + # Needed to handle urls being recieved without protocol (http[s]://), check if it can be parsed first, then handle and re parse if there is no netloc found + if '//' not in url: + url = '%s%s' % ('https://', url) + + parts = urlparse(url) + + netloc = parts.netloc.strip(':80') + + # force http, strip trailing slash, anchors etc. + return urlunparse(['https', netloc, parts.path.rstrip('/') or '/', parts.params, parts.query, None]) + + def extract(self, doc, raw_html): + """ Extract text and other things from the raw_html for this document. """ + super(LeadershipCrawler, self).extract(doc, raw_html) + + soup = BeautifulSoup(raw_html) + + # gather title + doc.title = self.extract_plaintext(soup.select('#mvp-post-main #mvp-post-content h1.mvp-post-title')) + + #gather publish date + date = self.extract_plaintext(soup.select('#mvp-post-main #mvp-post-content .mvp-author-info-date time.post-date')) + doc.published_at = self.parse_timestamp(date) + + #gather text and summary + nodes = soup.select('#mvp-post-main #mvp-post-content #mvp-content-main > p') + doc.summary = "\n\n".join(p.text.strip() for p in nodes[:1]) + doc.text = "\n\n".join(p.text.strip() for p in nodes) + + # gather author + author_nodes = soup.select('#mvp-post-main #mvp-post-content .mvp-author-info-name .author-name a') + if len(author_nodes) > 1: + author = self.extract_plaintext([author_nodes[1]]) + else: + author = self.extract_plaintext([author_nodes[0]]) + if author: + doc.author = Author.get_or_create(author.strip(), AuthorType.journalist()) + else: + doc.author = Author.unknown() diff --git a/dexter/processing/crawlers/mediamaxnet.py b/dexter/processing/crawlers/mediamaxnet.py new file mode 100644 index 00000000..f712cb5d --- /dev/null +++ b/dexter/processing/crawlers/mediamaxnet.py @@ -0,0 +1,41 @@ +from urlparse import urlparse, urlunparse +import re + 
+from bs4 import BeautifulSoup +import requests + +from .base import BaseCrawler +from ...models import Entity, Author, AuthorType + +class MediaMaxNetCrawler(BaseCrawler): + MMN_RE = re.compile('(www\.)?mediamaxnetwork.co.ke') + + def offer(self, url): + """ Can this crawler process this URL? """ + parts = urlparse(url) + return bool(self.MMN_RE.match(parts.netloc)) + + def extract(self, doc, raw_html): + """ Extract text and other things from the raw_html for this document. """ + super(MediaMaxNetCrawler, self).extract(doc, raw_html) + + soup = BeautifulSoup(raw_html) + + # gather title + doc.title = self.extract_plaintext(soup.select('.post header h1.entry-title')) + + #gather publish date + date = self.extract_plaintext(soup.select('.post header time.entry-date')) + doc.published_at = self.parse_timestamp(date) + + #gather text and summary + nodes = soup.select('.post .entry-content p') + doc.summary = "\n\n".join(p.text.strip() for p in nodes[:2]) + doc.text = "\n\n".join(p.text.strip() for p in nodes) + + # gather author + author = self.extract_plaintext(soup.select('.post header .author-link a span')) + if author: + doc.author = Author.get_or_create(author.strip(), AuthorType.journalist()) + else: + doc.author = Author.unknown() \ No newline at end of file diff --git a/dexter/processing/crawlers/monitorke.py b/dexter/processing/crawlers/monitorke.py new file mode 100644 index 00000000..d0102060 --- /dev/null +++ b/dexter/processing/crawlers/monitorke.py @@ -0,0 +1,46 @@ +from urlparse import urlparse, urlunparse +import re + +from bs4 import BeautifulSoup +import requests + +from .base import BaseCrawler +from ...models import Entity, Author, AuthorType + +class MonitorKECrawler(BaseCrawler): + MKE_RE = re.compile('(www\.)?monitor.co.ke') + + def offer(self, url): + """ Can this crawler process this URL? 
""" + parts = urlparse(url) + return bool(self.MKE_RE.match(parts.netloc)) + + def extract(self, doc, raw_html): + """ Extract text and other things from the raw_html for this document. """ + super(MonitorKECrawler, self).extract(doc, raw_html) + + soup = BeautifulSoup(raw_html) + + # gather title + doc.title = self.extract_plaintext(soup.select('#main-content .post .post-inner .post-title')) + + #gather publish date + date = self.extract_plaintext(soup.select('#main-content .post .post-inner .updated')) + doc.published_at = self.parse_timestamp(date) + + #gather text and summary + nodes = soup.select('#main-content .post .post-inner .entry p') + doc.summary = "\n\n".join(p.text.strip() for p in nodes[:2]) + doc.text = "\n\n".join(p.text.strip() for p in nodes) + + # gather author + author = '' + entry_author = nodes[0].text + if 'By ' in entry_author: + author = entry_author[entry_author.index('By ') + 3:].strip() + else: + author = self.extract_plaintext(soup.select('#main-content .post .post-inner .post-meta .post-meta-author a')) + if author: + doc.author = Author.get_or_create(author.strip(), AuthorType.journalist()) + else: + doc.author = Author.unknown() diff --git a/dexter/processing/crawlers/naijanews.py b/dexter/processing/crawlers/naijanews.py new file mode 100644 index 00000000..a00c2cc4 --- /dev/null +++ b/dexter/processing/crawlers/naijanews.py @@ -0,0 +1,56 @@ +from urlparse import urlparse, urlunparse +import re + +from bs4 import BeautifulSoup +import requests + +from .base import BaseCrawler +from ...models import Entity, Author, AuthorType + +class NaijaNewsCrawler(BaseCrawler): + NNA_RE = re.compile('naijanewsagency.com') + + def offer(self, url): + """ Can this crawler process this URL? 
""" + parts = urlparse(url) + return bool(self.NNA_RE.match(parts.netloc)) + + def canonicalise_url(self, url): + """ Strip anchors, etc.""" + + # Needed to handle urls being recieved without protocol (http[s]://), check if it can be parsed first, then handle and re parse if there is no netloc found + if '//' not in url: + url = '%s%s' % ('https://', url) + + parts = urlparse(url) + + netloc = parts.netloc.strip(':80') + + # force http, strip trailing slash, anchors etc. + return urlunparse(['https', netloc, parts.path.rstrip('/') or '/', parts.params, parts.query, None]) + + def extract(self, doc, raw_html): + """ Extract text and other things from the raw_html for this document. """ + super(NaijaNewsCrawler, self).extract(doc, raw_html) + + soup = BeautifulSoup(raw_html) + + # gather title + doc.title = self.extract_plaintext(soup.select('.main_container h1.post-tile')) + + #gather publish date + #date_nodes = soup.select('.container .article_content .article_content_meta .article_content_date span') + date = self.extract_plaintext(soup.select('.main_container .single-post-meta span time')) + doc.published_at = self.parse_timestamp(date) + + #gather text and summary + nodes = soup.select('.main_container .entry-content') + text_list = [] + for node in nodes[0].children: + if node.name in ['h2','h3','p']: + text_list = text_list + [node] + doc.summary = "\n\n".join(p.text.strip() for p in text_list[:2]) + doc.text = "\n\n".join(p.text.strip() for p in text_list) + + # gather author + doc.author = Author.unknown() diff --git a/dexter/processing/crawlers/nan.py b/dexter/processing/crawlers/nan.py new file mode 100644 index 00000000..83a6c1b5 --- /dev/null +++ b/dexter/processing/crawlers/nan.py @@ -0,0 +1,37 @@ +from urlparse import urlparse, urlunparse +import re + +from bs4 import BeautifulSoup +import requests + +from .base import BaseCrawler +from ...models import Entity, Author, AuthorType + +class NANCrawler(BaseCrawler): + NAN_RE = 
re.compile('(www\.)?nan.ng') + + def offer(self, url): + """ Can this crawler process this URL? """ + parts = urlparse(url) + return bool(self.NAN_RE.match(parts.netloc)) + + def extract(self, doc, raw_html): + """ Extract text and other things from the raw_html for this document. """ + super(NANCrawler, self).extract(doc, raw_html) + + soup = BeautifulSoup(raw_html) + + # gather title + doc.title = self.extract_plaintext(soup.select('.post .article-header .xt-post-title')) + + #gather publish date + date = soup.select('.post article time')[0]['datetime'] + doc.published_at = self.parse_timestamp(date) + + #gather text and summary + nodes = soup.select('.post .article-content .post-body p') + doc.summary = "\n\n".join(p.text.strip() for p in nodes[:2]) + doc.text = "\n\n".join(p.text.strip() for p in nodes) + + # gather author + doc.author = Author.unknown() \ No newline at end of file diff --git a/dexter/processing/crawlers/nationaldailyng.py b/dexter/processing/crawlers/nationaldailyng.py new file mode 100644 index 00000000..5766ad9f --- /dev/null +++ b/dexter/processing/crawlers/nationaldailyng.py @@ -0,0 +1,66 @@ +from urlparse import urlparse, urlunparse +import re + +from bs4 import BeautifulSoup +import requests + +from .base import BaseCrawler +from ...models import Entity, Author, AuthorType + +class NationalDailyNgCrawler(BaseCrawler): + NDN_RE = re.compile('(www\.)?nationaldailyng.com') + ignore_lst = [ + 'wabtn_container', + 'fb-root', + 'fbcb_container', + 'td-a-rec td-a-rec-id-content_bottom ', + 'td-a-rec td-a-rec-id-content_inline '] + + def offer(self, url): + """ Can this crawler process this URL? """ + parts = urlparse(url) + return bool(self.NDN_RE.match(parts.netloc)) + + def validate_attrs(self, attrs): + """ Validation test to check if an element is on the ignore list. 
""" + for item in self.ignore_lst: + if item in attrs.values(): + return False + if 'class' in attrs: + for c in attrs['class']: + if c == item: + return False + + return True + + def extract(self, doc, raw_html): + """ Extract text and other things from the raw_html for this document. """ + super(NationalDailyNgCrawler, self).extract(doc, raw_html) + + soup = BeautifulSoup(raw_html) + + # gather title + doc.title = self.extract_plaintext(soup.select('.td-main-content .td-post-header .td-post-title h1.entry-title')) + + #gather publish date + date = self.extract_plaintext(soup.select('.td-main-content .td-post-header .td-post-title .td-post-date time')) + doc.published_at = self.parse_timestamp(date) + + #gather text and summary + nodes = soup.select('.td-main-content .td-post-content') + doc.summary = '' + doc.text = '' + for node in nodes[0].contents: + if not isinstance(node, basestring) and self.validate_attrs(node.attrs): + doc.text += "\n\n" + node.text.strip() if node.text.strip() else '' + if len(doc.summary) < 200: + doc.summary += "\n\n" + node.text.strip() if node.text.strip() else '' + doc.text = doc.text.strip() + doc.summary = doc.summary.strip() + + # gather author + author = self.extract_plaintext(soup.select('.td-main-content .td-post-header .td-post-title .td-post-author-name a')) + if author: + doc.author = Author.get_or_create(author.strip(), AuthorType.journalist()) + else: + doc.author = Author.unknown() diff --git a/dexter/processing/crawlers/nationalmirror.py b/dexter/processing/crawlers/nationalmirror.py new file mode 100644 index 00000000..4d1dd269 --- /dev/null +++ b/dexter/processing/crawlers/nationalmirror.py @@ -0,0 +1,51 @@ +from urlparse import urlparse, urlunparse +import re + +from bs4 import BeautifulSoup +import requests + +from .base import BaseCrawler +from ...models import Entity, Author, AuthorType + +class NationalMirrorCrawler(BaseCrawler): + NM_RE = re.compile('(www\.)?nationalmirroronline.net') + + def offer(self, url): + 
""" Can this crawler process this URL? """ + parts = urlparse(url) + return bool(self.NM_RE.match(parts.netloc)) + + def canonicalise_url(self, url): + """ Strip anchors, etc.""" + + # Needed to handle urls being recieved without protocol (http[s]://), check if it can be parsed first, then handle and re parse if there is no netloc found + if '//' not in url: + url = '%s%s' % ('https://', url) + + parts = urlparse(url) + + netloc = parts.netloc.strip(':80') + + # force http, strip trailing slash, anchors etc. + return urlunparse(['https', netloc, parts.path.rstrip('/') or '/', parts.params, parts.query, None]) + + def extract(self, doc, raw_html): + """ Extract text and other things from the raw_html for this document. """ + super(NationalMirrorCrawler, self).extract(doc, raw_html) + + soup = BeautifulSoup(raw_html) + + # gather title + doc.title = self.extract_plaintext(soup.select('#primary .post .entry-header .entry-title')) + + #gather publish date + date = self.extract_plaintext(soup.select('#primary .post .entry-header .entry-meta .entry-date')) + doc.published_at = self.parse_timestamp(date) + + #gather text and summary + nodes = soup.select('#primary .post .entry-content p') + doc.summary = "\n\n".join(p.text.strip() for p in nodes[:1]) + doc.text = "\n\n".join(p.text.strip() for p in nodes) + + # gather author + doc.author = Author.unknown() \ No newline at end of file diff --git a/dexter/processing/crawlers/newsverge.py b/dexter/processing/crawlers/newsverge.py new file mode 100644 index 00000000..0deb6f8d --- /dev/null +++ b/dexter/processing/crawlers/newsverge.py @@ -0,0 +1,56 @@ +from urlparse import urlparse, urlunparse +import re + +from bs4 import BeautifulSoup +import requests + +from .base import BaseCrawler +from ...models import Entity, Author, AuthorType + +class NewsvergeCrawler(BaseCrawler): + N_RE = re.compile('newsverge.com') + + def offer(self, url): + """ Can this crawler process this URL? 
""" + parts = urlparse(url) + return bool(self.N_RE.match(parts.netloc)) + + def canonicalise_url(self, url): + """ Strip anchors, etc.""" + + # Needed to handle urls being recieved without protocol (http[s]://), check if it can be parsed first, then handle and re parse if there is no netloc found + if '//' not in url: + url = '%s%s' % ('https://', url) + + parts = urlparse(url) + + netloc = parts.netloc.strip(':80') + + # force http, strip trailing slash, anchors etc. + return urlunparse(['https', netloc, parts.path.rstrip('/') or '/', parts.params, parts.query, None]) + + def extract(self, doc, raw_html): + """ Extract text and other things from the raw_html for this document. """ + super(NewsvergeCrawler, self).extract(doc, raw_html) + + soup = BeautifulSoup(raw_html) + + # gather title + doc.title = self.extract_plaintext(soup.select('.td-ss-main-content .post .td-post-header .td-post-title .entry-title')) + + #gather publish date + date = self.extract_plaintext(soup.select('.td-ss-main-content .post .td-post-header .td-post-title .td-post-date .entry-date')) + doc.published_at = self.parse_timestamp(date) + + #gather text and summary + nodes = soup.select('.td-ss-main-content .post .td-post-content p') + doc.summary = "\n\n".join(p.text.strip() for p in nodes[:1]) + doc.text = "\n\n".join(p.text.strip() for p in nodes) + + # gather author + author_nodes = soup.select('.td-ss-main-content .post .td-post-header .td-post-title .td-post-author-name a') + author = "\n\n".join(p.text.strip() for p in author_nodes) + if author: + doc.author = Author.get_or_create(author.strip(), AuthorType.journalist()) + else: + doc.author = Author.unknown() diff --git a/dexter/processing/crawlers/newteleonline.py b/dexter/processing/crawlers/newteleonline.py new file mode 100644 index 00000000..fbd515d2 --- /dev/null +++ b/dexter/processing/crawlers/newteleonline.py @@ -0,0 +1,41 @@ +from urlparse import urlparse, urlunparse +import re + +from bs4 import BeautifulSoup +import 
requests + +from .base import BaseCrawler +from ...models import Entity, Author, AuthorType + +class NewTeleOnlineCrawler(BaseCrawler): + NTO_RE = re.compile('newtelegraphonline.com') + + def offer(self, url): + """ Can this crawler process this URL? """ + parts = urlparse(url) + return bool(self.NTO_RE.match(parts.netloc)) + + def extract(self, doc, raw_html): + """ Extract text and other things from the raw_html for this document. """ + super(NewTeleOnlineCrawler, self).extract(doc, raw_html) + + soup = BeautifulSoup(raw_html) + + # gather title + doc.title = self.extract_plaintext(soup.select('#mvp-main-body-wrap h1.mvp-post-title')) + + #gather publish date + date = self.extract_plaintext(soup.select('#mvp-main-body-wrap .mvp-author-info-date span.mvp-post-date time.post-date')) + doc.published_at = self.parse_timestamp(date) + + #gather text and summary + nodes = soup.select('#mvp-main-body-wrap #mvp-content-main p') + doc.summary = "\n\n".join(p.text.strip() for p in nodes[:1]) + doc.text = "\n\n".join(p.text.strip() for p in nodes) + + # gather author + author = self.extract_plaintext(soup.select('#mvp-main-body-wrap .mvp-author-info-wrap .author-name a')) + if author: + doc.author = Author.get_or_create(author.strip(), AuthorType.journalist()) + else: + doc.author = Author.unknown() \ No newline at end of file diff --git a/dexter/processing/crawlers/nigeriatoday.py b/dexter/processing/crawlers/nigeriatoday.py new file mode 100644 index 00000000..44bf21be --- /dev/null +++ b/dexter/processing/crawlers/nigeriatoday.py @@ -0,0 +1,38 @@ +from urlparse import urlparse, urlunparse +import re + +from bs4 import BeautifulSoup +import requests + +from .base import BaseCrawler +from ...models import Entity, Author, AuthorType + +class NigeriaTodayCrawler(BaseCrawler): + NT_RE = re.compile('(www\.)?nigeriatoday.ng') + + def offer(self, url): + """ Can this crawler process this URL? 
""" + parts = urlparse(url) + return bool(self.NT_RE.match(parts.netloc)) + + def extract(self, doc, raw_html): + """ Extract text and other things from the raw_html for this document. """ + super(NigeriaTodayCrawler, self).extract(doc, raw_html) + + soup = BeautifulSoup(raw_html) + + # gather title + doc.title = self.extract_plaintext(soup.select('.post h1.title')) + + #gather publish date + meta_info = self.extract_plaintext(soup.select('.post .post-meta p')) + date = meta_info[meta_info.index(' on') + 3:meta_info.index(' in')].strip() + doc.published_at = self.parse_timestamp(date) + + #gather text and summary + nodes = soup.select('.post > p') + doc.summary = "\n\n".join(p.text.strip() for p in nodes[:2]) + doc.text = "\n\n".join(p.text.strip() for p in nodes) + + # gather author + doc.author = Author.unknown() \ No newline at end of file diff --git a/dexter/processing/crawlers/nta.py b/dexter/processing/crawlers/nta.py new file mode 100644 index 00000000..57a00314 --- /dev/null +++ b/dexter/processing/crawlers/nta.py @@ -0,0 +1,42 @@ +from urlparse import urlparse, urlunparse +import re + +from bs4 import BeautifulSoup +import requests + +from .base import BaseCrawler +from ...models import Entity, Author, AuthorType + +class NTACrawler(BaseCrawler): + NTA_RE = re.compile('(www\.)?nta.ng') + + def offer(self, url): + """ Can this crawler process this URL? """ + parts = urlparse(url) + return bool(self.NTA_RE.match(parts.netloc)) + + def extract(self, doc, raw_html): + """ Extract text and other things from the raw_html for this document. 
""" + super(NTACrawler, self).extract(doc, raw_html) + + soup = BeautifulSoup(raw_html) + + # gather title + doc.title = self.extract_plaintext(soup.select('.post-header h1.post-title')) + + #gather publish date + date = self.extract_plaintext(soup.select('.post-header .post-meta a.date span')) + doc.published_at = self.parse_timestamp(date) + + #gather text and summary + nodes = soup.select('.post-body p') + doc.summary = "\n\n".join(p.text.strip() for p in nodes[:2]) + doc.text = "\n\n".join(p.text.strip() for p in nodes) + + # gather author + author = self.extract_plaintext(soup.select('.post-body .post-author .author-info h4 a')) + if author: + doc.author = Author.get_or_create(author.strip(), AuthorType.journalist()) + else: + doc.author = Author.unknown() + diff --git a/dexter/processing/crawlers/outrepreneurs.py b/dexter/processing/crawlers/outrepreneurs.py new file mode 100644 index 00000000..afe584fe --- /dev/null +++ b/dexter/processing/crawlers/outrepreneurs.py @@ -0,0 +1,58 @@ +from urlparse import urlparse, urlunparse +import re + +from bs4 import BeautifulSoup +import requests + +from .base import BaseCrawler +from ...models import Entity, Author, AuthorType + +class OutrepreneursCrawler(BaseCrawler): + O_RE = re.compile('outrepreneurs.com') + + def offer(self, url): + """ Can this crawler process this URL? """ + parts = urlparse(url) + return bool(self.O_RE.match(parts.netloc)) + + def canonicalise_url(self, url): + """ Strip anchors, etc.""" + + # Needed to handle urls being recieved without protocol (http[s]://), check if it can be parsed first, then handle and re parse if there is no netloc found + if '//' not in url: + url = '%s%s' % ('https://', url) + + parts = urlparse(url) + + netloc = parts.netloc.strip(':80') + + # force http, strip trailing slash, anchors etc. 
+ return urlunparse(['https', netloc, parts.path.rstrip('/') or '/', parts.params, parts.query, None]) + + def extract(self, doc, raw_html): + """ Extract text and other things from the raw_html for this document. """ + super(OutrepreneursCrawler, self).extract(doc, raw_html) + + soup = BeautifulSoup(raw_html) + + # gather title + doc.title = self.extract_plaintext(soup.select('.block-content .single-post-box .title-post h1')) + + byline = soup.select('.block-content .single-post-box .title-post .post-tags li') + + #gather publish date + date = byline[0].text.strip() + doc.published_at = self.parse_timestamp(date) + + # gather author + author = self.extract_plaintext(byline[1].select('a')) + if author: + doc.author = Author.get_or_create(author.strip(), AuthorType.journalist()) + else: + doc.author = Author.unknown() + + #gather text and summary + nodes = soup.select('.block-content .single-post-box .the-content > p') + doc.summary = "\n\n".join(p.text.strip() for p in nodes[:1]) + doc.text = "\n\n".join(p.text.strip() for p in nodes[:-1]) + diff --git a/dexter/processing/crawlers/planintl.py b/dexter/processing/crawlers/planintl.py new file mode 100644 index 00000000..ed613aa8 --- /dev/null +++ b/dexter/processing/crawlers/planintl.py @@ -0,0 +1,64 @@ +from urlparse import urlparse, urlunparse +import re + +from bs4 import BeautifulSoup +import requests + +from .base import BaseCrawler +from ...models import Entity, Author, AuthorType + +class PlanIntlCrawler(BaseCrawler): + PI_RE = re.compile('plan-international.org') + + def offer(self, url): + """ Can this crawler process this URL? 
""" + parts = urlparse(url) + return bool(self.PI_RE.match(parts.netloc)) + + def canonicalise_url(self, url): + """ Strip anchors, etc.""" + + # Needed to handle urls being recieved without protocol (http[s]://), check if it can be parsed first, then handle and re parse if there is no netloc found + if '//' not in url: + url = '%s%s' % ('https://', url) + + parts = urlparse(url) + + netloc = parts.netloc.strip(':80') + + # force http, strip trailing slash, anchors etc. + return urlunparse(['https', netloc, parts.path.rstrip('/') or '/', parts.params, parts.query, None]) + + def extract(self, doc, raw_html): + """ Extract text and other things from the raw_html for this document. """ + super(PlanIntlCrawler, self).extract(doc, raw_html) + + soup = BeautifulSoup(raw_html) + + # gather title + doc.title = self.extract_plaintext(soup.select('#main-content .header-breadcrumb .page-title')) + + #gather publish date + date = self.extract_plaintext(soup.select('#main-content .content-middle .date-display-single')) + doc.published_at = self.parse_timestamp(date) + + #gather text and summary + nodes = soup.select('#main-content .core-content') + text_list = [] + doc.summary = '' + for node in nodes: + for child in node.descendants: + if child.name in ['h3','p','li']: + text_list = text_list + [child] + if len(doc.summary) < 200: + doc.summary = "\n\n".join(p.text.strip() for p in text_list).strip() + doc.text = "\n\n".join(p.text.strip() for p in text_list).strip() + + # gather author + author_type_A = self.extract_plaintext(soup.select('#main-content .content-middle .article-meta .field-guest-authors .field-guest-author')) + author_type_B = self.extract_plaintext(soup.select('#main-content .content-middle .author-bio h2')) + author = author_type_A + author_type_B + if author: + doc.author = Author.get_or_create(author.strip(), AuthorType.journalist()) + else: + doc.author = Author.unknown() \ No newline at end of file diff --git 
a/dexter/processing/crawlers/rhodesunimathewblog.py b/dexter/processing/crawlers/rhodesunimathewblog.py new file mode 100644 index 00000000..d1bef85b --- /dev/null +++ b/dexter/processing/crawlers/rhodesunimathewblog.py @@ -0,0 +1,41 @@ +from urlparse import urlparse, urlunparse +import re + +from bs4 import BeautifulSoup +import requests + +from .base import BaseCrawler +from ...models import Entity, Author, AuthorType + +class RhodesUniMathewBlogCrawler(BaseCrawler): + RUMB_RE = re.compile('mathewnyaungwa.blogspot.co.za') + + def offer(self, url): + """ Can this crawler process this URL? """ + parts = urlparse(url) + return bool(self.RUMB_RE.match(parts.netloc)) + + def extract(self, doc, raw_html): + """ Extract text and other things from the raw_html for this document. """ + super(RhodesUniMathewBlogCrawler, self).extract(doc, raw_html) + + soup = BeautifulSoup(raw_html) + + # gather title + doc.title = self.extract_plaintext(soup.select('.blog-posts .date-posts .post-title')) + + #gather publish date + date = self.extract_plaintext(soup.select('.blog-posts .date-header span')) + doc.published_at = self.parse_timestamp(date) + + #gather text and summary + nodes = soup.select('.blog-posts .date-posts .post-body span') + doc.summary = "\n\n".join(p.text.strip() for p in nodes[:1]) + doc.text = "\n\n".join(p.text.strip() for p in nodes) + + # gather author + author = self.extract_plaintext(soup.select('.blog-posts .post-footer a.g-profile')) + if author: + doc.author = Author.get_or_create(author.strip(), AuthorType.journalist()) + else: + doc.author = Author.unknown() \ No newline at end of file diff --git a/dexter/processing/crawlers/rsaparliament.py b/dexter/processing/crawlers/rsaparliament.py new file mode 100644 index 00000000..616bc526 --- /dev/null +++ b/dexter/processing/crawlers/rsaparliament.py @@ -0,0 +1,62 @@ +from urlparse import urlparse, urlunparse +import re + +from bs4 import BeautifulSoup +import requests + +from .base import BaseCrawler +from 
...models import Entity, Author, AuthorType + +class RSAParliamentCrawler(BaseCrawler): + RSAP_RE = re.compile('(www\.)?parliament.gov.za') + + def offer(self, url): + """ Can this crawler process this URL? """ + parts = urlparse(url) + return bool(self.RSAP_RE.match(parts.netloc)) + + def canonicalise_url(self, url): + """ Strip anchors, etc.""" + + # Needed to handle urls being recieved without protocol (http[s]://), check if it can be parsed first, then handle and re parse if there is no netloc found + if '//' not in url: + url = '%s%s' % ('https://', url) + + parts = urlparse(url) + + netloc = parts.netloc.strip(':80') + + # force http, strip trailing slash, anchors etc. + return urlunparse(['https', netloc, parts.path.rstrip('/') or '/', parts.params, parts.query, None]) + + def extract(self, doc, raw_html): + """ Extract text and other things from the raw_html for this document. """ + super(RSAParliamentCrawler, self).extract(doc, raw_html) + + soup = BeautifulSoup(raw_html) + + # gather title + doc.title = self.extract_plaintext(soup.select('.page .page-header h4')) + + + #gather text and summary + nodes = soup.select('.page #content .page-content p') + doc.summary = "\n\n".join(p.text.strip() for p in nodes[:1]) + doc.text = "\n\n".join(p.text.strip() for p in nodes[:-1]) + + #gather publish date + date_pattern = re.compile('(?:\s|\w){1}(\d{1,2} (?:January|February|March|April|May|June|July|August|September|October|November|December) \d{4})(?:[\s\w]|$)') + if re.search(date_pattern, nodes[0].text) != None: + date = re.search(date_pattern, nodes[0].text).group(0) + if re.search(date_pattern, nodes[-1].text) != None: + date = re.search(date_pattern, nodes[-1].text).group(0) + doc.published_at = self.parse_timestamp(date.strip()) + + #gather author + author_pattern = re.compile('(?:By )?([\w ]*)\s*(?:\d{1,2} (?:January|February|March|April|May|June|July|August|September|October|November|December) \d{4})(?:\s|$)|(?:Name: )([\w ]+)') + reg_result = 
re.search(author_pattern, nodes[-1].text) + if reg_result != None: + author = reg_result.group(1) if reg_result.group(1) else reg_result.group(2) + doc.author = Author.get_or_create(author.strip(), AuthorType.journalist()) + else: + doc.author = Author.unknown() diff --git a/dexter/processing/crawlers/savca.py b/dexter/processing/crawlers/savca.py new file mode 100644 index 00000000..255da889 --- /dev/null +++ b/dexter/processing/crawlers/savca.py @@ -0,0 +1,55 @@ +from urlparse import urlparse, urlunparse +import re + +from bs4 import BeautifulSoup +import requests + +from .base import BaseCrawler +from ...models import Entity, Author, AuthorType + +class SAVCACrawler(BaseCrawler): + SAVCA_RE = re.compile('(www\.)?savca.co.za') + + def offer(self, url): + """ Can this crawler process this URL? """ + parts = urlparse(url) + return bool(self.SAVCA_RE.match(parts.netloc)) + + def canonicalise_url(self, url): + """ Strip anchors, etc.""" + + # Needed to handle urls being recieved without protocol (http[s]://), check if it can be parsed first, then handle and re parse if there is no netloc found + if '//' not in url: + url = '%s%s' % ('https://', url) + + parts = urlparse(url) + + netloc = parts.netloc.strip(':80') + + # force http, strip trailing slash, anchors etc. + return urlunparse(['https', netloc, parts.path.rstrip('/') or '/', parts.params, parts.query, None]) + + def extract(self, doc, raw_html): + """ Extract text and other things from the raw_html for this document. 
""" + super(SAVCACrawler, self).extract(doc, raw_html) + + soup = BeautifulSoup(raw_html) + + # gather title + doc.title = self.extract_plaintext(soup.select('.fl-row-content-wrap .fl-module-heading h1.fl-heading')) + + #gather publish date + date = self.extract_plaintext(soup.select('.fl-row-content-wrap .fl-module-fl-post-info .fl-module-content .fl-post-info-date')) + doc.published_at = self.parse_timestamp(date) + + #gather text and summary + nodes = soup.select(".fl-row-content-wrap .fl-col-content .fl-module-fl-post-content p") + doc.summary = "\n\n".join(p.text.strip() for p in nodes[:2]) + doc.text = "\n\n".join(p.text.strip() for p in nodes) + + # gather author + author = self.extract_plaintext(soup.select(".fl-row-content-wrap .fl-module-fl-post-info .fl-module-content .fl-post-info-author a")) + if author: + doc.author = Author.get_or_create(author.strip(), AuthorType.journalist()) + else: + doc.author = Author.unknown() \ No newline at end of file diff --git a/dexter/processing/crawlers/seedmagazine.py b/dexter/processing/crawlers/seedmagazine.py new file mode 100644 index 00000000..4369d8a0 --- /dev/null +++ b/dexter/processing/crawlers/seedmagazine.py @@ -0,0 +1,49 @@ +from urlparse import urlparse, urlunparse +import re + +from bs4 import BeautifulSoup +import requests + +from .base import BaseCrawler +from ...models import Entity, Author, AuthorType + +class SeedMagazineCrawler(BaseCrawler): + SM_RE = re.compile('(www\.)?seedmagazine.co.ke') + + def offer(self, url): + """ Can this crawler process this URL? """ + parts = urlparse(url) + return bool(self.SM_RE.match(parts.netloc)) + + def extract(self, doc, raw_html): + """ Extract text and other things from the raw_html for this document. 
""" + super(SeedMagazineCrawler, self).extract(doc, raw_html) + + soup = BeautifulSoup(raw_html) + + # gather title + doc.title = self.extract_plaintext(soup.select('.content-wrapper h1.page-title')) + + #gather publish date + date = '' + doc.published_at = self.parse_timestamp(date) + + #gather text and summary + nodes = soup.select('.content-wrapper .page-content > p') + doc.summary = "\n\n".join(p.text.strip() for p in nodes[:2]) + doc.text = "\n\n".join(p.text.strip() for p in nodes) + + # gather author + author = '' + entry_author = nodes[0].text + if 'By ' in entry_author: + if '\n' in entry_author: + author = entry_author[entry_author.index('By ') + 3:entry_author.index('\n')].strip() + else: + author = entry_author[entry_author.index('By ') + 3:].strip() + else: + author = self.extract_plaintext(soup.select('.content-wrapper .page-meta-wrapper .author a')) + if author: + doc.author = Author.get_or_create(author.strip(), AuthorType.journalist()) + else: + doc.author = Author.unknown() \ No newline at end of file diff --git a/dexter/processing/crawlers/standardmediaktnnews.py b/dexter/processing/crawlers/standardmediaktnnews.py new file mode 100644 index 00000000..4ed851a4 --- /dev/null +++ b/dexter/processing/crawlers/standardmediaktnnews.py @@ -0,0 +1,56 @@ +from urlparse import urlparse, urlunparse +import re + +from bs4 import BeautifulSoup +import requests + +from .base import BaseCrawler +from ...models import Entity, Author, AuthorType + +class StandardMediaKTNCrawler(BaseCrawler): + BDO_RE = re.compile('(www\.)?standardmedia.co.ke/ktnnews') + + def offer(self, url): + """ Can this crawler process this URL? 
""" + parts = urlparse(url) + paths = parts.path.split('/') + return bool(self.BDO_RE.match(parts.netloc + '/' + paths[1])) + + def canonicalise_url(self, url): + """ Strip anchors, etc.""" + + # Needed to handle urls being recieved without protocol (http[s]://), check if it can be parsed first, then handle and re parse if there is no netloc found + if '//' not in url: + url = '%s%s' % ('https://', url) + + parts = urlparse(url) + + netloc = parts.netloc.strip(':80') + + # force http, strip trailing slash, anchors etc. + return urlunparse(['https', netloc, parts.path.rstrip('/') or '/', parts.params, parts.query, None]) + + def extract(self, doc, raw_html): + """ Extract text and other things from the raw_html for this document. """ + super(StandardMediaKTNCrawler, self).extract(doc, raw_html) + + soup = BeautifulSoup(raw_html) + + # gather title + doc.title = self.extract_plaintext(soup.select('.container .card .card-block .card-title')) + + #gather publish date + date = self.extract_plaintext(soup.select('.container .card .card-block .card-text')) + doc.published_at = self.parse_timestamp(date[date.index('|') + 1:].strip()) + + #gather text and summary + nodes = soup.select('.container .card .card-block p') + doc.summary = "\n\n".join(p.text.strip() for p in nodes[:1]) + doc.text = "\n\n".join(p.text.strip() for p in nodes) + + # gather author + author = self.extract_plaintext(soup.select('.container .card .card-text a')) + if author: + doc.author = Author.get_or_create(author.strip(), AuthorType.journalist()) + else: + doc.author = Author.unknown() \ No newline at end of file diff --git a/dexter/processing/crawlers/sundiatapost.py b/dexter/processing/crawlers/sundiatapost.py new file mode 100644 index 00000000..303d666d --- /dev/null +++ b/dexter/processing/crawlers/sundiatapost.py @@ -0,0 +1,77 @@ +from urlparse import urlparse, urlunparse +import re + +from bs4 import BeautifulSoup +import requests + +from .base import BaseCrawler +from ...models import 
Entity, Author, AuthorType + +from datetime import datetime, timedelta +from dateutil.parser import parse + +class SundiataPostCrawler(BaseCrawler): + SP_RE = re.compile('sundiatapost.com') + + def offer(self, url): + """ Can this crawler process this URL? """ + parts = urlparse(url) + return bool(self.SP_RE.match(parts.netloc)) + + def canonicalise_url(self, url): + """ Strip anchors, etc.""" + + # Needed to handle urls being recieved without protocol (http[s]://), check if it can be parsed first, then handle and re parse if there is no netloc found + if '//' not in url: + url = '%s%s' % ('https://', url) + + parts = urlparse(url) + + netloc = parts.netloc.strip(':80') + + # force http, strip trailing slash, anchors etc. + return urlunparse(['https', netloc, parts.path.rstrip('/') or '/', parts.params, parts.query, None]) + + def parse_timestamp(self, ts): + if 'hour' in ts: + return datetime.now() - timedelta(hours = int(ts[:ts.index('hour') -1].strip())) + elif 'day' in ts: + return datetime.now() - timedelta(days = int(ts[:ts.index('day') -1].strip())) + elif 'week' in ts: + return datetime.now() - timedelta(weeks = int(ts[:ts.index('week') -1].strip())) + else: + return parse(ts) + + def extract(self, doc, raw_html): + """ Extract text and other things from the raw_html for this document. 
""" + super(SundiataPostCrawler, self).extract(doc, raw_html) + + soup = BeautifulSoup(raw_html) + + # gather title + doc.title = self.extract_plaintext(soup.select('#main-content .post .post-inner .post-title')) + + #gather publish date + date = self.extract_plaintext(soup.select('#main-content .post .post-inner .post-meta .tie-date')) + doc.published_at = self.parse_timestamp(date) + + #gather text and summary + nodes = soup.select('#main-content .post .post-inner .entry p') + doc.summary = "\n\n".join(p.text.strip() for p in nodes[:2]) + doc.text = "\n\n".join(p.text.strip() for p in nodes) + + # gather author + author = '' + entry_author = nodes[0].text + if 'By ' in entry_author: + if '\n' in entry_author: + author = entry_author[entry_author.index('By ') + 3:entry_author.index('\n')].strip() + else: + author = entry_author[entry_author.index('By ') + 3:].strip() + else: + author = self.extract_plaintext(soup.select('#main-content .post .post-inner .post-meta .post-meta-author a')) + if author: + doc.author = Author.get_or_create(author.strip(), AuthorType.journalist()) + else: + doc.author = Author.unknown() + diff --git a/dexter/processing/crawlers/sunnewsonline.py b/dexter/processing/crawlers/sunnewsonline.py new file mode 100644 index 00000000..d3c4b421 --- /dev/null +++ b/dexter/processing/crawlers/sunnewsonline.py @@ -0,0 +1,37 @@ +from urlparse import urlparse, urlunparse +import re + +from bs4 import BeautifulSoup +import requests + +from .base import BaseCrawler +from ...models import Entity, Author, AuthorType + +class SunNewsOnlineCrawler(BaseCrawler): + SNO_RE = re.compile('sunnewsonline.com') + + def offer(self, url): + """ Can this crawler process this URL? """ + parts = urlparse(url) + return bool(self.SNO_RE.match(parts.netloc)) + + def extract(self, doc, raw_html): + """ Extract text and other things from the raw_html for this document. 
class SunNewsOnlineCrawler(BaseCrawler):
    """ Crawler for articles on sunnewsonline.com. """

    # Dots escaped (a bare '.' matches any character) and an optional
    # 'www.' prefix allowed so www-prefixed URLs are also offered.
    SNO_RE = re.compile(r'(www\.)?sunnewsonline\.com')

    def offer(self, url):
        """ Can this crawler process this URL? """
        parts = urlparse(url)
        return bool(self.SNO_RE.match(parts.netloc))

    def extract(self, doc, raw_html):
        """ Extract text and other things from the raw_html for this document. """
        super(SunNewsOnlineCrawler, self).extract(doc, raw_html)
        soup = BeautifulSoup(raw_html)

        # gather title
        doc.title = self.extract_plaintext(soup.select('.post header h4.entry-title'))

        # gather publish date; the scraped string carries a leading
        # character (icon glyph) -- drop it before parsing.
        date = self.extract_plaintext(soup.select('.post header span.entry-date'))
        doc.published_at = self.parse_timestamp(date[1:].strip())

        # gather text and summary
        nodes = soup.select('.post .elements-box > p')
        doc.summary = "\n\n".join(p.text.strip() for p in nodes[:2])
        doc.text = "\n\n".join(p.text.strip() for p in nodes)

        # gather author -- the site exposes no reliable byline markup.
        doc.author = Author.unknown()
class TheBusinessPostCrawler(BaseCrawler):
    """ Crawler for articles on thebusinesspost.ng. """

    # Internal dot escaped so 'thebusinesspostXng' no longer matches.
    TBP_RE = re.compile(r'(www\.)?thebusinesspost\.ng')

    def offer(self, url):
        """ Can this crawler process this URL? """
        parts = urlparse(url)
        return bool(self.TBP_RE.match(parts.netloc))

    def canonicalise_url(self, url):
        """ Strip anchors, force https, etc. so equivalent URLs compare equal. """
        # Handle urls received without a protocol (http[s]://): urlparse would
        # otherwise leave netloc empty and put everything in path.
        if '//' not in url:
            url = 'https://' + url

        parts = urlparse(url)

        # Drop an explicit default port. NOTE: the previous strip(':80')
        # removed ANY leading/trailing ':', '8' or '0' characters, mangling
        # hosts such as 'example80.com'; use an explicit suffix check.
        netloc = parts.netloc
        if netloc.endswith(':80'):
            netloc = netloc[:-3]

        # force https, strip trailing slash, anchors etc.
        return urlunparse(['https', netloc, parts.path.rstrip('/') or '/',
                           parts.params, parts.query, None])

    def extract(self, doc, raw_html):
        """ Extract text and other things from the raw_html for this document. """
        super(TheBusinessPostCrawler, self).extract(doc, raw_html)

        soup = BeautifulSoup(raw_html)

        # gather title
        doc.title = self.extract_plaintext(soup.select('.article-container article.story .media-heading'))

        nodes = soup.select('.article-container article.story p')

        # gather text and summary; the first three paragraphs are
        # boilerplate/byline, the body starts at index 3.
        doc.summary = "\n\n".join(p.text.strip() for p in nodes[3:4])
        doc.text = "\n\n".join(p.text.strip() for p in nodes[3:])

        # gather author and publish date from the byline paragraph,
        # formatted 'Author | Date | ...'. Guard against short pages
        # (previously nodes[2] raised IndexError) and a missing
        # separator (previously .index raised ValueError).
        author = ''
        if len(nodes) > 2:
            byline = nodes[2].text
            author, sep, rest = byline.partition(' | ')
            if sep:
                doc.published_at = self.parse_timestamp(rest.partition(' | ')[0].strip())

        if author.strip():
            doc.author = Author.get_or_create(author.strip(), AuthorType.journalist())
        else:
            doc.author = Author.unknown()
class TheGuardianUKCrawler(BaseCrawler):
    """ Crawler for articles on theguardian.com. """

    # Internal dot escaped so 'theguardianXcom' no longer matches.
    TGUK_RE = re.compile(r'(www\.)?theguardian\.com')

    def offer(self, url):
        """ Can this crawler process this URL? """
        parts = urlparse(url)
        return bool(self.TGUK_RE.match(parts.netloc))

    def canonicalise_url(self, url):
        """ Strip anchors, force https, etc. so equivalent URLs compare equal. """
        # Handle urls received without a protocol (http[s]://): urlparse would
        # otherwise leave netloc empty and put everything in path.
        if '//' not in url:
            url = 'https://' + url

        parts = urlparse(url)

        # Drop an explicit default port. NOTE: the previous strip(':80')
        # removed ANY leading/trailing ':', '8' or '0' characters, mangling
        # hosts such as 'example80.com'; use an explicit suffix check.
        netloc = parts.netloc
        if netloc.endswith(':80'):
            netloc = netloc[:-3]

        # force https, strip trailing slash, anchors etc.
        return urlunparse(['https', netloc, parts.path.rstrip('/') or '/',
                           parts.params, parts.query, None])

    def extract(self, doc, raw_html):
        """ Extract text and other things from the raw_html for this document. """
        # Round-trip through UTF-8 dropping invalid bytes so BeautifulSoup
        # doesn't choke on badly-encoded input.
        raw_html = raw_html.encode("utf-8")
        raw_html = unicode(raw_html, errors='ignore')

        super(TheGuardianUKCrawler, self).extract(doc, raw_html)
        soup = BeautifulSoup(raw_html)

        # gather title
        doc.title = self.extract_plaintext(soup.select('#article .content__article-body .content__head h1.content__headline'))

        # gather publish date; UK-style times use '.' ('6.30pm') which the
        # timestamp parser expects as ':'.
        date = self.extract_plaintext(soup.select('#article .content__article-body .content__head .content__dateline time.content__dateline-wpd'))
        doc.published_at = self.parse_timestamp(date.replace('.', ':'))

        # gather text and summary
        nodes = soup.select('#article .content__article-body > p')
        doc.summary = "\n\n".join(p.text.strip() for p in nodes[:1])
        doc.text = "\n\n".join(p.text.strip() for p in nodes)

        # gather author
        author = self.extract_plaintext(soup.select('#article .content__article-body .content__head .byline a span'))
        if author:
            doc.author = Author.get_or_create(author.strip(), AuthorType.journalist())
        else:
            doc.author = Author.unknown()
class TheInterviewCrawler(BaseCrawler):
    """ Crawler for articles on theinterview.com.ng. """

    # Dots escaped so e.g. 'theinterviewXcomYng' no longer matches.
    TI_RE = re.compile(r'(www\.)?theinterview\.com\.ng')

    def offer(self, url):
        """ Can this crawler process this URL? """
        parts = urlparse(url)
        return bool(self.TI_RE.match(parts.netloc))

    def fetch(self, url):
        """
        Fetch and return the raw HTML for this url.

        The return content is a unicode string. A browser user-agent is
        sent because the site rejects the default python-requests one.
        """
        self.log.info("Fetching URL: " + url)

        headers = {'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.91 Safari/537.36'}

        r = requests.get(url, headers=headers, timeout=10)
        # raise an HTTPError on badness
        r.raise_for_status()

        # this decodes r.content using a guessed encoding
        return r.text

    def extract(self, doc, raw_html):
        """ Extract text and other things from the raw_html for this document. """
        super(TheInterviewCrawler, self).extract(doc, raw_html)

        soup = BeautifulSoup(raw_html)

        # gather title
        doc.title = self.extract_plaintext(soup.select('.post header h2.sd-entry-title'))

        # the meta list holds date and author as text following an <i> icon
        # in each <li> -- assumes item 0 is the date and item 1 the author
        # (TODO confirm against live markup).
        date_author = soup.select('.post header .sd-entry-meta ul li.sd-meta-author')

        # gather publish date
        date = ''.join(date_author[0].find('i').next_siblings)
        doc.published_at = self.parse_timestamp(date)

        # gather text and summary
        nodes = soup.select('.post .sd-entry-content p')
        doc.summary = "\n\n".join(p.text.strip() for p in nodes[:1])
        doc.text = "\n\n".join(p.text.strip() for p in nodes)

        # gather author
        author = ''.join(date_author[1].find('i').next_siblings)
        if author:
            doc.author = Author.get_or_create(author.strip(), AuthorType.journalist())
        else:
            doc.author = Author.unknown()
class TheNationCrawler(BaseCrawler):
    """ Crawler for articles on thenationonlineng.net. """

    # Dots escaped and an optional 'www.' prefix allowed so both bare
    # and www hosts are offered.
    TN_RE = re.compile(r'(www\.)?thenationonlineng\.net')

    def offer(self, url):
        """ Can this crawler process this URL? """
        parts = urlparse(url)
        return bool(self.TN_RE.match(parts.netloc))

    def extract(self, doc, raw_html):
        """ Extract text and other things from the raw_html for this document. """
        super(TheNationCrawler, self).extract(doc, raw_html)

        soup = BeautifulSoup(raw_html)

        # gather title
        doc.title = self.extract_plaintext(soup.select('.single-post-main .post-single-title h1.entry-title'))

        # gather publish date
        date = self.extract_plaintext(soup.select('.single-post-main .entry-meta .posted-on time.entry-date'))
        doc.published_at = self.parse_timestamp(date)

        # gather text and summary
        nodes = soup.select('.single-post-main .single-post-content p')
        doc.summary = "\n\n".join(p.text.strip() for p in nodes[:1])
        doc.text = "\n\n".join(p.text.strip() for p in nodes)

        # gather author
        author = self.extract_plaintext(soup.select('.single-post-main .entry-meta .author a'))
        if author:
            doc.author = Author.get_or_create(author.strip(), AuthorType.journalist())
        else:
            doc.author = Author.unknown()
class TheNerveAfricaCrawler(BaseCrawler):
    """ Crawler for articles on thenerveafrica.com. """

    # Dots escaped and an optional 'www.' prefix allowed so both bare
    # and www hosts are offered.
    TNA_RE = re.compile(r'(www\.)?thenerveafrica\.com')

    def offer(self, url):
        """ Can this crawler process this URL? """
        parts = urlparse(url)
        return bool(self.TNA_RE.match(parts.netloc))

    def extract(self, doc, raw_html):
        """ Extract text and other things from the raw_html for this document. """
        super(TheNerveAfricaCrawler, self).extract(doc, raw_html)

        soup = BeautifulSoup(raw_html)

        # gather title
        doc.title = self.extract_plaintext(soup.select('article.post .entry-content-container .entry-header h1.entry-title'))

        # gather publish date
        date = self.extract_plaintext(soup.select('article.post .entry-content-container .entry-meta .posted-on time.entry-date'))
        doc.published_at = self.parse_timestamp(date)

        # gather text and summary
        nodes = soup.select('article.post .entry-content-container .entry-content > p')
        doc.summary = "\n\n".join(p.text.strip() for p in nodes[:1])
        doc.text = "\n\n".join(p.text.strip() for p in nodes)

        # gather author
        author = self.extract_plaintext(soup.select('article.post .entry-content-container .entry-meta .byline .author a'))
        if author:
            doc.author = Author.get_or_create(author.strip(), AuthorType.journalist())
        else:
            doc.author = Author.unknown()
class ThePointCrawler(BaseCrawler):
    """ Crawler for articles on thepointng.com. """

    # Internal dot escaped so 'thepointngXcom' no longer matches.
    TP_RE = re.compile(r'(www\.)?thepointng\.com')

    def offer(self, url):
        """ Can this crawler process this URL? """
        parts = urlparse(url)
        return bool(self.TP_RE.match(parts.netloc))

    def extract(self, doc, raw_html):
        """ Extract text and other things from the raw_html for this document. """
        super(ThePointCrawler, self).extract(doc, raw_html)

        soup = BeautifulSoup(raw_html)

        # gather title
        doc.title = self.extract_plaintext(soup.select('.td-main-content .td-post-header h1.entry-title'))

        # gather publish date
        date = self.extract_plaintext(soup.select('.td-main-content .td-post-header .td-post-date time.entry-date'))
        doc.published_at = self.parse_timestamp(date)

        # gather text and summary
        nodes = soup.select('.td-main-content .td-post-content p')
        if len(nodes) >= 2:
            doc.summary = "\n\n".join(p.text.strip() for p in nodes[:2])
        elif nodes:
            # Single-paragraph article: use the text before the first <br>.
            # Guards added: nodes may be empty (previously IndexError) and
            # find('br') may return None (previously AttributeError).
            br = nodes[0].find('br')
            if br is not None:
                # previous_siblings yields nodes in REVERSE document order;
                # re-reverse so the summary reads left-to-right.
                doc.summary = ''.join(unicode(s) for s in reversed(list(br.previous_siblings)))
            else:
                doc.summary = nodes[0].text.strip()
        doc.text = "\n\n".join(p.text.strip() for p in nodes)

        # gather author
        author = self.extract_plaintext(soup.select('.td-main-content .td-post-header .td-post-author-name a'))
        if author:
            doc.author = Author.get_or_create(author.strip(), AuthorType.journalist())
        else:
            doc.author = Author.unknown()
class ThisDayLiveCrawler(BaseCrawler):
    """ Crawler for articles on thisdaylive.com. """

    # Internal dot escaped so 'thisdayliveXcom' no longer matches.
    TDL_RE = re.compile(r'(www\.)?thisdaylive\.com')

    def offer(self, url):
        """ Can this crawler process this URL? """
        parts = urlparse(url)
        return bool(self.TDL_RE.match(parts.netloc))

    def extract(self, doc, raw_html):
        """ Extract text and other things from the raw_html for this document. """
        super(ThisDayLiveCrawler, self).extract(doc, raw_html)

        soup = BeautifulSoup(raw_html)

        # gather title
        doc.title = self.extract_plaintext(soup.select('.post .td-post-header .td-post-title .entry-title'))

        # gather publish date
        date = self.extract_plaintext(soup.select('.post .td-post-header .td-post-title time.entry-date'))
        doc.published_at = self.parse_timestamp(date)

        # gather text and summary
        nodes = soup.select('.post .td-post-content p')
        doc.summary = "\n\n".join(p.text.strip() for p in nodes[:3])
        doc.text = "\n\n".join(p.text.strip() for p in nodes)

        # gather author -- the site exposes no reliable byline markup.
        doc.author = Author.unknown()
class WashingtonPostCrawler(BaseCrawler):
    """ Crawler for articles on washingtonpost.com. """

    # Internal dot escaped so 'washingtonpostXcom' no longer matches.
    WP_RE = re.compile(r'(www\.)?washingtonpost\.com')

    def offer(self, url):
        """ Can this crawler process this URL? """
        parts = urlparse(url)
        return bool(self.WP_RE.match(parts.netloc))

    def canonicalise_url(self, url):
        """ Strip anchors, force https, etc. so equivalent URLs compare equal. """
        # Handle urls received without a protocol (http[s]://): urlparse would
        # otherwise leave netloc empty and put everything in path.
        if '//' not in url:
            url = 'https://' + url

        parts = urlparse(url)

        # Drop an explicit default port. NOTE: the previous strip(':80')
        # removed ANY leading/trailing ':', '8' or '0' characters, mangling
        # hosts such as 'example80.com'; use an explicit suffix check.
        netloc = parts.netloc
        if netloc.endswith(':80'):
            netloc = netloc[:-3]

        # force https, strip trailing slash, anchors etc.
        return urlunparse(['https', netloc, parts.path.rstrip('/') or '/',
                           parts.params, parts.query, None])

    def extract(self, doc, raw_html):
        """ Extract text and other things from the raw_html for this document. """
        super(WashingtonPostCrawler, self).extract(doc, raw_html)

        soup = BeautifulSoup(raw_html)

        # gather title
        doc.title = self.extract_plaintext(soup.select('#article-topper #topper-headline-wrapper h1'))

        # gather publish date
        date = self.extract_plaintext(soup.select('#main-content #article-body .pb-timestamp'))
        doc.published_at = self.parse_timestamp(date)

        # gather text and summary
        nodes = soup.select('#main-content #article-body article.paywall > p')
        doc.summary = "\n\n".join(p.text.strip() for p in nodes[:2])
        doc.text = "\n\n".join(p.text.strip() for p in nodes)

        # gather author
        author = self.extract_plaintext(soup.select('#main-content #article-body .pb-byline a'))
        if author:
            doc.author = Author.get_or_create(author.strip(), AuthorType.journalist())
        else:
            doc.author = Author.unknown()
class WorldStageCrawler(BaseCrawler):
    """ Crawler for articles on worldstagegroup.com. """

    # Dots escaped and an optional 'www.' prefix allowed so both bare
    # and www hosts are offered.
    WS_RE = re.compile(r'(www\.)?worldstagegroup\.com')

    def offer(self, url):
        """ Can this crawler process this URL? """
        parts = urlparse(url)
        return bool(self.WS_RE.match(parts.netloc))

    def extract(self, doc, raw_html):
        """ Extract text and other things from the raw_html for this document. """
        # Round-trip through UTF-8 dropping invalid bytes so BeautifulSoup
        # doesn't choke on badly-encoded input.
        raw_html = raw_html.encode("utf-8")
        raw_html = unicode(raw_html, errors='ignore')

        super(WorldStageCrawler, self).extract(doc, raw_html)

        soup = BeautifulSoup(raw_html)

        # gather title
        doc.title = self.extract_plaintext(soup.select('#headline .fnewstitle'))

        # gather publish date; the date field reads 'date | extra', keep the
        # part before the pipe. partition() tolerates a missing '|' (the
        # previous .index('|') raised ValueError in that case).
        date = self.extract_plaintext(soup.select('#headline .fndate'))
        doc.published_at = self.parse_timestamp(date.partition('|')[0].rstrip())

        # gather text and summary
        nodes = soup.select('#headline .fnewssummary p')
        doc.summary = "\n\n".join(p.text.strip() for p in nodes[:1])
        doc.text = "\n\n".join(p.text.strip() for p in nodes)

        # gather author
        author = self.extract_plaintext(soup.select('#headline .catfnauthor'))
        if author:
            doc.author = Author.get_or_create(author.strip(), AuthorType.journalist())
        else:
            doc.author = Author.unknown()
GlobalTimesCN(), + NationalMirrorCrawler(), + MonitorKECrawler(), + NewsvergeCrawler(), + SundiataPostCrawler(), + AgrilinksCrawler(), + BusinessDailyAfricaCrawler(), + TheBusinessPostCrawler(), + TheGuardianUKCrawler(), + IndependentNGCrawler(), + TheNerveAfricaCrawler(), + AmehNewsCrawler(), + SunNewsOnlineCrawler(), + SeedMagazineCrawler(), + HallmarkNewsCrawler(), + DestinyConnectCrawler(), + EconomistCrawler(), + WashingtonPostCrawler(), + AmaBhunganeCrawler(), + AfricaInvestorCrawler(), + OutrepreneursCrawler(), + CNBCAfricaCrawler(), + PlanIntlCrawler(), + BloombergCrawler(), # must come last GenericCrawler()] self.extractors = [ - AlchemyExtractor(), + # AlchemyExtractor(), CalaisExtractor(), SourcesExtractor(), PlacesExtractor()] @@ -170,7 +220,6 @@ def process_feed_item(self, item): if not url: self.log.info("URL could not be parsed, ignoring: %s" % url) return None - existing = Document.query.filter(Document.url == url).first() if existing: self.log.info("URL has already been processed, ignoring: %s" % url) @@ -179,7 +228,6 @@ def process_feed_item(self, item): if not self.newstools_crawler.offer(url): self.log.info("No medium for URL, ignoring: %s" % url) return - # this sets up basic info doc = self.newstools_crawler.crawl(item) try: @@ -190,7 +238,7 @@ def process_feed_item(self, item): raise ProcessingError("Error fetching document: %s" % (e,)) # is it sane? - # TODO: this breaks for isolezwe and other non-english media + # TODO: this breaks for isolezwe and other non-english media' if not doc.text or 'the' not in doc.text: self.log.info("Document %s doesn't have reasonable-looking text, ignoring: %s..." 
% (url, doc.text[0:100])) db.session.rollback() @@ -324,6 +372,7 @@ def __init__(self): PostZambiaCrawler(), TimesZambiaCrawler(), NationKECrawler(), + StandardMediaKTNCrawler(), StandardMediaCrawler(), TheStarKECrawler(), TheEastAfricanKECrawler(), @@ -334,6 +383,55 @@ def __init__(self): DWCrawler(), ChronicleZWCrawler(), BBCCrawler(), + HowWeMadeItInAfricaCrawler(), + SAVCACrawler(), + RhodesUniMathewBlogCrawler(), + WorldStageCrawler(), + ClassicFMCrawler(), + AFPCrawler(), + NaijaNewsCrawler(), + DailyTrustNPCrawler(), + NewTeleOnlineCrawler(), + ThePointCrawler(), + DailyTimesCrawler(), + TheNationCrawler(), + MediaMaxNetCrawler(), + LeadershipCrawler(), + TheInterviewCrawler(), + RSAParliamentCrawler(), + GuardianCrawler(), + NationalDailyNgCrawler(), + NTACrawler(), + ACDIVOCACrawler(), + ThisDayLiveCrawler(), + ChannelAfricaCrawler(), + NANCrawler(), + NigeriaTodayCrawler(), + BusinessDayOnlineCrawler(), + GlobalTimesCN(), + NationalMirrorCrawler(), + MonitorKECrawler(), + NewsvergeCrawler(), + SundiataPostCrawler(), + AgrilinksCrawler(), + BusinessDailyAfricaCrawler(), + TheBusinessPostCrawler(), + TheGuardianUKCrawler(), + IndependentNGCrawler(), + TheNerveAfricaCrawler(), + AmehNewsCrawler(), + SunNewsOnlineCrawler(), + SeedMagazineCrawler(), + HallmarkNewsCrawler(), + DestinyConnectCrawler(), + EconomistCrawler(), + WashingtonPostCrawler(), + AmaBhunganeCrawler(), + AfricaInvestorCrawler(), + OutrepreneursCrawler(), + CNBCAfricaCrawler(), + PlanIntlCrawler(), + BloombergCrawler(), # must come last GenericCrawler()] diff --git a/rebuild_db.py b/rebuild_db.py index a2b670a9..1231f558 100644 --- a/rebuild_db.py +++ b/rebuild_db.py @@ -1,5 +1,5 @@ -# from dexter.models import db -# from dexter.models.seeds import seed_db -# db.drop_all() -# db.create_all() -# seed_db(db) \ No newline at end of file +from dexter.models import db +from dexter.models.seeds import seed_db +db.drop_all() +db.create_all() +seed_db(db) \ No newline at end of file