# Challenge: for each search term, count how many of the known pages contain it.
# (pyMPI-style API, Python 2.)
import mpi
import urllib

# what we are searching for
inputs = ["seidel", "beck", "loni", "eastman", "brandt"]

# collection of web pages our search engine knows about
pages = ['http://www.cct.lsu.edu/about/overview/',
         'http://www.cct.lsu.edu/projects/Coastal_Modeling/',
         'http://www.cct.lsu.edu/projects/SURASCOOP/',
         'http://www.cct.lsu.edu/projects/CactusCode/',
         'http://www.cct.lsu.edu/about/employment/employment.php',
         'http://www.cct.lsu.edu/projects/GridChemCCG/',
         'http://www.cct.lsu.edu/about/people/faculty/all.php',
         'http://www.cct.lsu.edu/about/focus/',
         'http://www.cct.lsu.edu/projects/Enlightened/',
         'http://www.loni.org/',
         'http://www.loni.org/plan/',
         'http://www.cct.lsu.edu/news/news/289']

# each MPI process searches a contiguous block of pages, from ilo to ihi
# (assumes mpi.size evenly divides len(pages); any leftover pages are skipped)
n = len(pages)/mpi.size
ilo = mpi.rank*n
ihi = (mpi.rank+1)*n - 1

# download and lower-case the pages assigned to this process
c = [None]*len(pages)
for i in range(ilo, ihi+1):
    c[i] = urllib.urlopen(pages[i]).read().lower()

for term in inputs:
    # pages in this process's block that contain the term
    matches = []
    for i in range(ilo, ihi+1):
        if c[i].find(term) >= 0:
            matches.append(pages[i])

    if mpi.rank == 0:
        # process zero collects the matches found by every other process
        for i in range(1, mpi.size):
            other_matches = mpi.recv(mpi.ANY_SOURCE)[0]
            for match in other_matches:
                matches.append(match)
    else:
        mpi.send(matches, 0)

    if mpi.rank == 0:
        print term, len(matches)
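
# For readers without pyMPI: a minimal sketch of the same search using mpi4py
# and Python 3. This is an assumed alternative, not the original tutorial code;
# it keeps the idea of splitting the page list across ranks and aggregating the
# per-term counts on rank 0, but swaps the explicit send/recv loop for
# comm.gather() and uses a round-robin split (pages[rank::size]) so no page is
# skipped when the process count does not divide the page count.
from mpi4py import MPI
from urllib.request import urlopen

comm = MPI.COMM_WORLD
rank = comm.Get_rank()
size = comm.Get_size()

terms = ["seidel", "beck", "loni", "eastman", "brandt"]
pages = ['http://www.cct.lsu.edu/about/overview/',   # shortened page list,
         'http://www.loni.org/',                      # purely for
         'http://www.loni.org/plan/']                 # illustration

# round-robin decomposition: rank r handles pages[r], pages[r+size], ...
my_pages = pages[rank::size]

# download and lower-case this rank's pages
texts = [urlopen(p).read().decode('utf-8', 'replace').lower() for p in my_pages]

for term in terms:
    # pages in this rank's share that contain the term
    local = [p for p, t in zip(my_pages, texts) if term in t]
    # gather every rank's list of matching pages at rank 0
    all_matches = comm.gather(local, root=0)
    if rank == 0:
        print(term, sum(len(m) for m in all_matches))

# Typical launch, assuming an MPI runtime and mpi4py are installed
# (search_pages.py is a hypothetical file name for the script above):
#   mpiexec -n 4 python search_pages.py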