Package intermine :: Module results
[hide private]
[frames | no frames]

Source Code for Module intermine.results

  1  try: 
  2      import simplejson as json # Prefer this as it is faster 
  3  except ImportError: # pragma: no cover 
  4      try: 
  5          import json 
  6      except ImportError: 
  7          raise ImportError("Could not find any JSON module to import - " 
  8              + "please install simplejson or jsonlib to continue") 
  9   
 10  import urllib 
 11  import httplib 
 12  import re 
 13  import copy 
 14  import base64 
 15  from urlparse import urlparse 
 16  from itertools import groupby 
 17  import UserDict 
 18   
 19  from intermine.errors import WebserviceError 
 20  from intermine.model import Attribute, Reference, Collection 
 21   
 22  USER_AGENT = 'WebserviceInterMinePythonAPIClient' 
class EnrichmentLine(UserDict.UserDict):
    """
    An object that represents a result returned from the enrichment service.
    ========================================================================

    These objects operate as dictionaries as well as objects with predefined
    properties. Any key can also be read as an attribute; underscores in the
    attribute name are translated to hyphens before the key is looked up.
    """

    def __str__(self):
        """Return the string form of the backing dictionary"""
        return str(self.data)

    def __repr__(self):
        """Return a constructor-like representation of this line"""
        return "EnrichmentLine(%s)" % self.data

    def __getattr__(self, name):
        """
        Resolve otherwise unknown attributes against the dictionary keys
        ================================================================

        This is only called when normal attribute lookup has failed.

        @param name: The attribute name; "_" is replaced by "-" to form the key
        @type name: string

        @raise AttributeError: if no key matches
        """
        # Read the backing dict straight from the instance __dict__: going
        # through self.data or self.keys() would re-enter this method without
        # end on an instance whose data has not been set yet.
        data = self.__dict__.get("data")
        if data is not None:
            key_name = name.replace('_', '-')
            if key_name in data:
                return data[key_name]
        raise AttributeError(name)
45
class ResultObject(object):
    """
    An object used to represent result records as returned in jsonobjects format
    ============================================================================

    These objects are backed by a row of data and the class descriptor that
    describes the object. They allow access in standard object style:

        >>> for gene in query.results():
        ...    print(gene.symbol)
        ...    print([pathway.name for pathway in gene.pathways])

    All objects will have "id" and "type" properties. The type refers to the
    actual type of this object: if it is a subclass of the one requested, the
    subclass name will be returned. The "id" refers to the internal database id
    of the object, and is a guarantor of object identity.

    """

    # Names of the instance's own storage. __getattr__ must never try to
    # resolve these against the model, or it would recurse on an instance
    # that has not been initialised yet.
    _INTERNAL_NAMES = frozenset(["_data", "_cld", "_attr_cache"])

    def __init__(self, data, cld, view=None):
        """
        Constructor
        ===========

        @param data: The record backing this object
        @type data: dict
        @param cld: The class descriptor of the requested class
        @param view: The selected paths, each with a leading root segment
        @type view: list
        """
        views = [] if view is None else view
        # Drop the first path segment (the root class) from every view
        stripped = [v[v.find(".") + 1:] for v in views]
        self.selected_attributes = [v for v in stripped if "." not in v]
        # Group paths by their leading "reference." prefix. Plain attributes
        # fall under the "" prefix. This is done by hand because
        # itertools.groupby only groups adjacent items, which loses paths
        # when the views of one reference are interleaved with others.
        grouped = {}
        for path in stripped:
            prefix = path[:path.find(".") + 1]
            grouped.setdefault(prefix, []).append(path)
        self.reference_paths = grouped
        self._data = data
        if "class" not in data or cld.name == data["class"]:
            self._cld = cld
        else:
            # The record is of a different class than the one requested
            self._cld = cld.model.get_class(data["class"])
        self._attr_cache = {}

    def __str__(self):
        """Return the class name and the scalar values of this record"""
        dont_show = set(["objectId", "class"])
        shown = ["%s = %r" % (k, v) for k, v in self._data.items()
                 if not isinstance(v, (dict, list)) and k not in dont_show]
        return "%s(%s)" % (self._cld.name, ", ".join(shown))

    def __repr__(self):
        """Return the class name and every resolved field of this record"""
        dont_show = set(["objectId", "class"])
        shown = ["%s = %r" % (k, getattr(self, k)) for k in self._data.keys()
                 if k not in dont_show]
        return "%s(%s)" % (self._cld.name, ", ".join(shown))

    def __getattr__(self, name):
        """
        Resolve a model field of this object
        ====================================

        Values come from the backing record when present, and are otherwise
        fetched from the service. Resolved values are cached per instance.
        References are wrapped in ResultObjects and collections are returned
        as lists of ResultObjects.

        @raise AttributeError: for the instance's own storage names and
                               for double-underscore names
        @raise WebserviceError: if the field is neither an attribute nor a reference
        """
        if name in ResultObject._INTERNAL_NAMES or name.startswith("__"):
            raise AttributeError(name)

        if name in self._attr_cache:
            return self._attr_cache[name]

        if name == "type":
            # Fall back to the descriptor name when the record carries no
            # "class" key, the same case __init__ allows for.
            if "class" in self._data:
                return self._data["class"]
            return self._cld.name

        fld = self._cld.get_field(name)
        attr = None
        if isinstance(fld, Attribute):
            if name in self._data:
                attr = self._data[name]
            if attr is None:
                attr = self._fetch_attr(fld)
        elif isinstance(fld, Reference):
            ref_paths = self._get_ref_paths(fld)
            if name in self._data:
                data = self._data[name]
            else:
                data = self._fetch_reference(fld)
            if isinstance(fld, Collection):
                if data is None:
                    attr = []
                else:
                    attr = [ResultObject(x, fld.type_class, ref_paths) for x in data]
            elif data is not None:
                attr = ResultObject(data, fld.type_class, ref_paths)
        else:
            raise WebserviceError("Inconsistent model - This should never happen")
        self._attr_cache[name] = attr
        return attr

    def _get_ref_paths(self, fld):
        """Return the selected paths that start at the given reference, or an empty list"""
        return self.reference_paths.get(fld.name + ".", [])

    @property
    def id(self):
        """Return the internal DB identifier of this object. Or None if this is not an InterMine object"""
        return self._data.get('objectId')

    def _fetch_attr(self, fld):
        """Query the service for an attribute that is absent from the backing record"""
        if fld.name in self.selected_attributes:
            return None # Was originally selected - no point asking twice
        c = self._cld
        if "id" not in c:
            return None # Cannot reliably fetch anything without access to the objectId.
        q = c.model.service.query(c, fld).where(id = self.id)
        r = q.first()
        return r._data.get(fld.name)

    def _fetch_reference(self, ref):
        """Query the service for a reference that is absent from the backing record"""
        if ref.name + "." in self.reference_paths:
            return None # Was originally selected - no point asking twice.
        c = self._cld
        if "id" not in c:
            return None # Cannot reliably fetch anything without access to the objectId.
        q = c.model.service.query(ref).outerjoin(ref).where(id = self.id)
        r = q.first()
        return r._data.get(ref.name)
148
class ResultRow(object):
    """
    An object for representing a row of data received back from the server.
    =======================================================================

    ResultRows provide access to the fields of the row through index lookup. However,
    for convenience both list indexes and dictionary keys can be used. So the
    following all work:

        >>> # Assuming the view is "Gene.symbol", "Gene.organism.name":
        >>> row[0] == row["symbol"] == row["Gene.symbol"] == row(0) == row("symbol")
        ... True

    """

    def __init__(self, data, views):
        """
        Constructor
        ===========

        @param data: The cells of this row
        @type data: list
        @param views: The column paths, in the same order as the cells
        @type views: list
        """
        self.data = data
        self.views = views
        self.index_map = None  # built lazily by _get_index_for

    def __len__(self):
        """Return the number of cells in this row"""
        return len(self.data)

    def __iter__(self):
        """Return the list view of the row, so each cell can be processed"""
        return iter(self.to_l())

    @staticmethod
    def _strip_root(view):
        """Return the view without its leading root segment (unchanged if it has no dot)"""
        # The dot is escaped so that only a real "Root." prefix is removed
        return re.sub(r"^[^.]+\.", "", view)

    def _get_index_for(self, key):
        """
        Return the cell index for a view, given with or without its root

        @raise KeyError: if the key matches no view
        """
        if self.index_map is None:
            index_map = {}
            for i, view in enumerate(self.views):
                index_map[view] = i
                index_map[self._strip_root(view)] = i
            self.index_map = index_map

        return self.index_map[key]

    def __str__(self):
        """Return the row as "Root: path=value path=value ..." """
        # An empty view list yields an empty root instead of an IndexError
        root = re.sub(r"\..*$", "", self.views[0]) if self.views else ""
        parts = [root + ":"]
        for view in self.views:
            parts.append(self._strip_root(view) + "=" + repr(self[view]))
        return " ".join(parts)

    def __call__(self, name):
        """Look up a cell; equivalent to indexing with the same key"""
        return self.__getitem__(name)

    def __getitem__(self, key):
        """
        Return the cell(s) for an integer index, a slice or a view name

        @raise KeyError: if a view name matches no column
        """
        if isinstance(key, (int, slice)):
            return self.data[key]
        return self.data[self._get_index_for(key)]

    def to_l(self):
        """Return a list view of this row"""
        return list(self.data)

    def to_d(self):
        """Return a dictionary view of this row"""
        return dict((view, self[view]) for view in self.views)

    def items(self):
        """Return a list of (view, value) pairs"""
        return [(view, self[view]) for view in self.views]

    def iteritems(self):
        """Yield (view, value) pairs"""
        for view in self.views:
            yield (view, self[view])

    def keys(self):
        """Return a copy of the views"""
        return copy.copy(self.views)

    def values(self):
        """Return the cell values as a list"""
        return self.to_l()

    def itervalues(self):
        """Return an iterator over the cell values"""
        return iter(self.to_l())

    def iterkeys(self):
        """Return an iterator over the views"""
        return iter(self.views)

    def has_key(self, key):
        """Return True if the key names one of the views, with or without its root"""
        try:
            self._get_index_for(key)
            return True
        except KeyError:
            return False
247
class TableResultRow(ResultRow):
    """
    A class for parsing results from the jsonrows data format.

    Each cell of the backing data is a mapping, and the cell's content
    is read from its "value" key.
    """

    def __getitem__(self, key):
        """Return the value(s) for an integer index, a slice or a view name"""
        if isinstance(key, int):
            return self.data[key]["value"]
        if isinstance(key, slice):
            return [cell["value"] for cell in self.data[key]]
        position = self._get_index_for(key)
        return self.data[position]["value"]

    def to_l(self):
        """Return a list view of this row"""
        return [cell["value"] for cell in self.data]
266
class ResultIterator(object):
    """
    A facade over the internal iterator object
    ==========================================

    These objects handle the iteration over results
    in the formats requested by the user. They are responsible
    for generating an appropriate parser,
    connecting the parser to the results, and delegating
    iteration appropriately.
    """

    PARSED_FORMATS = frozenset(["rr", "list", "dict"])
    STRING_FORMATS = frozenset(["tsv", "csv", "count"])
    JSON_FORMATS = frozenset(["jsonrows", "jsonobjects", "json"])
    ROW_FORMATS = PARSED_FORMATS | STRING_FORMATS | JSON_FORMATS

    def __init__(self, service, path, params, rowformat, view, cld=None):
        """
        Constructor
        ===========

        Services are responsible for getting result iterators. You will
        not need to create one manually.

        @param service: The service to query; its version, root and opener are used
        @param path: The resource path (eg: "/query/results")
        @type path: string
        @param params: The query parameters for this request (not modified)
        @type params: dict
        @param rowformat: One of "rr", "object", "count", "dict", "list", "tsv", "csv", "jsonrows", "jsonobjects", "json"
        @type rowformat: string
        @param view: The output columns
        @type view: list
        @param cld: The class descriptor handed to ResultObjects in the "jsonobjects" format

        @raise ValueError: if the row format is incorrect
        """
        if rowformat.startswith("object"): # Accept "object", "objects", "objectformat", etc...
            rowformat = "jsonobjects" # these are synonymous
        if rowformat not in self.ROW_FORMATS:
            raise ValueError("'%s' is not one of the valid row formats (%s)"
                    % (rowformat, repr(list(self.ROW_FORMATS))))

        self.row = ResultRow if service.version >= 8 else TableResultRow

        # Parsed formats are built client-side from a JSON format; which one
        # depends on the service version. "tsv" is requested as "tab".
        if rowformat in self.PARSED_FORMATS:
            wire_format = "json" if service.version >= 8 else "jsonrows"
        elif rowformat == 'tsv':
            wire_format = "tab"
        else:
            wire_format = rowformat

        # Work on a copy so the caller's parameters are left untouched
        params = copy.copy(params)
        params.update({"format": wire_format})

        self.url = service.root + path
        self.data = urllib.urlencode(params)
        self.view = view
        self.opener = service.opener
        self.cld = cld
        self.rowformat = rowformat
        self._it = None

    def __len__(self):
        """
        Return the number of items in this iterator
        ===========================================

        Note that this requires iterating over the full result set.
        """
        return sum(1 for _ in self)

    def __iter__(self):
        """
        Return an iterator over the results
        ===================================

        Opens a new connection and returns the internal iterator object
        that parses it in the requested row format.

        @raise Exception: if the internal iterator could not be constructed
        """
        con = self.opener.open(self.url, self.data)
        identity = lambda x: x
        flat_file_parser = lambda: FlatFileIterator(con, identity)
        simple_json_parser = lambda: JSONIterator(con, identity)

        # Factories are used so that only the requested iterator is built
        factories = {
            "tsv"         : flat_file_parser,
            "csv"         : flat_file_parser,
            "count"       : flat_file_parser,
            "json"        : simple_json_parser,
            "jsonrows"    : simple_json_parser,
            "list"        : lambda: JSONIterator(con, lambda x: self.row(x, self.view).to_l()),
            "rr"          : lambda: JSONIterator(con, lambda x: self.row(x, self.view)),
            "dict"        : lambda: JSONIterator(con, lambda x: self.row(x, self.view).to_d()),
            "jsonobjects" : lambda: JSONIterator(con, lambda x: ResultObject(x, self.cld, self.view))
        }
        try:
            reader = factories.get(self.rowformat)()
        except Exception as e:
            raise Exception("Couldn't get iterator for " + self.rowformat + ": " + str(e))
        return reader

    def next(self):
        """
        Returns the next row, in the appropriate format

        The internal iterator is created on first use and discarded once it
        is exhausted, so a later call starts a fresh request.

        @rtype: whatever the rowformat was determined to be
        """
        if self._it is None:
            self._it = iter(self)
        try:
            return next(self._it)
        except StopIteration:
            self._it = None
            raise

    # Python 3 spelling of the iterator protocol
    __next__ = next
387
class FlatFileIterator(object):
    """
    An iterator for handling results returned as a flat file (TSV/CSV).
    ===================================================================

    This iterator can be used as the sub iterator in a ResultIterator
    """

    def __init__(self, connection, parser):
        """
        Constructor
        ===========

        @param connection: The source of data; an iterator over lines
        @param parser: a handler for each row of data
        @type parser: callable
        """
        self.connection = connection
        self.parser = parser

    def __iter__(self):
        """Return this object, which is its own iterator"""
        return self

    def next(self):
        """
        Return a parsed line of data

        @raise WebserviceError: if the line starts with "[ERROR]"
        @raise StopIteration: when the connection has no more lines
        """
        # The builtin next() works with either spelling of the iterator protocol
        line = next(self.connection).strip()
        if line.startswith("[ERROR]"):
            raise WebserviceError(line)
        return self.parser(line)

    # Python 3 spelling of the iterator protocol
    __next__ = next
418
class JSONIterator(object):
    """
    An iterator for handling results returned in the JSONRows format
    ================================================================

    This iterator can be used as the sub iterator in a ResultIterator

    The response is read as a header ending in '"results":[', then one
    row per line, then a footer starting with "]". Header and footer
    together form the JSON container that carries the request status.
    """

    def __init__(self, connection, parser):
        """
        Constructor
        ===========

        Reads the header from the connection straight away.

        @param connection: The source of data; an iterator over lines
        @param parser: a handler for each row of data
        @type parser: callable

        @raise WebserviceError: if the header is incomplete
        """
        self.connection = connection
        self.parser = parser
        self.header = ""
        self.footer = ""
        self.parse_header()
        self._is_finished = False

    def __iter__(self):
        """Return this object, which is its own iterator"""
        return self

    def next(self):
        """Returns a parsed row of data"""
        if self._is_finished:
            raise StopIteration
        return self.get_next_row_from_connection()

    # Python 3 spelling of the iterator protocol
    __next__ = next

    def parse_header(self):
        """
        Reads out the header information from the connection

        @raise WebserviceError: if the connection ends before the results start
        """
        try:
            # Loop rather than recurse, so a long header cannot exhaust the stack
            while True:
                line = next(self.connection).strip()
                self.header += line
                if line.endswith('"results":['):
                    return
        except StopIteration:
            raise WebserviceError("The connection returned a bad header" + self.header)

    def check_return_status(self):
        """
        Perform status checks
        =====================

        The footer contains information as to whether the result
        set was successfully transferred in its entirety. This
        method makes sure we don't silently accept an
        incomplete result set.

        @raise WebserviceError: if the footer indicates there was an error
        """
        container = self.header + self.footer
        try:
            info = json.loads(container)
        except ValueError:
            raise WebserviceError("Error parsing JSON container: " + container)

        if not info["wasSuccessful"]:
            raise WebserviceError(info["statusCode"], info["error"])

    def get_next_row_from_connection(self):
        """
        Reads the connection to get the next row, and sends it to the parser

        @raise WebserviceError: if the connection is interrupted
        @raise StopIteration: once the footer has been read and checked
        """
        next_row = None
        try:
            line = next(self.connection)
        except StopIteration:
            # Running out of lines before the footer means the data was cut short
            raise WebserviceError("Connection interrupted")

        if line.startswith("]"):
            # End of the rows: everything that remains is the footer
            self.footer += line
            self.footer += "".join(self.connection)
            self.check_return_status()
        else:
            line = line.strip().strip(',')
            if len(line) > 0:
                try:
                    row = json.loads(line)
                except ValueError as e:
                    # ValueError covers the decode errors of both json and simplejson
                    raise WebserviceError("Error parsing line from results: '"
                            + line + "' - " + str(e))
                next_row = self.parser(row)
            # NOTE: a blank line leaves next_row unset and so ends the iteration

        if next_row is None:
            self._is_finished = True
            raise StopIteration
        else:
            return next_row
516
class InterMineURLOpener(urllib.FancyURLopener):
    """
    Specific implementation of urllib.FancyURLOpener for this client
    ================================================================

    Provides user agent and authentication headers, and handling of errors
    """
    version = "InterMine-Python-Client-1.07.01"
    PLAIN_TEXT = "text/plain"
    JSON = "application/json"

    def __init__(self, credentials=None, token=None):
        """
        Constructor
        ===========

        InterMineURLOpener((username, password)) S{->} InterMineURLOpener

        Return a new url-opener with the appropriate credentials

        @param credentials: A (username, password) pair, or None
        @param token: An API token appended to every url as "token=...", or None
        """
        urllib.FancyURLopener.__init__(self)
        self.token = token
        self.plain_post_header = {
            "Content-Type": "text/plain; charset=utf-8",
            "User-Agent": USER_AGENT
        }
        if credentials and len(credentials) == 2:
            # NOTE(review): the encoded credentials are sent without an
            # authentication scheme prefix, as before - confirm this is
            # what the server expects.
            base64string = self._encode_credentials(credentials)
            self.addheader("Authorization", base64string)
            self.plain_post_header["Authorization"] = base64string
            self.using_authentication = True
        else:
            self.using_authentication = False

    @staticmethod
    def _encode_credentials(credentials):
        """Return "username:password" base64 encoded on a single line"""
        raw = '%s:%s' % tuple(credentials)
        if not isinstance(raw, bytes):
            raw = raw.encode("utf-8")
        # b64encode never inserts line breaks, so long credentials cannot
        # put a newline into the header value
        encoded = base64.b64encode(raw)
        if not isinstance(encoded, str):
            encoded = encoded.decode("ascii")
        return encoded

    @staticmethod
    def _error_message(content):
        """Return the "error" member of a JSON response body, or the body itself"""
        try:
            return json.loads(content)["error"]
        except (ValueError, KeyError, TypeError):
            return content

    def _request(self, method, url, body, headers):
        """
        Send a request over a fresh connection and return the response body

        @raise WebserviceError: if the response status is not 200
        """
        url = self.prepare_url(url)
        o = urlparse(url)
        # Pick the connection class from the url scheme
        if o.scheme == "https":
            con = httplib.HTTPSConnection(o.hostname, o.port)
        else:
            con = httplib.HTTPConnection(o.hostname, o.port)
        try:
            con.request(method, url, body, headers)
            resp = con.getresponse()
            content = resp.read()
        finally:
            # Close the connection even if the request fails
            con.close()
        if resp.status != 200:
            raise WebserviceError(resp.status, resp.reason, content)
        return content

    def post_plain_text(self, url, body):
        """Post the body as plain text, and return the response body"""
        return self.post_content(url, body, InterMineURLOpener.PLAIN_TEXT)

    def post_content(self, url, body, mimetype, charset = "utf-8"):
        """
        Post the body with the given content type, and return the response body

        @raise WebserviceError: if the response status is not 200
        """
        headers = {
            "Content-Type": "%s; charset=%s" % (mimetype, charset),
            "User-Agent": USER_AGENT
        }
        return self._request('POST', url, body, headers)

    def open(self, url, data=None):
        """Open the url, with the token added if there is one"""
        url = self.prepare_url(url)
        return urllib.FancyURLopener.open(self, url, data)

    def prepare_url(self, url):
        """Return the url with the token added as a query parameter, if there is a token"""
        if self.token:
            token_param = "token=" + self.token
            o = urlparse(url)
            if o.query:
                url += "&" + token_param
            else:
                url += "?" + token_param

        return url

    def delete(self, url):
        """
        Send a DELETE request, and return the response body

        @raise WebserviceError: if the response status is not 200
        """
        return self._request('DELETE', url, None, self.plain_post_header)

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Re-implementation of http_error_default, with content now supplied by default"""
        content = fp.read()
        fp.close()
        raise WebserviceError(errcode, errmsg, content)

    def http_error_400(self, url, fp, errcode, errmsg, headers, data=None):
        """
        Handle 400 HTTP errors, attempting to return informative error messages
        =======================================================================

        400 errors indicate that something about our request was incorrect

        @raise WebserviceError: in all circumstances

        """
        content = fp.read()
        fp.close()
        message = self._error_message(content)
        raise WebserviceError("There was a problem with our request", errcode, errmsg, message)

    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
        """
        Handle 401 HTTP errors, attempting to return informative error messages
        =======================================================================

        401 errors indicate we don't have sufficient permission for the resource
        we requested - usually a list or a template

        @raise WebserviceError: in all circumstances

        """
        content = fp.read()
        fp.close()
        # The response body is passed on as it is, without JSON parsing
        if self.using_authentication:
            raise WebserviceError("Insufficient permissions", errcode, errmsg, content)
        else:
            raise WebserviceError("No permissions - not logged in", errcode, errmsg, content)

    def http_error_403(self, url, fp, errcode, errmsg, headers, data=None):
        """
        Handle 403 HTTP errors, attempting to return informative error messages
        =======================================================================

        403 errors indicate we don't have sufficient permission for the resource
        we requested - usually a list or a template

        @raise WebserviceError: in all circumstances

        """
        content = fp.read()
        fp.close()
        message = self._error_message(content)
        if self.using_authentication:
            raise WebserviceError("Insufficient permissions", errcode, errmsg, message)
        else:
            raise WebserviceError("No permissions - not logged in", errcode, errmsg, message)

    def http_error_404(self, url, fp, errcode, errmsg, headers, data=None):
        """
        Handle 404 HTTP errors, attempting to return informative error messages
        =======================================================================

        404 errors indicate that the requested resource does not exist - usually
        a template that is no longer available.

        @raise WebserviceError: in all circumstances

        """
        content = fp.read()
        fp.close()
        message = self._error_message(content)
        raise WebserviceError("Missing resource", errcode, errmsg, message)

    def http_error_500(self, url, fp, errcode, errmsg, headers, data=None):
        """
        Handle 500 HTTP errors, attempting to return informative error messages
        =======================================================================

        500 errors indicate that the server borked during the request - ie: it wasn't
        our fault.

        @raise WebserviceError: in all circumstances

        """
        content = fp.read()
        fp.close()
        message = self._error_message(content)
        raise WebserviceError("Internal server error", errcode, errmsg, message)
697