1 try:
2 import simplejson as json
3 except ImportError:
4 try:
5 import json
6 except ImportError:
7 raise ImportError("Could not find any JSON module to import - "
8 + "please install simplejson or jsonlib to continue")
9
10 import urllib
11 import httplib
12 import re
13 import copy
14 import base64
15 from urlparse import urlparse
16 from itertools import groupby
17 import UserDict
18
19 from intermine.errors import WebserviceError
20 from intermine.model import Attribute, Reference, Collection
21
# Client identifier placed in the headers of the hand-built POST and DELETE
# requests issued by the URL opener defined later in this module.
22 USER_AGENT = 'WebserviceInterMinePythonAPIClient'
25 """
26 An object that represents a result returned from the enrichment service.
27 ========================================================================
28
29 These objects operate as dictionaries as well as objects with predefined
30 properties.
31 """
32
35
37 return "EnrichmentLine(%s)" % self.data
38
40 if name is not None:
41 key_name = name.replace('_', '-')
42 if key_name in self.keys():
43 return self.data[key_name]
44 raise AttributeError(name)
45
47 """
48 An object used to represent result records as returned in jsonobjects format
49 ============================================================================
50
51 These objects are backed by a row of data and the class descriptor that
52 describes the object. They allow access in standard object style:
53
54 >>> for gene in query.results():
55 ... print gene.symbol
56 ... print map(lambda x: x.name, gene.pathways)
57
58 All objects will have "id" and "type" properties. The type refers to the
59 actual type of this object: if it is a subclass of the one requested, the
60 subclass name will be returned. The "id" refers to the internal database id
61 of the object, and is a guarantor of object identity.
62
63 """
64
66 stripped = [v[v.find(".") + 1:] for v in view]
67 self.selected_attributes = [v for v in stripped if "." not in v]
68 self.reference_paths = dict(((k, list(i)) for k, i in groupby(stripped, lambda x: x[:x.find(".") + 1])))
69 self._data = data
70 self._cld = cld if "class" not in data or cld.name == data["class"] else cld.model.get_class(data["class"])
71 self._attr_cache = {}
72
74 dont_show = set(["objectId", "class"])
75 return "%s(%s)" % (self._cld.name, ", ".join("%s = %r" % (k, v) for k, v in self._data.items()
76 if not isinstance(v, dict) and not isinstance(v, list) and k not in dont_show))
77
79 dont_show = set(["objectId", "class"])
80 return "%s(%s)" % (self._cld.name, ", ".join("%s = %r" % (k, getattr(self, k)) for k in self._data.keys()
81 if k not in dont_show))
82
84 if name in self._attr_cache:
85 return self._attr_cache[name]
86
87 if name == "type":
88 return self._data["class"]
89
90 fld = self._cld.get_field(name)
91 attr = None
92 if isinstance(fld, Attribute):
93 if name in self._data:
94 attr = self._data[name]
95 if attr is None:
96 attr = self._fetch_attr(fld)
97 elif isinstance(fld, Reference):
98 ref_paths = self._get_ref_paths(fld)
99 if name in self._data:
100 data = self._data[name]
101 else:
102 data = self._fetch_reference(fld)
103 if isinstance(fld, Collection):
104 if data is None:
105 attr = []
106 else:
107 attr = map(lambda x: ResultObject(x, fld.type_class, ref_paths), data)
108 else:
109 if data is None:
110 attr = None
111 else:
112 attr = ResultObject(data, fld.type_class, ref_paths)
113 else:
114 raise WebserviceError("Inconsistent model - This should never happen")
115 self._attr_cache[name] = attr
116 return attr
117
119 if fld.name + "." in self.reference_paths:
120 return self.reference_paths[fld.name + "."]
121 else:
122 return []
123
124 @property
126 """Return the internal DB identifier of this object. Or None if this is not an InterMine object"""
127 return self._data.get('objectId')
128
130 if fld.name in self.selected_attributes:
131 return None
132 c = self._cld
133 if "id" not in c:
134 return None
135 q = c.model.service.query(c, fld).where(id = self.id)
136 r = q.first()
137 return r._data[fld.name] if fld.name in r._data else None
138
140 if ref.name + "." in self.reference_paths:
141 return None
142 c = self._cld
143 if "id" not in c:
144 return None
145 q = c.model.service.query(ref).outerjoin(ref).where(id = self.id)
146 r = q.first()
147 return r._data[ref.name] if ref.name in r._data else None
148
150 """
151 An object for representing a row of data received back from the server.
152 =======================================================================
153
154 ResultRows provide access to the fields of the row through index lookup. However,
155 for convenience both list indexes and dictionary keys can be used. So the
156 following all work:
157
158 >>> # Assuming the view is "Gene.symbol", "Gene.organism.name":
159 >>> row[0] == row["symbol"] == row["Gene.symbol"] == row(0) == row("symbol")
160 True
161
162 """
163
165 self.data = data
166 self.views = views
167 self.index_map = None
168
170 """Return the number of cells in this row"""
171 return len(self.data)
172
174 """Return the list view of the row, so each cell can be processed"""
175 return iter(self.to_l())
176
178 if self.index_map is None:
179 self.index_map = {}
180 for i in range(len(self.views)):
181 view = self.views[i]
182 headless_view = re.sub("^[^.]+.", "", view)
183 self.index_map[view] = i
184 self.index_map[headless_view] = i
185
186 return self.index_map[key]
187
189 root = re.sub("\..*$", "", self.views[0])
190 parts = [root + ":"]
191 for view in self.views:
192 short_form = re.sub("^[^.]+.", "", view)
193 value = self[view]
194 parts.append(short_form + "=" + repr(value))
195 return " ".join(parts)
196
199
201 if isinstance(key, int):
202 return self.data[key]
203 elif isinstance(key, slice):
204 return self.data[key]
205 else:
206 index = self._get_index_for(key)
207 return self.data[index]
208
210 """Return a list view of this row"""
211 return [x for x in self.data]
212
213
215 """Return a dictionary view of this row"""
216 d = {}
217 for view in self.views:
218 d[view] = self[view]
219
220 return d
221
223 return [(view, self[view]) for view in self.views]
224
226 for view in self.views:
227 yield (view, self[view])
228
230 return copy.copy(self.views)
231
234
236 return iter(self.to_l())
237
239 return iter(self.views)
240
242 try:
243 self._get_index_for(key)
244 return True
245 except KeyError:
246 return False
247
249 """
250 A class for parsing results from the jsonrows data format.
251 """
252
254 if isinstance(key, int):
255 return self.data[key]["value"]
256 elif isinstance(key, slice):
257 vals = map(lambda x: x["value"], self.data[key])
258 return vals
259 else:
260 index = self._get_index_for(key)
261 return self.data[index]["value"]
262
264 """Return a list view of this row"""
265 return map(lambda x: x["value"], self.data)
266
268 """
269 A facade over the internal iterator object
270 ==========================================
271
272 These objects handle the iteration over results
273 in the formats requested by the user. They are responsible
274 for generating an appropriate parser,
275 connecting the parser to the results, and delegating
276 iteration appropriately.
277 """
278
279 PARSED_FORMATS = frozenset(["rr", "list", "dict"])
280 STRING_FORMATS = frozenset(["tsv", "csv", "count"])
281 JSON_FORMATS = frozenset(["jsonrows", "jsonobjects", "json"])
282 ROW_FORMATS = PARSED_FORMATS | STRING_FORMATS | JSON_FORMATS
283
def __init__(self, service, path, params, rowformat, view, cld=None):
    """
    Constructor
    ===========

    Services are responsible for getting result iterators. You will
    not need to create one manually.

    No request is made here; the connection is opened when the
    iterator is iterated over.

    @param service: The service to query. Its C{root}, C{opener} and C{version} attributes are read.
    @param path: The resource path (eg: "/query/results")
    @type path: string
    @param params: The query parameters for this request. The mapping is copied, not modified.
    @type params: dict
    @param rowformat: One of "rr", "object", "count", "dict", "list", "tsv", "csv", "jsonrows", "jsonobjects", "json"
    @type rowformat: string
    @param view: The output columns
    @type view: list
    @param cld: The class descriptor handed to each result object in the "jsonobjects" format

    @raise ValueError: if the row format is incorrect
    """
    # Anything starting with "object" is shorthand for the jsonobjects format.
    if rowformat.startswith("object"):
        rowformat = "jsonobjects"
    if rowformat not in self.ROW_FORMATS:
        raise ValueError("'%s' is not one of the valid row formats (%s)"
                % (rowformat, repr(list(self.ROW_FORMATS))))

    self.row = ResultRow if service.version >= 8 else TableResultRow

    # Translate the user-facing row format into the format name sent to the server.
    if rowformat in self.PARSED_FORMATS:
        wire_format = "json" if service.version >= 8 else "jsonrows"
    elif rowformat == 'tsv':
        wire_format = "tab"
    else:
        wire_format = rowformat

    # Work on a copy so that the caller's parameters are left untouched.
    request_params = copy.copy(params)
    request_params.update({"format": wire_format})

    self.url = service.root + path
    self.data = urllib.urlencode(request_params)
    self.view = view
    self.opener = service.opener
    self.cld = cld
    self.rowformat = rowformat
    self._it = None
333
335 """
336 Return the number of items in this iterator
337 ===========================================
338
339 Note that this requires iterating over the full result set.
340 """
341 c = 0
342 for x in self:
343 c += 1
344 return c
345
347 """
348 Return an iterator over the results
349 ===================================
350
351 Returns the internal iterator object.
352 """
353 con = self.opener.open(self.url, self.data)
354 identity = lambda x: x
355 flat_file_parser = lambda: FlatFileIterator(con, identity)
356 simple_json_parser = lambda: JSONIterator(con, identity)
357
358 try:
359 reader = {
360 "tsv" : flat_file_parser,
361 "csv" : flat_file_parser,
362 "count" : flat_file_parser,
363 "json" : simple_json_parser,
364 "jsonrows" : simple_json_parser,
365 "list" : lambda: JSONIterator(con, lambda x: self.row(x, self.view).to_l()),
366 "rr" : lambda: JSONIterator(con, lambda x: self.row(x, self.view)),
367 "dict" : lambda: JSONIterator(con, lambda x: self.row(x, self.view).to_d()),
368 "jsonobjects" : lambda: JSONIterator(con, lambda x: ResultObject(x, self.cld, self.view))
369 }.get(self.rowformat)()
370 except Exception, e:
371 raise Exception("Couldn't get iterator for " + self.rowformat + str(e))
372 return reader
373
375 """
376 Returns the next row, in the appropriate format
377
378 @rtype: whatever the rowformat was determined to be
379 """
380 if self._it is None:
381 self._it = iter(self)
382 try:
383 return self._it.next()
384 except StopIteration:
385 self._it = None
386 raise StopIteration
387
389 """
390 An iterator for handling results returned as a flat file (TSV/CSV).
391 ===================================================================
392
393 This iterator can be used as the sub iterator in a ResultIterator
394 """
395
def __init__(self, connection, parser):
    """
    Constructor
    ===========

    Remembers where the lines come from and what to do with each one.

    @param connection: The source of data, read one line at a time
    @param parser: a handler called with each line of data
    """
    self.connection, self.parser = connection, parser
408
411
413 """Return a parsed line of data"""
414 line = self.connection.next().strip()
415 if line.startswith("[ERROR]"):
416 raise WebserviceError(line)
417 return self.parser(line)
418
420 """
421 An iterator for handling results returned in the JSONRows format
422 ================================================================
423
424 This iterator can be used as the sub iterator in a ResultIterator
425 """
426
def __init__(self, connection, parser):
    """
    Constructor
    ===========

    Stores the connection and the row handler, then reads the header
    from the connection straight away by calling C{parse_header}.

    @param connection: The source of data, read one line at a time
    @param parser: a handler called with each decoded row of data
    """
    self.connection, self.parser = connection, parser
    # The header and footer text are accumulated as the stream is read.
    self.header = self.footer = ""
    self.parse_header()
    self._is_finished = False
443
446
452
454 """Reads out the header information from the connection"""
455 try:
456 line = self.connection.next().strip()
457 self.header += line
458 if not line.endswith('"results":['):
459 self.parse_header()
460 except StopIteration:
461 raise WebserviceError("The connection returned a bad header" + self.header)
462
# NOTE(review): the `def` line is absent from this extract; the name and
# signature are taken from the in-file call `self.check_return_status()`.
def check_return_status(self):
    """
    Perform status checks
    =====================

    The footer contains information as to whether the result
    set was successfully transferred in its entirety. This
    method makes sure we don't silently accept an
    incomplete result set.

    @raise WebserviceError: if the header and footer do not form valid JSON,
        or if the footer indicates there was an error
    """
    # Header and footer together make up the JSON document that wrapped the rows.
    container = self.header + self.footer
    try:
        info = json.loads(container)
    except (ValueError, TypeError):
        # Only decoding failures are translated; interrupts still propagate.
        raise WebserviceError("Error parsing JSON container: " + container)

    if not info["wasSuccessful"]:
        raise WebserviceError(info["statusCode"], info["error"])
484
# NOTE(review): the `def` line is absent from this extract; the name used
# here follows the upstream InterMine client - confirm against the full file.
def get_next_row_from_connection(self):
    """
    Reads the connection to get the next row, and sends it to the parser

    A line starting with "]" marks the end of the rows: it and everything
    after it is collected as the footer, which is then checked.

    @raise WebserviceError: if the connection is interrupted, or a row cannot be decoded
    @raise StopIteration: when no row was produced from the line that was read
    """
    next_row = None
    try:
        line = next(self.connection)
        if line.startswith("]"):
            self.footer += line
            # Collect every remaining line (not the first one again).
            for otherline in self.connection:
                self.footer += otherline
            self.check_return_status()
        else:
            line = line.strip().strip(',')
            if len(line) > 0:
                try:
                    row = json.loads(line)
                except ValueError as e:
                    # ValueError covers both the stdlib json module and
                    # simplejson's JSONDecodeError subclass.
                    raise WebserviceError("Error parsing line from results: '"
                            + line + "' - " + str(e))
                next_row = self.parser(row)
    except StopIteration:
        raise WebserviceError("Connection interrupted")

    if next_row is None:
        self._is_finished = True
        raise StopIteration
    else:
        return next_row
516
518 """
519 Specific implementation of urllib.FancyURLopener for this client
520 ================================================================
521
522 Provides user agent and authentication headers, and handling of errors
523 """
524 version = "InterMine-Python-Client-1.07.01"
525 PLAIN_TEXT = "text/plain"
526 JSON = "application/json"
527
def __init__(self, credentials=None, token=None):
    """
    Constructor
    ===========

    InterMineURLOpener((username, password)) S{->} InterMineURLOpener

    Return a new url-opener with the appropriate credentials

    @param credentials: an optional (username, password) pair; anything that
        is empty or not of length two is ignored
    @param token: an optional API token, stored on the opener as C{token}
    """
    urllib.FancyURLopener.__init__(self)
    self.token = token
    self.plain_post_header = {
        "Content-Type": "text/plain; charset=utf-8",
        "User-Agent": USER_AGENT
    }
    if credentials and len(credentials) == 2:
        # b64encode never inserts line breaks, so long credentials cannot
        # split the header; tuple() lets a two-item list be used as well.
        encoded = base64.b64encode('%s:%s' % tuple(credentials))
        # HTTP basic authentication requires the scheme name before the value.
        auth_value = "Basic " + encoded
        self.addheader("Authorization", auth_value)
        self.plain_post_header["Authorization"] = auth_value
        self.using_authentication = True
    else:
        self.using_authentication = False
550
551 - def post_plain_text(self, url, body):
553
def post_content(self, url, body, mimetype, charset = "utf-8"):
    """
    POST a body of the given content type and return the response content

    @param url: the address to post to; passed through C{prepare_url} first
    @param body: the request body
    @param mimetype: the media type announced in the Content-Type header
    @param charset: the character set announced in the Content-Type header

    @return: the body of the response
    @raise WebserviceError: if the response status is not 200
    """
    headers = {
        "Content-Type": "%s; charset=%s" % (mimetype, charset),
        "User-Agent": USER_AGENT
    }
    url = self.prepare_url(url)
    target = urlparse(url)
    # Use a TLS connection for https addresses instead of plain HTTP.
    if target.scheme == "https":
        con = httplib.HTTPSConnection(target.hostname, target.port)
    else:
        con = httplib.HTTPConnection(target.hostname, target.port)
    try:
        con.request('POST', url, body, headers)
        resp = con.getresponse()
        content = resp.read()
    finally:
        # Release the socket even when the request or the read fails.
        con.close()
    if resp.status != 200:
        raise WebserviceError(resp.status, resp.reason, content)
    return content
569
def open(self, url, data=None):
    """
    Open the address after passing it through C{prepare_url}

    @param url: the address to open
    @param data: optional data handed on unchanged to the parent opener

    @return: whatever the parent opener's C{open} returns
    """
    prepared = self.prepare_url(url)
    return urllib.FancyURLopener.open(self, prepared, data)
573
575 if self.token:
576 token_param = "token=" + self.token
577 o = urlparse(url)
578 if o.query:
579 url += "&" + token_param
580 else:
581 url += "?" + token_param
582
583 return url
584
# NOTE(review): the `def` line is absent from this extract; the name and
# signature follow the upstream InterMine client - confirm against the full file.
def delete(self, url):
    """
    Issue a DELETE request and return the response content

    @param url: the address of the resource; passed through C{prepare_url} first

    @return: the body of the response
    @raise WebserviceError: if the response status is not 200
    """
    url = self.prepare_url(url)
    target = urlparse(url)
    # Use a TLS connection for https addresses instead of plain HTTP.
    if target.scheme == "https":
        con = httplib.HTTPSConnection(target.hostname, target.port)
    else:
        con = httplib.HTTPConnection(target.hostname, target.port)
    try:
        con.request('DELETE', url, None, self.plain_post_header)
        resp = con.getresponse()
        content = resp.read()
    finally:
        # Release the socket even when the request or the read fails.
        con.close()
    if resp.status != 200:
        raise WebserviceError(resp.status, resp.reason, content)
    return content
596
598 """Re-implementation of http_error_default, with content now supplied by default"""
599 content = fp.read()
600 fp.close()
601 raise WebserviceError(errcode, errmsg, content)
602
def http_error_400(self, url, fp, errcode, errmsg, headers, data=None):
    """
    Handle 400 HTTP errors, attempting to return informative error messages
    =======================================================================

    400 errors indicate that something about our request was incorrect.
    The "error" entry of a JSON response body is reported when there is
    one; otherwise the raw body is reported.

    @raise WebserviceError: in all circumstances

    """
    try:
        content = fp.read()
    finally:
        fp.close()
    try:
        message = json.loads(content)["error"]
    except Exception:
        # Best effort: any body we cannot interpret is reported verbatim.
        message = content
    raise WebserviceError("There was a problem with our request", errcode, errmsg, message)
620
def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
    """
    Handle 401 HTTP errors, attempting to return informative error messages
    =======================================================================

    401 errors indicate we don't have sufficient permission for the resource
    we requested - usually a list or a template. The message depends on
    whether this opener was created with credentials.

    @raise WebserviceError: in all circumstances

    """
    body = fp.read()
    fp.close()
    reason = "No permissions - not logged in"
    if self.using_authentication:
        reason = "Insufficient permissions"
    raise WebserviceError(reason, errcode, errmsg, body)
638
def http_error_403(self, url, fp, errcode, errmsg, headers, data=None):
    """
    Handle 403 HTTP errors, attempting to return informative error messages
    =======================================================================

    403 errors indicate we don't have sufficient permission for the resource
    we requested - usually a list or a template. The "error" entry of a
    JSON response body is reported when there is one; otherwise the raw
    body is reported.

    @raise WebserviceError: in all circumstances

    """
    try:
        content = fp.read()
    finally:
        fp.close()
    try:
        message = json.loads(content)["error"]
    except Exception:
        # Best effort: any body we cannot interpret is reported verbatim.
        message = content
    if self.using_authentication:
        raise WebserviceError("Insufficient permissions", errcode, errmsg, message)
    else:
        raise WebserviceError("No permissions - not logged in", errcode, errmsg, message)
660
def http_error_404(self, url, fp, errcode, errmsg, headers, data=None):
    """
    Handle 404 HTTP errors, attempting to return informative error messages
    =======================================================================

    404 errors indicate that the requested resource does not exist - usually
    a template that is no longer available. The "error" entry of a JSON
    response body is reported when there is one; otherwise the raw body
    is reported.

    @raise WebserviceError: in all circumstances

    """
    try:
        content = fp.read()
    finally:
        fp.close()
    try:
        message = json.loads(content)["error"]
    except Exception:
        # Best effort: any body we cannot interpret is reported verbatim.
        message = content
    raise WebserviceError("Missing resource", errcode, errmsg, message)
def http_error_500(self, url, fp, errcode, errmsg, headers, data=None):
    """
    Handle 500 HTTP errors, attempting to return informative error messages
    =======================================================================

    500 errors indicate that the server failed during the request - ie: it
    wasn't our fault. The "error" entry of a JSON response body is reported
    when there is one; otherwise the raw body is reported.

    @raise WebserviceError: in all circumstances

    """
    try:
        content = fp.read()
    finally:
        fp.close()
    try:
        message = json.loads(content)["error"]
    except Exception:
        # Best effort: any body we cannot interpret is reported verbatim.
        message = content
    raise WebserviceError("Internal server error", errcode, errmsg, message)
697