Source code for pycounter.sushi

"""NISO SUSHI support."""
from __future__ import absolute_import

import collections
import datetime
import logging
import time
import uuid
import warnings

from lxml import etree
from lxml import objectify
import pendulum
import requests
import six

from pycounter import sushi5
import pycounter.constants
import pycounter.exceptions
from pycounter.helpers import convert_date_run
import pycounter.report


logger = logging.getLogger(__name__)
NS = pycounter.constants.NS


def get_sushi_stats_raw(
    wsdl_url,
    start_date,
    end_date,
    requestor_id=None,
    requestor_email=None,
    requestor_name=None,
    customer_reference=None,
    customer_name=None,
    report="JR1",
    release=4,
    sushi_dump=False,
    verify=True,
    **extra_params
):
    """Request a SUSHI report over SOAP and return the raw response body.

    A ``ReportRequest`` envelope is assembled with lxml, POSTed to
    ``wsdl_url`` with ``requests.post`` and the response content is handed
    back untouched (no parsing or status checking happens here).

    :param wsdl_url: URL to SOAP WSDL for this provider

    :param start_date: start date for report (must be first day of a month)

    :param end_date: end date for report (must be last day of a month)

    :param requestor_id: requestor ID as defined by SUSHI protocol

    :param requestor_email: requestor email address, if required by provider

    :param requestor_name: Internationally recognized organization name

    :param customer_reference: customer reference number as defined by SUSHI
        protocol

    :param customer_name: Internationally recognized organization name

    :param report: report type, values defined by SUSHI protocol

    :param release: report release number (should generally be `4`.)

    :param sushi_dump: produces dump of XML (or JSON, for COUNTER 5) to DEBUG
        logger

    :param verify: bool: whether to verify SSL certificates

    :param extra_params: extra params are passed to requests.post

    :return: the content of the HTTP response
    """
    # pylint: disable=too-many-locals

    def add_text_child(parent, tag, text):
        """Append a child called *tag* to *parent* and set its text."""
        child = etree.SubElement(parent, tag)
        child.text = text
        return child

    # SOAP envelope and body.
    envelope = etree.Element("{%(SOAP-ENV)s}Envelope" % NS, nsmap=NS)
    soap_body = etree.SubElement(envelope, "{%(SOAP-ENV)s}Body" % NS)

    # The request itself is stamped with the current UTC time and a random ID.
    created_at = pendulum.now("UTC").isoformat()
    request_attributes = {"Created": created_at, "ID": str(uuid.uuid4())}
    report_request = etree.SubElement(
        soap_body, "{%(sushicounter)s}ReportRequest" % NS, request_attributes
    )

    # Who is asking.
    requestor = etree.SubElement(report_request, "{%(sushi)s}Requestor" % NS)
    add_text_child(requestor, "{%(sushi)s}ID" % NS, requestor_id)
    add_text_child(requestor, "{%(sushi)s}Name" % NS, requestor_name)
    add_text_child(requestor, "{%(sushi)s}Email" % NS, requestor_email)

    # On whose behalf.
    customer = etree.SubElement(report_request, "{%(sushi)s}CustomerReference" % NS)
    add_text_child(customer, "{%(sushi)s}ID" % NS, customer_reference)
    add_text_child(customer, "{%(sushi)s}Name" % NS, customer_name)

    # Which report, and for which date range.
    definition = etree.SubElement(
        report_request,
        "{%(sushi)s}ReportDefinition" % NS,
        Name=report,
        Release=str(release),
    )
    report_filters = etree.SubElement(definition, "{%(sushi)s}Filters" % NS)
    date_range = etree.SubElement(report_filters, "{%(sushi)s}UsageDateRange" % NS)
    for tag_template, boundary in (
        ("{%(sushi)s}Begin", start_date),
        ("{%(sushi)s}End", end_date),
    ):
        add_text_child(date_range, tag_template % NS, boundary.strftime("%Y-%m-%d"))

    payload = etree.tostring(
        envelope, encoding="utf-8", xml_declaration=True, pretty_print=True
    )

    user_agent = "pycounter/%s" % pycounter.__version__
    headers = {
        "SOAPAction": '"SushiService:GetReportIn"',
        "Content-Type": "text/xml; charset=UTF-8",
        "User-Agent": user_agent,
        "Content-Length": str(len(payload)),
    }

    response = requests.post(
        url=wsdl_url, headers=headers, data=payload, verify=verify, **extra_params
    )
    raw_response = response.content

    if sushi_dump:
        logger.debug(
            "SUSHI DUMP: request: %s \n\n response: %s", payload, raw_response
        )

    return raw_response
def get_report(*args, **kwargs):
    """Get a usage report from a SUSHI server.

    returns a :class:`pycounter.report.CounterReport` object.

    parameters: see get_sushi_stats_raw

    When ``release=5`` is passed as a keyword argument the request and the
    parsing are delegated to :mod:`pycounter.sushi5`; otherwise the SOAP
    implementation in this module is used and any ``api_key`` keyword is
    dropped with a :class:`pycounter.exceptions.SushiWarning`.

    While the server answers with "Report Queued"
    (:class:`pycounter.exceptions.ServiceBusyError`) the request is retried
    indefinitely, sleeping between attempts. Each retry is reported through
    this module's logger at WARNING level rather than printed to stdout.

    :param no_delay: don't delay in retrying Report Queued
    """
    if kwargs.get("release") == 5:
        gssr = sushi5.get_sushi_stats_raw
        rtf = sushi5.raw_to_full
    else:
        gssr = get_sushi_stats_raw
        rtf = raw_to_full
        if "api_key" in kwargs:
            # stacklevel=2 attributes the warning to the caller's line.
            warnings.warn(
                pycounter.exceptions.SushiWarning(
                    "api_key only supported in COUNTER 5"
                ),
                stacklevel=2,
            )
            kwargs.pop("api_key", None)

    # no_delay is consumed here and must not reach the request function.
    no_delay = kwargs.pop("no_delay", False)
    delay_amount = 0 if no_delay else 60

    while True:
        try:
            raw_report = gssr(*args, **kwargs)
            return rtf(raw_report)
        except pycounter.exceptions.ServiceBusyError:
            # Library code should not write to stdout; use the module logger.
            logger.warning("Service busy, retrying in %d seconds", delay_amount)
            time.sleep(delay_amount)
def ns(namespace, name):
    """Convenience function to make a namespaced XML name.

    The result has the ``{namespace-uri}name`` form, with the URI looked up
    in the module-level ``NS`` mapping.

    :param namespace: one of 'SOAP-ENV', 'sushi', 'sushicounter', 'counter'

    :param name: tag name within the given namespace

    :return: the qualified tag name as a string
    """
    return "{" + NS[namespace] + "}" + name


def raw_to_full(raw_report):
    """Convert a raw report to CounterReport.

    The XML is parsed, the report period, definition, customer and creation
    date are read from it, and every ``ReportItems`` entry is turned into
    zero or more publication objects appended to ``report.pubs``. Which
    publication class is used depends on the report type (JR1, JR2, BR*,
    DB*, PR1); items of any other report type add nothing.

    :param raw_report: raw XML report

    :return: a :class:`pycounter.report.CounterReport`

    :raises pycounter.exceptions.SushiException: if the XML cannot be parsed
        or no report element is found in it

    :raises pycounter.exceptions.ServiceBusyError: if no report element is
        found and the response contains "Report Queued"
    """
    # pylint: disable=too-many-statements,too-many-branches,too-many-locals
    try:
        root = etree.fromstring(raw_report)
    except etree.XMLSyntaxError:
        logger.error("XML syntax error: %s", raw_report)
        raise pycounter.exceptions.SushiException(
            message="XML syntax error", raw=raw_report
        )
    # The document is parsed a second time with objectify: ``root`` serves
    # the namespace-qualified find() lookups below, ``o_root`` the
    # attribute-style traversal of the report items.
    o_root = objectify.fromstring(raw_report)
    # rep stays None if the Body lookup itself fails; the fallback's
    # ``rep.Report`` access then raises AttributeError too and ends up in the
    # inner handler.
    rep = None
    try:
        rep = o_root.Body[ns("sushicounter", "ReportResponse")]
        c_report = rep.Report[ns("counter", "Report")]
    except AttributeError:
        try:
            # Fallback: the report nested inside a counter:Reports element.
            c_report = rep.Report[ns("counter", "Reports")].Report
        except AttributeError:
            # NOTE(review): bytes substring test, so raw_report is presumably
            # bytes (e.g. requests' response.content) - confirm for callers.
            if b"Report Queued" in raw_report:
                raise pycounter.exceptions.ServiceBusyError("Report Queued")
            else:
                logger.error("report not found in XML: %s", raw_report)
                raise pycounter.exceptions.SushiException(
                    message="report not found in XML", raw=raw_report, xml=o_root
                )
    logger.debug("COUNTER report: %s", etree.tostring(c_report))
    # Reporting period: first sushi:Begin / sushi:End found in the document.
    start_date = datetime.datetime.strptime(
        root.find(".//%s" % ns("sushi", "Begin")).text, "%Y-%m-%d"
    ).date()

    end_date = datetime.datetime.strptime(
        root.find(".//%s" % ns("sushi", "End")).text, "%Y-%m-%d"
    ).date()

    report_data = {"period": (start_date, end_date)}

    rep_def = root.find(".//%s" % ns("sushi", "ReportDefinition"))
    report_data["report_version"] = int(rep_def.get("Release"))
    report_data["report_type"] = rep_def.get("Name")

    # Customer name and ID are optional: find() returning None makes the
    # ``.text`` / ``.find`` access raise AttributeError, which selects the
    # empty-string default.
    customer = root.find(".//%s" % ns("counter", "Customer"))
    try:
        report_data["customer"] = customer.find(".//%s" % ns("counter", "Name")).text
    except AttributeError:
        report_data["customer"] = ""

    try:
        inst_id = customer.find(".//%s" % ns("counter", "ID")).text
    except AttributeError:
        inst_id = u""
    report_data["institutional_identifier"] = inst_id

    rep_root = root.find(".//%s" % ns("counter", "Report"))
    created_string = rep_root.get("Created")
    if created_string is not None:
        report_data["date_run"] = pendulum.parse(created_string)
    else:
        # No Created attribute: fall back to the current local time
        # (a naive datetime).
        report_data["date_run"] = datetime.datetime.now()

    report = pycounter.report.CounterReport(**report_data)

    report.metric = pycounter.constants.METRICS.get(report_data["report_type"])

    for item in c_report.Customer.ReportItems:
        try:
            publisher_name = item.ItemPublisher.text
        except AttributeError:
            publisher_name = ""
        title = item.ItemName.text
        platform = item.ItemPlatform.text

        # Identifier defaults, used when the item carries no such identifier.
        eissn = issn = ""
        print_isbn = None
        online_isbn = None
        doi = ""
        prop_id = ""

        try:
            for identifier in item.ItemIdentifier:
                if identifier.Type == "Print_ISSN":
                    issn = identifier.Value.text
                    if issn is None:
                        issn = ""
                elif identifier.Type == "Online_ISSN":
                    eissn = identifier.Value.text
                    if eissn is None:
                        eissn = ""
                elif identifier.Type == "Online_ISBN":
                    online_isbn = identifier.Value.text
                elif identifier.Type == "Print_ISBN":
                    print_isbn = identifier.Value.text
                elif identifier.Type == "DOI":
                    doi = identifier.Value.text
                elif identifier.Type == "Proprietary":
                    prop_id = identifier.Value.text
        except AttributeError:
            # An AttributeError while reading identifiers is swallowed;
            # whatever was collected up to that point is kept.
            pass

        # Per-item accumulators.
        month_data = []  # (date, count) pairs taken from "ft_total"
        html_usage = 0  # sum of "ft_html" counts over all periods
        pdf_usage = 0  # sum of "ft_pdf" counts over all periods

        # MetricType -> list of (date, count), in first-seen order; only
        # filled for DB*, PR1, JR2 and BR3 reports.
        metrics_for_db = collections.OrderedDict()

        for perform_item in item.ItemPerformance:
            item_date = convert_date_run(perform_item.Period.Begin.text)
            logger.debug("perform_item date: %r", item_date)
            usage = None
            if hasattr(perform_item, "Instance"):
                for inst in perform_item.Instance:
                    if inst.MetricType == "ft_total":
                        usage = str(inst.Count)
                    elif inst.MetricType == "ft_pdf":
                        pdf_usage += int(inst.Count)
                    elif inst.MetricType == "ft_html":
                        html_usage += int(inst.Count)
                    elif report.report_type.startswith("DB") or report.report_type in (
                        "PR1",
                        "JR2",
                        "BR3",
                    ):
                        metrics_for_db.setdefault(inst.MetricType, []).append(
                            (item_date, int(inst.Count))
                        )
            # Periods without an "ft_total" instance contribute no month entry.
            if usage is not None:
                month_data.append((item_date, int(usage)))

        # Build the publication object(s) for this item. JR1 and BR1/BR2 yield
        # one object per item using month_data; BR3, DB*, PR1 and JR2 yield
        # one object per collected metric, translated through DB_METRIC_MAP.
        # NOTE: in those loops the loop variable rebinds ``month_data``.
        if report.report_type:
            if report.report_type == "JR1":
                report.pubs.append(
                    pycounter.report.CounterJournal(
                        title=title,
                        platform=platform,
                        publisher=publisher_name,
                        period=report.period,
                        metric=report.metric,
                        issn=issn,
                        eissn=eissn,
                        doi=doi,
                        proprietary_id=prop_id,
                        month_data=month_data,
                        html_total=html_usage,
                        pdf_total=pdf_usage,
                    )
                )
            elif report.report_type == "BR3":
                for metric_code, month_data in six.iteritems(metrics_for_db):
                    metric = pycounter.constants.DB_METRIC_MAP[metric_code]
                    report.pubs.append(
                        pycounter.report.CounterBook(
                            title=title,
                            platform=platform,
                            publisher=publisher_name,
                            period=report.period,
                            metric=metric,
                            issn=issn,
                            print_isbn=print_isbn,
                            online_isbn=online_isbn,
                            doi=doi,
                            proprietary_id=prop_id,
                            month_data=month_data,
                        )
                    )
            elif report.report_type.startswith("BR"):  # BR1, BR2
                report.pubs.append(
                    pycounter.report.CounterBook(
                        title=title,
                        platform=platform,
                        publisher=publisher_name,
                        period=report.period,
                        metric=report.metric,
                        issn=issn,
                        doi=doi,
                        proprietary_id=prop_id,
                        print_isbn=print_isbn,
                        online_isbn=online_isbn,
                        month_data=month_data,
                    )
                )
            elif report.report_type.startswith("DB"):
                for metric_code, month_data in six.iteritems(metrics_for_db):
                    metric = pycounter.constants.DB_METRIC_MAP[metric_code]
                    report.pubs.append(
                        pycounter.report.CounterDatabase(
                            title=title,
                            platform=platform,
                            publisher=publisher_name,
                            period=report.period,
                            metric=metric,
                            month_data=month_data,
                        )
                    )
            elif report.report_type == "PR1":
                for metric_code, month_data in six.iteritems(metrics_for_db):
                    metric = pycounter.constants.DB_METRIC_MAP[metric_code]
                    report.pubs.append(
                        pycounter.report.CounterPlatform(
                            platform=platform,
                            publisher=publisher_name,
                            period=report.period,
                            metric=metric,
                            month_data=month_data,
                        )
                    )
            elif report.report_type == "JR2":
                for metric_code, month_data in six.iteritems(metrics_for_db):
                    metric = pycounter.constants.DB_METRIC_MAP[metric_code]
                    report.pubs.append(
                        pycounter.report.CounterJournal(
                            title=title,
                            platform=platform,
                            publisher=publisher_name,
                            period=report.period,
                            metric=metric,
                            issn=issn,
                            eissn=eissn,
                            doi=doi,
                            proprietary_id=prop_id,
                            month_data=month_data,
                        )
                    )

    return report