"""Look up Two Plus Two forum members and scrape their public profile pages."""

import re
from datetime import datetime

import attr
import parsedatetime
import requests
from dateutil.tz import tzoffset
from lxml import etree
from pytz import UTC

from .._common import _make_int

# Names exported by a star-import of this module.
# NOTE(review): the original entries of this list were lost; it is rebuilt
# from the public callables/constants defined in this file -- confirm.
__all__ = [
    "search_userid",
    "ForumMember",
    "AJAX_USERSEARCH_URL",
]

# URL of the forum's AJAX user search endpoint.
# NOTE(review): FORUM_URL is referenced here but its definition is not visible
# in this excerpt -- it must be defined before this line.
AJAX_USERSEARCH_URL = FORUM_URL + "/ajax.php?do=usersearch"

class AmbiguousUserNameError(Exception):
    """Raised when a user name lookup does not identify exactly one user.

    The search matched user names that merely start with the requested name.
    ``search_userid`` attaches the candidates to the raised instance as a
    ``users`` attribute.
    """

class UserNotFoundError(Exception):
    """Raised when the user search returns no user for the requested name."""

@attr.s
class _ExtraUser:
    """One candidate returned by the user search (user id plus user name).

    Instances are attached to ``AmbiguousUserNameError.users`` by
    ``search_userid``.
    """

    # ``attr.ib()`` fields only become constructor arguments once the class is
    # processed by ``attr.s``; without the decorator ``_ExtraUser(name=...,
    # id=...)`` raises TypeError.
    id = attr.ib()
    name = attr.ib()

def search_userid(username):
    """Return the forum user id belonging to ``username``.

    Sends the forum's AJAX user search request with ``username`` as the search
    fragment and inspects the XML document that comes back.

    :param username: user name to look up; compared case-insensitively.
    :returns: the ``userid`` attribute of the first element in the reply.
    :raises UserNotFoundError: the reply contains no user element.
    :raises AmbiguousUserNameError: the first hit is not ``username`` itself;
        the exception's ``users`` attribute holds every returned candidate.
    """
    headers = {
        "X-Requested-With": "XMLHttpRequest",
        "Origin": FORUM_URL,
        "Referer": FORUM_URL + "/search.php",
    }
    data = {"securitytoken": "guest", "do": "usersearch", "fragment": username}

    # NOTE(review): the request call was garbled in the source; a POST of the
    # form data to the AJAX search endpoint is the reconstructed intent.
    response = requests.post(AJAX_USERSEARCH_URL, data, headers=headers)
    root = etree.fromstring(response.content)

    try:
        found_name = root[0].text
    except IndexError:
        # An empty result document means nobody matched the fragment.
        raise UserNotFoundError(username) from None

    # The request is basically a search, can return multiple userids
    # for users starting with username. Make sure we got the right one!
    if found_name.upper() != username.upper():
        exc = AmbiguousUserNameError(username)
        # attach the extra users to the exception
        exc.users = tuple(
            _ExtraUser(name=child.text, id=child.attrib["userid"]) for child in root
        )
        raise exc

    return root[0].attrib["userid"]

class ForumMember:
    """Download and store a member data from the Two Plus Two forum.

    Constructing an instance downloads the member's profile page and sets the
    attributes listed in ``_attributes`` plus ``id``, ``download_date``,
    ``last_activity`` and ``join_date``.
    """

    # Captures the hour offset from the page footer text ("GMT <offset>.").
    _tz_re = re.compile(r"GMT (.*?)\.")

    # (attribute name, XPath into the profile page, converter) triples.
    # A ``tuple`` converter keeps every XPath hit; any other converter is
    # applied to the first hit only.
    _attributes = (
        ("username", '//td[@id="username_box"]/h1/text()', str),
        ("rank", '//td[@id="username_box"]/h2/text()', str),
        ("profile_picture", '//td[@id="profilepic_cell"]/img/@src', str),
        ("location", '//div[@id="collapseobj_aboutme"]/div/ul/li/dl/dd[1]/text()', str),
        (
            "total_posts",
            '//div[@id="collapseobj_stats"]/div/fieldset[1]/ul/li[1]/text()',
            _make_int,
        ),
        (
            "posts_per_day",
            '//div[@id="collapseobj_stats"]/div/fieldset[1]/ul/li[2]/text()',
            float,
        ),
        ("public_usergroups", '//ul[@id="public_usergroup_list"]/li/text()', tuple),
        ("avatar", '//img[@id="user_avatar"]/@src', str),
    )

    def __init__(self, username):
        """Resolve ``username`` to a user id, then download and parse the profile."""
        # NOTE(review): the attribute name was lost in the source; ``id`` is
        # the reconstructed name used consistently throughout this class.
        self.id = search_userid(username)
        self._download_and_parse()

    def __repr__(self):
        return f"<{self.__class__.__name__}: {self.username}>"

    @classmethod
    def from_userid(cls, userid: str):
        """Build a member from a known user id, skipping the user name search."""
        # Bypass __init__ so that no user search request is made.
        self = super().__new__(cls)
        self.id = userid
        self._download_and_parse()
        return self

    def _download_and_parse(self):
        """Fetch the profile page and populate every attribute from it."""
        root = self._download_page()
        self._parse_attributes(root)
        tz = self._get_timezone(root)
        self._parse_last_activity(root, tz)
        self._parse_join_date(root)

    @property
    def profile_url(self):
        """URL of this member's profile page."""
        return f"{FORUM_MEMBER_URL}/{self.id}/"

    def _download_page(self):
        """Download the profile page, record the download time, return the HTML tree."""
        stats_page = requests.get(self.profile_url)
        # NOTE(review): right-hand side reconstructed; an aware UTC timestamp
        # matches the UTC-normalised dates produced by _parse_date.
        self.download_date = datetime.now(UTC)
        return etree.HTML(stats_page.text)

    def _parse_attributes(self, root):
        """Set every attribute described in ``_attributes`` from the page tree."""
        for attname, xpath, type_ in self._attributes:
            if type_ is not tuple:
                try:
                    setattr(self, attname, type_(root.xpath(xpath)[0]))
                except IndexError:
                    # The element is missing from this profile.
                    setattr(self, attname, None)
            else:
                setattr(self, attname, type_(root.xpath(xpath)))

    def _get_timezone(self, root):
        """Find timezone information on bottom of the page."""
        tz_str = root.xpath('//div[@class="smallfont" and @align="center"]')[0].text
        hours = int(self._tz_re.search(tz_str).group(1))
        # tzoffset expects the offset in seconds, not minutes.
        return tzoffset(tz_str, hours * 3600)

    def _parse_last_activity(self, root, tz):
        """Set ``last_activity`` (UTC datetime), or None when the page has none."""
        try:
            li = root.xpath('//div[@id="collapseobj_stats"]/div/fieldset[2]/ul/li[1]')[
                0
            ]
            date_str = li[0].tail.strip()
            time_str = li[1].text.strip()
            self.last_activity = self._parse_date(date_str + " " + time_str, tz)
        except IndexError:
            self.last_activity = None

    def _parse_join_date(self, root):
        """Set ``join_date`` (a ``date``) from the statistics box."""
        ul = root.xpath('//div[@id="collapseobj_stats"]/div/fieldset[2]/ul')[0]
        try:
            join_date = ul.xpath("li[2]/text()")[0]
        except IndexError:
            # not everybody has a last activity field.
            # in this case, it's the first li element, not the second
            join_date = ul.xpath("li[1]/text()")[0]
        join_date = join_date.strip()
        self.join_date = datetime.strptime(join_date, "%m-%d-%Y").date()

    @staticmethod
    def _parse_date(date_str, tz):
        """Parse a forum date string given in timezone ``tz`` into a UTC datetime.

        :raises ValueError: the string is neither an absolute
            ``MM-DD-YYYY HH:MM AM/PM`` date nor parseable by parsedatetime
            as a full date and time.
        """
        try:
            dt = datetime.strptime(date_str.strip(), "%m-%d-%Y %I:%M %p")
            return dt.replace(tzinfo=tz).astimezone(UTC)
        except ValueError:
            # in case like "Yesterday 3:30 PM" or dates like that.
            # calculates based on sourceTime. tz is 2p2 forum timezone
            # NOTE(review): right-hand side reconstructed as "now" in the
            # forum's timezone.
            source = datetime.now(tz)
            dt, pt = parsedatetime.Calendar().parseDT(
                date_str, tzinfo=tz, sourceTime=source
            )
            # parsed as a C{datetime}, means that parsing was successful
            if pt == 3:
                return dt.astimezone(UTC)
            raise ValueError(f"Could not parse date: {date_str}")