# Source code for unsprawl.providers.data.us.ca.sf.datasf

"""unsprawl.providers.data.us.ca.sf.datasf.

DataSFProvider (US-CA-SF) -- placeholder provider for housing transactions.

This is a *dumb fetcher*:
- handles HTTP/auth/caching
- returns raw tabular data
- does NOT create ``Asset`` objects (adapter layer does that)

TODO(hackathon): For the demo, we can start with a single public dataset endpoint
from data.sfgov.org and a minimal schema.
"""

from __future__ import annotations

from dataclasses import dataclass
from pathlib import Path

import pandas as pd


def _default_data_root() -> Path:
    """Return the default root directory for cached provider data.

    The location is ``~/.unsprawl/data`` with the leading ``~`` expanded to
    the current user's home directory. The directory is not created here.

    Returns
    -------
    Path
        The expanded ``~/.unsprawl/data`` path.
    """
    return Path("~/.unsprawl/data").expanduser()
@dataclass(frozen=True)
class DataSFProvider:
    """Provider for San Francisco open data (housing transactions).

    Returns raw tabular data only; it does not build ``Asset`` objects.
    Instances are immutable (``frozen=True``).
    """

    # Root directory under which cached datasets are stored. The default is
    # computed once, when the class body is evaluated.
    data_root: Path = _default_data_root()

    @property
    def transactions_path(self) -> Path:
        """Default cache location for housing transactions.

        Resolves to ``<data_root>/us/ca/sf/housing/transactions.csv``.
        """
        return self.data_root / "us" / "ca" / "sf" / "housing" / "transactions.csv"

    def fetch_housing_transactions(self, *, force: bool = False) -> pd.DataFrame:
        """Fetch housing transaction records as a DataFrame.

        Parameters
        ----------
        force
            When ``True``, ignore any cached CSV at ``transactions_path``.

        Returns
        -------
        pd.DataFrame
            The cached records when a non-empty cache exists and ``force`` is
            false; otherwise an empty DataFrame (stub behaviour).

        Notes
        -----
        This is currently a stub to establish the interface. The first
        implementation should:

        - fetch from data.sfgov.org (Socrata) or another SF open data source
        - cache to `transactions_path`
        - provide an offline fallback for CI

        Until then the stub writes an empty placeholder file at
        ``transactions_path``. An empty cache file is treated as a cache miss
        rather than parsed, because ``pd.read_csv`` raises
        ``pandas.errors.EmptyDataError`` on a file with no columns.
        """
        cache_path = self.transactions_path

        if cache_path.exists() and not force:
            try:
                return pd.read_csv(cache_path)
            except pd.errors.EmptyDataError:
                # The placeholder written below has no header row. Fall
                # through and behave as if nothing had been cached, so a
                # second call does not crash on the stub's own output.
                pass

        # TODO(hackathon): implement network fetch + synthetic fallback.
        cache_path.parent.mkdir(parents=True, exist_ok=True)
        cache_path.write_text("", encoding="utf-8")
        return pd.DataFrame()