"""unsprawl.providers.data.us.ca.sf.datasf.
DataSFProvider (US-CA-SF) -- placeholder provider for housing transactions.
This is a *dumb fetcher*:
- handles HTTP/auth/caching
- returns raw tabular data
- does NOT create ``Asset`` objects (adapter layer does that)
TODO(hackathon): For the demo, we can start with a single public dataset endpoint
from data.sfgov.org and a minimal schema.
"""
from __future__ import annotations

from dataclasses import dataclass, field
from pathlib import Path

import pandas as pd
[docs]
def _default_data_root() -> Path:
return Path("~/.unsprawl/data").expanduser()
[docs]
@dataclass(frozen=True)
class DataSFProvider:
    """Provider for San Francisco open data (housing transactions).

    A thin fetcher: it returns raw tabular data and leaves the construction
    of domain objects to other layers.

    Attributes
    ----------
    data_root:
        Root directory of the on-disk cache. Defaults to the value returned
        by ``_default_data_root()``, evaluated when an instance is created.
    """

    # ``default_factory`` (rather than a plain default value) moves the
    # home-directory expansion from import time to instantiation time, so a
    # HOME that changes after import is still honoured. The lambda also defers
    # the lookup of the helper itself until an instance is built.
    data_root: Path = field(default_factory=lambda: _default_data_root())

    @property
    def transactions_path(self) -> Path:
        """Default cache location for housing transactions."""
        return self.data_root / "us" / "ca" / "sf" / "housing" / "transactions.csv"

    def fetch_housing_transactions(self, *, force: bool = False) -> pd.DataFrame:
        """Fetch housing transaction records as a DataFrame.

        Parameters
        ----------
        force:
            When true, skip the cached CSV even if it exists.

        Returns
        -------
        pandas.DataFrame
            The parsed cache file when it exists, is non-empty and ``force``
            is false; otherwise an empty DataFrame.

        Notes
        -----
        This is currently a stub to establish the interface.
        The first implementation should:
        - fetch from data.sfgov.org (Socrata) or another SF open data source
        - cache to `transactions_path`
        - provide an offline fallback for CI

        Until then, a cache miss (or ``force=True``) writes an empty
        placeholder file at `transactions_path`, replacing any existing
        content.
        """
        cache_path = self.transactions_path
        if cache_path.exists() and not force:
            try:
                return pd.read_csv(cache_path)
            except pd.errors.EmptyDataError:
                # The stub below leaves an empty placeholder file, which
                # ``read_csv`` refuses to parse. Treat it as a cache miss
                # instead of crashing on every call after the first.
                pass
        # TODO(hackathon): implement network fetch + synthetic fallback.
        cache_path.parent.mkdir(parents=True, exist_ok=True)
        cache_path.write_text("", encoding="utf-8")
        return pd.DataFrame()