AlphaNova
1

amonRa_final.py

AmonRa's avatarAmonRa
4h ago

# /// script
# requires-python = ">=3.8"
# dependencies = [
#     "numpy>=1.19.0",
#     "pandas>=1.2.0",
#     "scikit-learn>=0.24.0",
# ]
# ///

import numpy as np
import pandas as pd
from sklearn.linear_model import Ridge

# The platform automatically injects 'Predictor' into the global namespace.
# Subclassing it directly matches their pipeline rules.

class AmonRaPredictor(Predictor):
    """Ridge-regression signal engine for AlphaNova Competition 5.

    Subclasses the ``Predictor`` base that the platform injects into the
    global namespace (it is not imported by this file). Four features are
    derived from a wide returns panel, a ``Ridge`` model is fitted on them,
    and ``predict`` returns a cross-sectionally de-meaned signal per ticker.
    """

    # Top-level column key selected from MultiIndex feature frames.
    # NOTE(review): presumably the per-ticker return series -- confirm
    # against the competition data dictionary.
    _RETURNS_KEY = "Feature.1"

    # Rolling look-back lengths, in rows.
    _MOM_SHORT_WINDOW = 3
    _MOM_LONG_WINDOW = 21
    _VOL_WINDOW = 15

    def __init__(self):
        """Create an untrained predictor wrapping ``Ridge(alpha=100.0)``."""
        super().__init__()
        self.model = Ridge(alpha=100.0)
        self.feature_names = []
        self.is_trained = False

    @staticmethod
    def _to_long(wide):
        """Stack the innermost column level into the row index, keeping NaNs.

        ``DataFrame.stack(dropna=False)`` is deprecated from pandas 2.1 on,
        so the ``future_stack`` implementation (which never drops NaN
        entries) is tried first, with the legacy call as a fallback for
        older pandas versions that do not know the keyword.
        """
        try:
            return wide.stack(future_stack=True)
        except TypeError:
            return wide.stack(dropna=False)

    def engineer_features(self, features_df):
        """Build the long-format feature matrix from a wide panel.

        Args:
            features_df: Wide frame with one row per timestamp. When the
                columns are a ``MultiIndex`` the ``"Feature.1"`` group is
                selected from the top level; otherwise the whole frame is
                used as the returns panel.

        Returns:
            DataFrame indexed by (row label, column label) with the columns
            ``mom_short``, ``mom_long``, ``vol_risk`` and ``cs_rank``.
            Entries may be NaN where the input has no data.

        Raises:
            KeyError: If the columns are a ``MultiIndex`` without a
                ``"Feature.1"`` group.
        """
        if isinstance(features_df.columns, pd.MultiIndex):
            close_rets = features_df[self._RETURNS_KEY]
        else:
            close_rets = features_df

        # Different look-backs keep the features from being near-duplicates
        # (the original author cites a 0.5 correlation filter on the platform).
        mom_short = close_rets.rolling(
            window=self._MOM_SHORT_WINDOW, min_periods=1
        ).mean()
        mom_long = close_rets.rolling(
            window=self._MOM_LONG_WINDOW, min_periods=1
        ).mean()
        # A single observation has no sample std (NaN); report that as 0.0.
        vol_risk = close_rets.rolling(
            window=self._VOL_WINDOW, min_periods=1
        ).std().fillna(0.0)
        # Percentile rank of each value within its own row.
        cs_rank = close_rets.rank(axis=1, pct=True)

        return pd.DataFrame({
            'mom_short': self._to_long(mom_short),
            'mom_long': self._to_long(mom_long),
            'vol_risk': self._to_long(vol_risk),
            'cs_rank': self._to_long(cs_rank),
        })

    def train(self, features: pd.DataFrame, target: pd.Series) -> None:
        """Fit the ridge model on the rows shared by features and target.

        Args:
            features: Wide feature panel accepted by ``engineer_features``.
            target: Either a wide frame laid out like the returns panel
                (stacked here), or a Series that is already in long form
                and indexed like the output of ``engineer_features``.

        The call is a no-op when an input is missing or empty, or when no
        usable training row remains; ``is_trained`` then keeps its previous
        value.
        """
        if features is None or features.empty or target is None or target.empty:
            return

        X_train = self.engineer_features(features)
        # A Series has no ``stack``; treat it as already long-format.
        y_train = target if isinstance(target, pd.Series) else self._to_long(target)
        # Missing targets are treated as 0.0, as in the original implementation.
        y_train = y_train.fillna(0.0)

        # Keep only the rows present on both sides.
        common_idx = X_train.index.intersection(y_train.index)
        X_train = X_train.loc[common_idx]
        y_train = y_train.loc[common_idx]

        # Ridge rejects NaN/inf inputs, so drop rows with incomplete features.
        complete = np.isfinite(X_train.to_numpy(dtype=np.float64)).all(axis=1)
        X_train = X_train.loc[complete]
        y_train = y_train.loc[complete]
        if X_train.empty:
            return

        self.feature_names = X_train.columns.tolist()
        self.model.fit(X_train.values, y_train.values)
        self.is_trained = True

    def predict(self, features: pd.DataFrame) -> pd.Series:
        """Return the de-meaned signal for the last row of ``features``.

        Args:
            features: Wide feature panel accepted by ``engineer_features``;
                the last row is the snapshot being predicted.

        Returns:
            Float Series named ``"signal"`` indexed by ticker. Values are
            de-meaned across the tickers that received a prediction;
            tickers without one get 0.0, so the signal sums to zero.
            An empty Series is returned for missing or empty input.
        """
        if features is None or features.empty:
            return pd.Series(dtype=np.float64, name="signal")

        columns = features.columns
        if isinstance(columns, pd.MultiIndex):
            # Fall back to the innermost level when no level is named 'ticker'.
            level = 'ticker' if 'ticker' in columns.names else -1
            tickers = columns.get_level_values(level).unique()
        else:
            tickers = columns

        # Only the trailing rows can influence the latest rolling values,
        # so there is no need to recompute features over the full history.
        lookback = max(
            self._MOM_SHORT_WINDOW, self._MOM_LONG_WINDOW, self._VOL_WINDOW
        )
        recent = features.iloc[-lookback:]
        latest = self.engineer_features(recent).loc[recent.index[-1]]

        if self.is_trained:
            if self.feature_names:
                # Present the columns in the order used for fitting.
                latest = latest[self.feature_names]
            values = latest.to_numpy(dtype=np.float64)
            raw_preds = np.full(len(latest), np.nan)
            # Ridge rejects NaN/inf inputs; score only complete rows and
            # leave the rest as NaN (zero-filled after de-meaning below).
            complete = np.isfinite(values).all(axis=1)
            if complete.any():
                raw_preds[complete] = np.ravel(self.model.predict(values[complete]))
        else:
            # Untrained fallback: short momentum plus cross-sectional rank.
            raw_preds = latest['mom_short'].values + latest['cs_rank'].values

        # Label predictions with the tickers they were computed for, which
        # may be a subset of (or ordered differently from) ``tickers``.
        signal = pd.Series(raw_preds, index=latest.index, dtype=np.float64)

        # MANDATORY COMPLIANCE STEP: cross-sectional de-meaning (sum is 0).
        signal = signal.sub(signal.mean())
        if not signal.index.equals(tickers):
            signal = signal.reindex(tickers)
        signal = signal.fillna(0.0)
        signal.name = "signal"
        return signal

# Script entry guard: nothing to run when executed directly; the platform
# imports this module and drives the predictor class itself.
if __name__ == "__main__":
    pass

0 Replies

No replies yet. Be the first to reply!

Sign in to reply.