1
# amonRa_final.py
# /// script
# requires-python = ">=3.8"
# dependencies = [
#     "numpy>=1.19.0",
#     "pandas>=1.2.0",
#     "scikit-learn>=0.24.0",
# ]
# ///
import numpy as np
import pandas as pd
from sklearn.linear_model import Ridge
# The platform automatically injects 'Predictor' into the global namespace.
# Subclassing it directly matches the platform's pipeline rules.
class AmonRaPredictor(Predictor):
    """Official subclassed signal engine for AlphaNova Competition 5.

    Derives four features per (row label, ticker) pair from one wide block
    of values, fits a Ridge regression on them, and returns a
    cross-sectionally de-meaned signal for the most recent row.

    ``Predictor`` is not imported by this module; it must already exist in
    the global namespace when the class statement runs.
    """

    # Top-level column label whose per-ticker block feeds every feature when
    # the input columns are a (feature_name, ticker) MultiIndex.
    PRIMARY_FEATURE = "Feature.1"

    def __init__(self):
        super().__init__()
        self.model = Ridge(alpha=100.0)
        # Column names of the engineered matrix used in the last fit.
        self.feature_names = []
        # False until train() has fitted the model on at least one row.
        self.is_trained = False

    @staticmethod
    def _long_index(wide):
        """Return the row-major (row label, column label) index of *wide*.

        Built directly instead of via ``DataFrame.stack(dropna=False)``: the
        ``dropna`` argument is deprecated in recent pandas releases, and this
        form yields the same ordering on every version.
        """
        return pd.MultiIndex.from_product(
            [wide.index, wide.columns],
            names=[wide.index.name, wide.columns.name],
        )

    def engineer_features(self, features_df):
        """Turn a wide frame into a long feature matrix.

        Args:
            features_df: Wide frame with one row per timestamp. If its
                columns are a MultiIndex, only the ``PRIMARY_FEATURE`` block
                is used; otherwise every column is treated as a ticker.

        Returns:
            DataFrame indexed by (row label, ticker) in row-major order with
            columns ``mom_short``, ``mom_long``, ``vol_risk`` and
            ``cs_rank``. Missing inputs may leave NaN in every column except
            ``vol_risk``, which is zero-filled.

        Raises:
            KeyError: if the columns are a MultiIndex without a
                ``PRIMARY_FEATURE`` block.
        """
        if isinstance(features_df.columns, pd.MultiIndex):
            close_rets = features_df[self.PRIMARY_FEATURE]
        else:
            close_rets = features_df

        # Different lookbacks keep the features from being near-duplicates
        # of one another (the platform applies a 0.5 correlation filter).
        wide_features = {
            "mom_short": close_rets.rolling(window=3, min_periods=1).mean(),
            "mom_long": close_rets.rolling(window=21, min_periods=1).mean(),
            "vol_risk": close_rets.rolling(window=15, min_periods=1).std().fillna(0.0),
            "cs_rank": close_rets.rank(axis=1, pct=True),
        }

        # Every wide frame shares close_rets' shape, so one shared long index
        # is enough and no per-column alignment is needed.
        return pd.DataFrame(
            {
                name: frame.to_numpy(dtype=np.float64).ravel()
                for name, frame in wide_features.items()
            },
            index=self._long_index(close_rets),
        )

    def train(self, features: pd.DataFrame, target: pd.Series) -> None:
        """Fit the Ridge model on every row that has complete data.

        Args:
            features: Wide feature frame, as accepted by engineer_features.
            target: Either a wide frame (rows = timestamps, columns =
                tickers) or a Series already indexed by (timestamp, ticker).

        Rows with a missing or non-finite feature or target are skipped
        rather than filled in, so the model never learns from made-up
        values. If no usable row remains, the model is left untouched.

        Raises:
            ValueError: if ``target`` is a Series without a MultiIndex, since
                it cannot be aligned to (timestamp, ticker) rows.
        """
        if features is None or features.empty or target is None or target.empty:
            return

        X_train = self.engineer_features(features)

        if isinstance(target, pd.DataFrame):
            y_long = pd.Series(
                target.to_numpy(dtype=np.float64).ravel(),
                index=self._long_index(target),
            )
        elif isinstance(target.index, pd.MultiIndex):
            y_long = target
        else:
            raise ValueError(
                "target must be a wide DataFrame or a Series indexed by "
                "(timestamp, ticker)"
            )

        # Align the target onto the feature rows; rows without a target
        # become NaN and are removed by the mask below.
        y_aligned = y_long.reindex(X_train.index)

        X_values = X_train.to_numpy(dtype=np.float64)
        y_values = y_aligned.to_numpy(dtype=np.float64)
        usable = np.isfinite(X_values).all(axis=1) & np.isfinite(y_values)
        if not usable.any():
            return

        self.feature_names = X_train.columns.tolist()
        self.model.fit(X_values[usable], y_values[usable])
        self.is_trained = True

    def predict(self, features: pd.DataFrame) -> pd.Series:
        """Return a de-meaned signal for the last row of *features*.

        Args:
            features: Wide feature frame, as accepted by engineer_features.

        Returns:
            Float Series named ``"signal"`` indexed by ticker. Tickers whose
            features are unavailable get 0.0; the remaining values have
            their mean subtracted, so the whole series sums to zero. An
            empty Series is returned for empty or missing input.
        """
        if features is None or features.empty:
            return pd.Series(dtype=np.float64, name="signal")

        columns = features.columns
        if isinstance(columns, pd.MultiIndex):
            # Prefer the level named 'ticker'; fall back to the innermost
            # level when the levels are unnamed.
            level = "ticker" if "ticker" in columns.names else -1
            tickers = columns.get_level_values(level).unique()
        else:
            tickers = columns

        X_test = self.engineer_features(features)
        latest_timestamp = features.index[-1]
        X_latest = X_test.loc[latest_timestamp]

        if self.is_trained:
            values = X_latest.to_numpy(dtype=np.float64)
            complete = np.isfinite(values).all(axis=1)
            raw_preds = np.full(len(X_latest), np.nan, dtype=np.float64)
            if complete.any():
                # The model is only ever shown rows without NaN/inf.
                raw_preds[complete] = np.asarray(
                    self.model.predict(values[complete]), dtype=np.float64
                ).ravel()
        else:
            # Untrained fallback: short momentum plus cross-sectional rank.
            raw_preds = (
                X_latest["mom_short"].to_numpy(dtype=np.float64)
                + X_latest["cs_rank"].to_numpy(dtype=np.float64)
            )

        # Label predictions with the tickers of the rows they were computed
        # from, then align to the full ticker list. The two orderings can
        # differ when feature blocks list tickers in different orders.
        signal = pd.Series(raw_preds, index=X_latest.index, dtype=np.float64)
        signal = signal.reindex(tickers)

        # Cross-sectional de-meaning; NaN entries are ignored by mean() and
        # then set to 0.0, which keeps the sum at zero.
        signal = signal.sub(signal.mean()).fillna(0.0)
        return pd.Series(signal, index=tickers, dtype=np.float64, name="signal")
if __name__ == "__main__":
    # No standalone entry point: this module only defines AmonRaPredictor.
    pass
# 0 Replies
# No replies yet. Be the first to reply!
# Sign in to reply.