Build a Custom Indicator

Create a fully custom technical indicator from scratch in pandas — vectorised, compatible with TA-Lib and pandas-ta pipelines.

Tags: custom indicator · pandas · vectorized

Custom Indicator Framework

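This notebook defines a standardized protocol for building custom technical indicators from raw OHLCV data without relying on external technical-analysis libraries (only pandas and NumPy are used for the calculations). Three custom indicators are implemented: VWAP, a composite momentum score, and a price efficiency ratio. A simple rolling-mean RSI is added at the end and plotted beneath a candlestick chart.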

1. Dependency Installation

[1]

!pip install pandas numpy plotly

Requirement already satisfied: pandas in /usr/local/lib/python3.12/dist-packages (2.2.2)
Requirement already satisfied: numpy in /usr/local/lib/python3.12/dist-packages (2.0.2)
Requirement already satisfied: plotly in /usr/local/lib/python3.12/dist-packages (5.24.1)
Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.12/dist-packages (from pandas) (2.9.0.post0)
Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.12/dist-packages (from pandas) (2025.2)
Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.12/dist-packages (from pandas) (2026.1)
Requirement already satisfied: tenacity>=6.2.0 in /usr/local/lib/python3.12/dist-packages (from plotly) (9.1.4)
Requirement already satisfied: packaging in /usr/local/lib/python3.12/dist-packages (from plotly) (26.1)
Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.12/dist-packages (from python-dateutil>=2.8.2->pandas) (1.17.0)

2. Library Imports

[2]

import warnings
warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots

3. Large Dataset Generation

[3]

def generate_larger_data(periods: int, seed: "int | None" = None) -> pd.DataFrame:
    """Generate a synthetic one-minute OHLCV dataset as a random walk.

    Each candle opens near the previous close, closes a random distance from
    its open, and gets non-negative wick extensions above and below the body.
    Prices are truncated to integers and floored at 1.

    Args:
        periods (int): Number of one-minute candles to generate, starting at
            2024-01-01 00:00 UTC. ``0`` yields an empty frame that still
            carries every column.
        seed (int | None): Seed for a private ``RandomState``. When ``None``
            (the default) the global ``np.random`` state is used, exactly as
            before this parameter existed, so an earlier ``np.random.seed``
            call still takes effect.

    Returns:
        pd.DataFrame: Frame indexed by a UTC ``datetime`` index with columns
        ``open``, ``high``, ``low``, ``close`` and ``volume``.
    """
    price_columns = ["open", "high", "low", "close"]

    # Both the module and RandomState expose normal()/uniform(); the draw
    # order below matches the original so seeded global runs are unchanged.
    rng = np.random if seed is None else np.random.RandomState(seed)

    start_date = pd.to_datetime("2024-01-01 00:00:00+00:00")
    datetime_index = pd.date_range(
        start_date, periods=periods, freq="1min", tz="UTC", name="datetime"
    )

    last_close = 42000  # Starting price
    volatility_scale = 0.005  # Controls the general magnitude of price changes
    wick_deviation_scale = 0.002  # Controls how far wicks extend beyond the body

    candles = []
    for _ in range(periods):
        # Open drifts slightly from the previous close (a tenth of the
        # volatility used for the open-to-close move).
        open_price = last_close + rng.normal(0, last_close * volatility_scale * 0.1)
        close_price = open_price + rng.normal(0, last_close * volatility_scale)

        # Wick extensions are absolute values, so high >= body top and
        # low <= body bottom by construction; no further reordering is needed.
        upper_wick = abs(rng.normal(0, last_close * wick_deviation_scale))
        lower_wick = abs(rng.normal(0, last_close * wick_deviation_scale))
        high_price = max(open_price, close_price) + upper_wick
        low_price = min(open_price, close_price) - lower_wick

        # int() truncation is monotonic, so the OHLC ordering survives it;
        # the floor at 1 keeps every price a positive integer.
        candle = {
            "open": max(1, int(open_price)),
            "high": max(1, int(high_price)),
            "low": max(1, int(low_price)),
            "close": max(1, int(close_price)),
        }
        candles.append(candle)
        last_close = candle["close"]  # The next candle walks from this close

    # Explicit columns keep the schema intact even when no candles were made.
    df_large_raw = pd.DataFrame(candles, index=datetime_index, columns=price_columns)

    # Simulate volume with some fluctuation
    df_large_raw["volume"] = rng.uniform(100.0, 500.0, periods)

    return df_large_raw

# Number of one-minute candles to synthesize
periods = 500

# Build the dataset and move the datetime index into an ordinary column,
# which is the layout the indicator function expects.
df_large = generate_larger_data(periods).reset_index()

# Work on an independent copy so df_large keeps the raw generated data
df = df_large.copy()

print("--- Large Dataset Sample ---")
display(df.head(5))
df.info()

--- Large Dataset Sample ---

	datetime	open	high	low	close	volume
0	2024-01-01 00:00:00+00:00	42002	42078	41822	41825	454.826067
1	2024-01-01 00:01:00+00:00	41809	42284	41782	42207	283.640710
2	2024-01-01 00:02:00+00:00	42222	42372	42213	42226	136.044669
3	2024-01-01 00:03:00+00:00	42229	42756	42223	42728	321.503868
4	2024-01-01 00:04:00+00:00	42688	42941	42616	42924	101.967045

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 6 columns):
 #   Column    Non-Null Count  Dtype              
---  ------    --------------  -----              
 0   datetime  500 non-null    datetime64[ns, UTC]
 1   open      500 non-null    int64              
 2   high      500 non-null    int64              
 3   low       500 non-null    int64              
 4   close     500 non-null    int64              
 5   volume    500 non-null    float64            
dtypes: datetime64[ns, UTC](1), float64(1), int64(4)
memory usage: 23.6 KB

4. Custom Indicators Function

[4]

def compute_custom_indicators(df: pd.DataFrame, window: int = 10) -> pd.DataFrame:
    """
    Compute three custom indicators: VWAP, Composite Momentum Score,
    and Price Efficiency Ratio.

    The input is not modified. Rows are sorted by ``datetime`` before any
    calculation, and the result carries a fresh 0..n-1 index.

    Args:
        df     (pd.DataFrame): OHLCV DataFrame with ``datetime``, ``open``,
                               ``high``, ``low``, ``close`` and ``volume`` columns.
        window (int):          Rolling window (in rows) for the momentum and
                               efficiency calculations. Must be >= 1.

    Returns:
        pd.DataFrame: The six input columns plus ``typical_price``, ``vwap``,
        ``vwap_distance``, ``momentum_score`` and ``efficiency_ratio``. The
        first ``window`` rows of the last two columns are NaN because no full
        lookback exists yet.

    Raises:
        KeyError:   If any required column is missing.
        ValueError: If ``window`` is smaller than 1.
    """
    base_columns = ["datetime", "open", "high", "low", "close", "volume"]
    missing = [name for name in base_columns if name not in df.columns]
    if missing:
        raise KeyError(f"Input DataFrame is missing required columns: {missing}")
    if window < 1:
        raise ValueError(f"window must be a positive integer, got {window}")

    ordered = df.copy().sort_values("datetime", ignore_index=True)
    high, low = ordered["high"], ordered["low"]
    close, volume = ordered["close"], ordered["volume"]

    # --- Typical Price ---
    # Average of high, low and close: one representative price per candle
    # that reflects the full range rather than the close alone.
    typical_price = (high + low + close) / 3

    # --- VWAP (Volume Weighted Average Price) ---
    # Running volume-weighted mean of the typical price, accumulated from the
    # first row of the frame (it is not reset per session). vwap_distance is
    # the close's fractional offset from it: positive means trading above VWAP.
    vwap = (typical_price * volume).cumsum() / volume.cumsum()
    vwap_distance = (close - vwap) / vwap

    # --- Composite Momentum Score ---
    # Mean of three components, each scaled to roughly 0-1:
    #   1. Rate of change of the close over `window` rows.
    #   2. Close position within the candle range (0 = at low, 1 = at high).
    #   3. Volume relative to its rolling mean.
    # Components 1 and 3 are min-max scaled over the WHOLE series, so a row's
    # score depends on values that come after it.
    roc = close.pct_change(window)
    vol_ratio = volume / volume.rolling(window).mean()

    candle_range = high - low
    raw_close_pos = (close - low) / candle_range.replace(0, np.nan)
    # A flat candle (high == low) has no defined position; treat it as the
    # neutral midpoint instead of letting NaN wipe out the whole score.
    close_pos = raw_close_pos.where(candle_range != 0, 0.5)

    # The small epsilon avoids 0/0 when a component is constant.
    roc_norm = (roc - roc.min()) / (roc.max() - roc.min() + 1e-9)
    vol_norm = (vol_ratio - vol_ratio.min()) / (vol_ratio.max() - vol_ratio.min() + 1e-9)
    momentum_score = (roc_norm + close_pos + vol_norm) / 3

    # --- Price Efficiency Ratio ---
    # |net move over `window` rows| / sum of |row-to-row moves| in that span.
    # Near 1: price travelled in a straight line (trending). Near 0: lots of
    # movement that ended where it began (choppy). A completely flat span has
    # zero path length and is left as NaN.
    net_move = (close - close.shift(window)).abs()
    total_path = close.diff().abs().rolling(window).sum()
    efficiency_ratio = net_move / total_path.replace(0, np.nan)

    # Assemble on an explicit copy so callers can append further columns to
    # the result without pandas treating it as a slice of another frame.
    result = ordered[base_columns].copy()
    result["typical_price"] = typical_price
    result["vwap"] = vwap
    result["vwap_distance"] = vwap_distance
    result["momentum_score"] = momentum_score
    result["efficiency_ratio"] = efficiency_ratio
    return result

# Run the indicator pipeline on the synthetic candles with a 10-row lookback
df_indicators_large = compute_custom_indicators(df, window=10)

# Preview the first rows, then summarize dtypes and non-null counts
print("--- Custom Indicators Output (First 5 rows) ---")
display(df_indicators_large.head(5))
df_indicators_large.info()

--- Custom Indicators Output (First 5 rows) ---

	datetime	open	high	low	close	volume	typical_price	vwap	vwap_distance	momentum_score	efficiency_ratio
0	2024-01-01 00:00:00+00:00	42002	42078	41822	41825	454.826067	41908.333333	41908.333333	-0.001988	NaN	NaN
1	2024-01-01 00:01:00+00:00	41809	42284	41782	42207	283.640710	42091.000000	41978.494517	0.005443	NaN	NaN
2	2024-01-01 00:02:00+00:00	42222	42372	42213	42226	136.044669	42270.333333	42023.894855	0.004809	NaN	NaN
3	2024-01-01 00:03:00+00:00	42229	42756	42223	42728	321.503868	42569.000000	42170.425932	0.013222	NaN	NaN
4	2024-01-01 00:04:00+00:00	42688	42941	42616	42924	101.967045	42827.000000	42222.005152	0.016626	NaN	NaN

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 11 columns):
 #   Column            Non-Null Count  Dtype              
---  ------            --------------  -----              
 0   datetime          500 non-null    datetime64[ns, UTC]
 1   open              500 non-null    int64              
 2   high              500 non-null    int64              
 3   low               500 non-null    int64              
 4   close             500 non-null    int64              
 5   volume            500 non-null    float64            
 6   typical_price     500 non-null    float64            
 7   vwap              500 non-null    float64            
 8   vwap_distance     500 non-null    float64            
 9   momentum_score    490 non-null    float64            
 10  efficiency_ratio  490 non-null    float64            
dtypes: datetime64[ns, UTC](1), float64(6), int64(4)
memory usage: 43.1 KB

5. RSI Calculation and Plotting

[5]

def calculate_rsi(df: pd.DataFrame, window: int = 14) -> pd.Series:
    """Compute a simple-moving-average RSI of the ``close`` column.

    Average gain and average loss are plain rolling means over ``window``
    close-to-close changes (no Wilder/exponential smoothing).

    Args:
        df (pd.DataFrame): Frame with a numeric ``close`` column, in
            chronological order.
        window (int): Number of close-to-close changes averaged per value.

    Returns:
        pd.Series: RSI on a 0-100 scale, aligned to ``df``'s index. The first
        ``window`` entries are NaN, because ``window`` changes need
        ``window + 1`` closes. A window with gains but no losses gives 100;
        a window with no movement at all gives NaN.
    """
    delta = df["close"].diff()

    # clip() keeps the leading NaN produced by diff(), so the first rolling
    # window is not padded with a fabricated zero change.
    gains = delta.clip(lower=0)
    losses = (-delta).clip(lower=0)

    avg_gain = gains.rolling(window=window).mean()
    avg_loss = losses.rolling(window=window).mean()

    # Division by a zero average loss yields inf, which maps to RSI = 100.
    rs = avg_gain / avg_loss
    return 100 - (100 / (1 + rs))

# Add a 14-period RSI column to the indicator frame (modifies it in place).
df_indicators_large['rsi_14'] = calculate_rsi(df_indicators_large, window=14)

# Two stacked panels sharing one x-axis: price on top (70% of the height),
# RSI underneath (30%).
fig = make_subplots(
    rows=2, cols=1,
    shared_xaxes=True,
    vertical_spacing=0.03,
    row_heights=[0.7, 0.3]
)

# Candlestick chart (top panel)
fig.add_trace(go.Candlestick(
    x=df_indicators_large["datetime"],
    open=df_indicators_large['open'],
    high=df_indicators_large['high'],
    low=df_indicators_large['low'],
    close=df_indicators_large['close'],
    name='Price'
), row=1, col=1)

# RSI line (bottom panel)
fig.add_trace(go.Scatter(
    x=df_indicators_large["datetime"],
    y=df_indicators_large['rsi_14'],
    mode='lines',
    name='RSI 14',
    line=dict(color='purple', width=1)
), row=2, col=1)

# Reference lines on the RSI panel at 70 (red) and 30 (green), the levels
# this notebook treats as overbought / oversold.
fig.add_hline(y=70, line_dash="dot", line_color="red", row=2, col=1)
fig.add_hline(y=30, line_dash="dot", line_color="green", row=2, col=1)

# Figure-level layout. The unsuffixed xaxis_* / yaxis* keys address the
# first (top) subplot's axes.
# NOTE(review): with shared x-axes the 'Date' title is set on the top panel's
# axis and may render between the two panels — confirm the placement.
fig.update_layout(
    title_text='Candlestick Chart with RSI',
    xaxis_rangeslider_visible=False,
    xaxis_title='Date',
    yaxis_title='Price',
    height=800,
    yaxis=dict(autorange=True) # Let the price axis auto-scale to the candle data
)

# Label the bottom panel's y-axis.
fig.update_yaxes(title_text="RSI", row=2, col=1)

fig.show()

[ ]