
Dash graphing app (Flask-based) with Polars + Postgres

# pyproject.toml
[project]
name = "tv-dashboard-polars"
version = "0.1.0"
requires-python = ">=3.11"
dependencies = [
    "dash>=2.14.0",
    "polars>=0.20.0",
    "adbc-driver-postgresql>=0.8.0",
    "adbc-driver-manager>=0.8.0",
    "numpy>=1.24.0",
    "psycopg[binary]>=3.1.0",  # used by seed_data.py for DDL and the readiness wait
]
# Dockerfile
FROM python:3.11-slim-bookworm
COPY --from=ghcr.io/astral-sh/uv:latest /uv /bin/uv

RUN apt-get update && apt-get install -y \
    libpq5 \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app
COPY pyproject.toml .

# Install dependencies into the system environment for simplicity in Docker
RUN uv pip install --system -r pyproject.toml

COPY . .

# Run the seeder, then start the dashboard
CMD python seed_data.py && python app.py
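
One behavior worth noting: the CMD reruns seed_data.py on every container start, and the seeder (shown further down) appends to raw_mentions, so restarting the web service duplicates rows. A minimal sketch of an optional guard, assuming it is called inside seed() before any data is generated; the helper name is hypothetical and not part of the files below:

# Hypothetical guard: skip seeding when raw_mentions already has rows.
import psycopg

def already_seeded(db_url: str) -> bool:
    with psycopg.connect(db_url) as conn:
        with conn.cursor() as cur:
            # to_regclass returns NULL when the table does not exist yet
            cur.execute("SELECT to_regclass('raw_mentions') IS NOT NULL")
            if not cur.fetchone()[0]:
                return False
            cur.execute("SELECT EXISTS (SELECT 1 FROM raw_mentions)")
            return cur.fetchone()[0]

If it returns True, seed() can simply print a message and return without inserting anything.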
# docker-compose.yml
services:
  db:
    image: postgres:15-alpine
    environment:
      POSTGRES_USER: user
      POSTGRES_PASSWORD: password
      POSTGRES_DB: newsdb
    ports:
      - "5432:5432"
    volumes:
      - pgdata:/var/lib/postgresql/data
  web:
    build: .
    ports:
      - "5001:5000"
    environment:
      - DATABASE_URL=postgresql://user:password@db:5432/newsdb
    depends_on:
      - db
volumes:
  pgdata:
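
With the stack running, the db service publishes port 5432 to the host, so the same Polars-over-ADBC read path the dashboard uses can be sanity-checked from outside the containers. A minimal sketch, assuming the host-side URL below and that seed_data.py has already populated raw_mentions:

# Quick host-side check of the Polars + ADBC read path (hypothetical helper script).
import polars as pl

HOST_DB_URL = "postgresql://user:password@localhost:5432/newsdb"  # host side of the 5432:5432 mapping

df = pl.read_database_uri(
    "SELECT network, COUNT(*) AS mentions FROM raw_mentions GROUP BY network",
    HOST_DB_URL,
    engine="adbc",
)
print(df)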
# app.py
import os
import polars as pl
from dash import Dash, html, dcc, Input, Output, callback, dash_table
import plotly.graph_objects as go

DB_URL = os.getenv("DATABASE_URL", "postgresql://user:password@db:5432/newsdb")

app = Dash(__name__)
server = app.server
app.layout = html.Div(style={'backgroundColor': '#f4f7f9', 'padding': '20px', 'fontFamily': 'sans-serif'}, children=[
    html.H2("Media Coverage Intelligence", style={'textAlign': 'center', 'color': '#2c3e50'}),

    # Summary Stats
    html.Div(id='stats-container', style={'marginBottom': '20px'}),

    # Control Panel
    html.Div(style={'display': 'flex', 'gap': '20px', 'padding': '20px', 'backgroundColor': 'white', 'borderRadius': '8px', 'boxShadow': '0 2px 4px rgba(0,0,0,0.1)'}, children=[
        html.Div(style={'flex': '1'}, children=[
            html.Label("1. Analyze Window:", style={'fontWeight': 'bold'}),
            dcc.DatePickerRange(id='date-range', start_date='2022-02-24', end_date='2022-03-15')
        ]),
        html.Div(style={'flex': '1'}, children=[
            html.Label("2. Smoothing (Rolling Average):", style={'fontWeight': 'bold'}),
            dcc.Slider(
                id='smooth-slider',
                min=0, max=48, step=4, value=8,
                marks={0: 'None', 8: '4h', 24: '12h', 48: '24h'}
            )
        ])
    ]),

    dcc.Graph(id='chart-main', style={'marginTop': '20px'})
])
@callback(
    [Output('chart-main', 'figure'), Output('stats-container', 'children')],
    [Input('date-range', 'start_date'), Input('date-range', 'end_date'), Input('smooth-slider', 'value')]
)
def update_dashboard(start, end, smooth_window):
    # SQL aggregation into 30-minute buckets, one row per (bucket, network)
    query = f"""
        SELECT
            date_bin('30 minutes', timestamp, TIMESTAMP '2022-01-01') AS time_bucket,
            network,
            COUNT(*) AS count
        FROM raw_mentions
        WHERE topic = 'Ukraine' AND timestamp BETWEEN '{start}' AND '{end} 23:59:59'
        GROUP BY 1, 2
        ORDER BY 1 ASC
    """
    try:
        df = pl.read_database_uri(query, DB_URL, engine="adbc")
    except Exception:
        # The database may not be up yet, or the seeder may still be running
        return go.Figure(), "Waiting for database..."

    if df.is_empty():
        return go.Figure(), "No data found."
    # --- SUMMARY STATS ---
    summary = df.group_by("network").agg([
        pl.sum("count").alias("Total"),
        pl.max("count").alias("Peak")
    ]).to_dicts()

    stats_table = dash_table.DataTable(
        data=summary,
        columns=[{"name": i, "id": i} for i in ["network", "Total", "Peak"]],
        style_cell={'textAlign': 'center', 'padding': '5px'}
    )

    # --- CHART LOGIC ---
    fig = go.Figure()
    colors = {'CNN': '#3b5b92', 'FOXNEWS': '#d95f52', 'MSNBC': '#4dbd9c'}

    for net in ['CNN', 'FOXNEWS', 'MSNBC']:
        net_df = df.filter(pl.col("network") == net).sort("time_bucket")

        # Original raw data
        y_val = net_df['count']
        opacity = 0.3 if smooth_window > 0 else 1.0

        # Add the raw spikes (as a faded trace if smoothing is on)
        fig.add_trace(go.Scatter(
            x=net_df['time_bucket'], y=y_val, name=f"{net} (Raw)",
            line=dict(width=1, color=colors[net]),
            opacity=opacity, showlegend=(smooth_window == 0)
        ))

        # POLARS MAGIC: add a moving-average trend line
        if smooth_window > 0:
            # rolling_mean takes a window size in rows (here, 30-minute buckets)
            ma_val = net_df.select(
                pl.col("count").rolling_mean(window_size=smooth_window, center=True)
            ).to_series()
            fig.add_trace(go.Scatter(
                x=net_df['time_bucket'], y=ma_val, name=f"{net} Trend",
                line=dict(width=4, color=colors[net]),
                mode='lines'
            ))

    fig.update_layout(
        hovermode="x unified",
        plot_bgcolor='white',
        xaxis=dict(rangeslider=dict(visible=True)),
        yaxis=dict(title="Mentions per 30m"),
        legend=dict(orientation="h", y=1.1)
    )
    return fig, stats_table


if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000)
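
A note on the smoothing: rolling_mean counts rows, and each row here is a 30-minute bucket, so the slider value 8 averages roughly a 4-hour span (hence the slider marks). A tiny standalone illustration on toy numbers, independent of the app:

# Toy illustration of Polars rolling_mean with a centered window (not app data).
import polars as pl

s = pl.Series("count", [1, 9, 2, 8, 3, 7, 4])

# window_size is in rows; center=True aligns the average with the middle of the
# window, which leaves nulls at both edges of the series.
print(s.rolling_mean(window_size=3, center=True))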
# seed_data.py
import os
import time
import numpy as np
import polars as pl
import psycopg
from datetime import datetime, timedelta

DB_URL = os.getenv("DATABASE_URL", "postgresql://user:password@db:5432/newsdb")
def seed():
    # Wait until Postgres accepts connections, then create the table and indexes
    while True:
        try:
            with psycopg.connect(DB_URL, autocommit=True) as conn:
                with conn.cursor() as cur:
                    cur.execute("CREATE TABLE IF NOT EXISTS raw_mentions (timestamp TIMESTAMP, network TEXT, topic TEXT)")
                    # SPEED BOOST: index the timestamp and (topic, network) columns
                    cur.execute("CREATE INDEX IF NOT EXISTS idx_timestamp ON raw_mentions (timestamp)")
                    cur.execute("CREATE INDEX IF NOT EXISTS idx_topic_net ON raw_mentions (topic, network)")
            print("Database and indexes ready.")
            break
        except psycopg.OperationalError:
            print("Waiting for Postgres...")
            time.sleep(2)
    # Synthetic data: a Gaussian spike of mentions on top of low background noise,
    # spread over 2160 hourly steps (90 days) starting 2022-01-01
    start = datetime(2022, 1, 1)
    networks, topics = ['CNN', 'FOXNEWS', 'MSNBC'], ['Ukraine', 'Russia']
    records = []
    for topic in topics:
        for net in networks:
            x = np.linspace(0, 100, 2160)
            spike = 20 * np.exp(-0.5 * ((x - 60) / 5)**2) + np.random.normal(2, 0.5, 2160)
            for i, count in enumerate(spike):
                if count <= 0:
                    continue
                base_time = start + timedelta(hours=i)
                for _ in range(int(count)):
                    records.append((base_time + timedelta(seconds=np.random.randint(0, 3600)), net, topic))

    df = pl.DataFrame(records, schema=["timestamp", "network", "topic"], orient="row")
    df.write_database("raw_mentions", DB_URL, if_table_exists="append", engine="adbc")
    print("Seeding complete.")


if __name__ == "__main__":
    seed()
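
For orientation on the synthetic data: the Gaussian peak sits at x = 60 on a 0-100 grid spread over 2160 hourly steps starting 2022-01-01, which lands in late February 2022; that is why the dashboard's default date window opens on 2022-02-24. A small sketch of that arithmetic, reusing the constants from seed():

# Where the synthetic spike lands in calendar time (same constants as seed()).
import numpy as np
from datetime import datetime, timedelta

x = np.linspace(0, 100, 2160)
peak_index = int(np.argmax(20 * np.exp(-0.5 * ((x - 60) / 5) ** 2)))
peak_time = datetime(2022, 1, 1) + timedelta(hours=peak_index)
print(peak_index, peak_time)  # hour 1295 -> 2022-02-23 23:00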