Initial clean commit
This commit is contained in:
commit
f984bc4cf8
14
.gitignore
vendored
Normal file
14
.gitignore
vendored
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
data/
|
||||||
|
*.parquet
|
||||||
|
*.xlsx
|
||||||
|
*.pdf
|
||||||
|
.venv/
|
||||||
|
node_modules/
|
||||||
|
.DS_Store
|
||||||
|
latex/*.pdf
|
||||||
|
*.log
|
||||||
|
data/
|
||||||
|
*.parquet
|
||||||
|
*.xlsx
|
||||||
|
latex/
|
||||||
|
latex/*.pdf
|
||||||
3
src/optimization/__init__.py
Normal file
3
src/optimization/__init__.py
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
from .model_builder import build_model, load_tables, solve_model
|
||||||
|
|
||||||
|
__all__ = ["build_model", "load_tables", "solve_model"]
|
||||||
BIN
src/optimization/__pycache__/__init__.cpython-313.pyc
Normal file
BIN
src/optimization/__pycache__/__init__.cpython-313.pyc
Normal file
Binary file not shown.
BIN
src/optimization/__pycache__/model_builder.cpython-313.pyc
Normal file
BIN
src/optimization/__pycache__/model_builder.cpython-313.pyc
Normal file
Binary file not shown.
BIN
src/optimization/__pycache__/run_optimization.cpython-313.pyc
Normal file
BIN
src/optimization/__pycache__/run_optimization.cpython-313.pyc
Normal file
Binary file not shown.
1479
src/optimization/model_builder.py
Normal file
1479
src/optimization/model_builder.py
Normal file
File diff suppressed because it is too large
Load Diff
731
src/optimization/run_optimization.py
Normal file
731
src/optimization/run_optimization.py
Normal file
@ -0,0 +1,731 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
import pyomo.environ as pyo
|
||||||
|
from pyomo.environ import value
|
||||||
|
|
||||||
|
SRC_ROOT = Path(__file__).resolve().parents[1]
|
||||||
|
if str(SRC_ROOT) not in sys.path:
|
||||||
|
sys.path.insert(0, str(SRC_ROOT))
|
||||||
|
|
||||||
|
from optimization.model_builder import build_model, load_tables, solve_model
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def report_results(model: pyo.ConcreteModel, max_rows: int) -> None:
|
||||||
|
print("Objective value:", value(model.obj))
|
||||||
|
print("Non-zero production decisions (k):")
|
||||||
|
printed = 0
|
||||||
|
for i in model.I:
|
||||||
|
for j in model.J:
|
||||||
|
for w in model.W:
|
||||||
|
for d in model.D:
|
||||||
|
for s in model.S:
|
||||||
|
qty = value(model.k[i, j, w, d, s])
|
||||||
|
if qty > 1e-6:
|
||||||
|
print(f" {i} -> {j} (W{w} D{d} S{s}): {qty:.0f}")
|
||||||
|
printed += 1
|
||||||
|
if printed >= max_rows:
|
||||||
|
print(" ... output truncated ...")
|
||||||
|
return
|
||||||
|
|
||||||
|
|
||||||
|
def export_results(model: pyo.ConcreteModel, output_path: Path) -> None:
|
||||||
|
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
wd_to_date = getattr(model, "_wd_to_date", {})
|
||||||
|
|
||||||
|
def safe_value(var) -> float:
|
||||||
|
val = pyo.value(var, exception=False)
|
||||||
|
return float(val) if val is not None else 0.0
|
||||||
|
|
||||||
|
def autosize_worksheet(ws, df, index_cols=None, max_width=25):
|
||||||
|
if index_cols is None:
|
||||||
|
index_cols = list(df.index.names)
|
||||||
|
idx_names = list(index_cols)
|
||||||
|
col_widths = [max(10, max(len(str(n)) for n in idx_names if n is not None) + 2) if idx_names else 10]
|
||||||
|
for col_idx, col in enumerate(df.columns):
|
||||||
|
header = " / ".join([str(c) for c in col]) if isinstance(col, tuple) else str(col)
|
||||||
|
max_len = max(len(header), 8)
|
||||||
|
sample = df.iloc[:200, col_idx]
|
||||||
|
max_len = max(max_len, sample.astype(str).str.len().max())
|
||||||
|
col_widths.append(min(max_width, int(max_len) + 2))
|
||||||
|
return col_widths
|
||||||
|
|
||||||
|
def adjust_widths_for_labels(df, widths, label_scale, index_scale=None):
|
||||||
|
adjusted = widths[:]
|
||||||
|
if index_scale is not None and adjusted:
|
||||||
|
adjusted[0] = max(10, int(adjusted[0] * index_scale))
|
||||||
|
if hasattr(df.columns, "get_level_values"):
|
||||||
|
top_level = df.columns.get_level_values(0)
|
||||||
|
for idx, label in enumerate(top_level, start=1):
|
||||||
|
if label in label_scale:
|
||||||
|
adjusted[idx] = max(6, int(adjusted[idx] * label_scale[label]))
|
||||||
|
return adjusted
|
||||||
|
lieferungen_schicht = []
|
||||||
|
for j in model.J:
|
||||||
|
for w in model.W:
|
||||||
|
for d in model.D:
|
||||||
|
for s in model.S:
|
||||||
|
if j == "V":
|
||||||
|
nachfrage = pyo.value(model.dV_N[w, d] + model.dV_W[w, d])
|
||||||
|
else:
|
||||||
|
nachfrage = pyo.value(model.d[j, w, d])
|
||||||
|
use_bunker_out = hasattr(model, "bunker_out") and j in getattr(model, "J_BUNKER", [])
|
||||||
|
delivery_sum = sum(safe_value(model.x[i, j, w, d, s]) for i in model.I)
|
||||||
|
out_sum = (
|
||||||
|
sum(safe_value(model.bunker_out[i, j, w, d, s]) for i in model.I)
|
||||||
|
if use_bunker_out
|
||||||
|
else delivery_sum
|
||||||
|
)
|
||||||
|
bunker_inflow = 0.0
|
||||||
|
if use_bunker_out:
|
||||||
|
x_sum = sum(safe_value(model.x[i, j, w, d, s]) for i in model.I)
|
||||||
|
out_sum = sum(safe_value(model.bunker_out[i, j, w, d, s]) for i in model.I)
|
||||||
|
bunker_inflow = round(x_sum - out_sum, 2)
|
||||||
|
def flow_val(i_name: str) -> float:
|
||||||
|
return safe_value(model.x[i_name, j, w, d, s])
|
||||||
|
lieferungen_schicht.append(
|
||||||
|
{
|
||||||
|
"kraftwerk": j,
|
||||||
|
"woche": w,
|
||||||
|
"tag": d,
|
||||||
|
"datum": wd_to_date.get((w, d)),
|
||||||
|
"schicht": s,
|
||||||
|
"nachfrage_tonnen": nachfrage,
|
||||||
|
"lieferung_tonnen": delivery_sum,
|
||||||
|
"lieferungsabweichung_tonnen": round(delivery_sum - nachfrage, 2),
|
||||||
|
"bunkerzufluss_tonnen": bunker_inflow,
|
||||||
|
"Nochten": flow_val("Nochten"),
|
||||||
|
"Reichwalde": flow_val("Reichwalde"),
|
||||||
|
"Welzow": flow_val("Welzow"),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
order_k_pw = ["J", "SP", "B3", "B4"]
|
||||||
|
order_k_v = ["V"]
|
||||||
|
order_sources = ["Reichwalde", "Nochten", "Welzow"]
|
||||||
|
order_s = ["F", "S", "N"]
|
||||||
|
|
||||||
|
df_raw = pd.DataFrame(lieferungen_schicht).copy()
|
||||||
|
df_raw["datum"] = pd.to_datetime(df_raw["datum"])
|
||||||
|
|
||||||
|
v_demand_map = {
|
||||||
|
(int(w), d): {
|
||||||
|
"welzow": float(pyo.value(model.dV_W[w, d])),
|
||||||
|
"nochten": float(pyo.value(model.dV_N[w, d])),
|
||||||
|
}
|
||||||
|
for w in model.W
|
||||||
|
for d in model.D
|
||||||
|
}
|
||||||
|
|
||||||
|
df_raw["nachfrage_welzow"] = df_raw.get("nachfrage_welzow", pd.Series(index=df_raw.index))
|
||||||
|
df_raw["nachfrage_nochten"] = df_raw.get("nachfrage_nochten", pd.Series(index=df_raw.index))
|
||||||
|
df_raw["nachfrage_welzow"] = df_raw.apply(
|
||||||
|
lambda r: (
|
||||||
|
r["nachfrage_welzow"]
|
||||||
|
if pd.notna(r["nachfrage_welzow"])
|
||||||
|
else v_demand_map.get((int(r["woche"]), r["tag"]), {}).get("welzow", 0)
|
||||||
|
),
|
||||||
|
axis=1,
|
||||||
|
)
|
||||||
|
df_raw["nachfrage_nochten"] = df_raw.apply(
|
||||||
|
lambda r: (
|
||||||
|
r["nachfrage_nochten"]
|
||||||
|
if pd.notna(r["nachfrage_nochten"])
|
||||||
|
else v_demand_map.get((int(r["woche"]), r["tag"]), {}).get("nochten", 0)
|
||||||
|
),
|
||||||
|
axis=1,
|
||||||
|
)
|
||||||
|
|
||||||
|
df = df_raw.rename(columns={"lieferungen_tonnen": "lieferung_tonnen"}).copy()
|
||||||
|
if "lieferung_tonnen" not in df.columns:
|
||||||
|
df["lieferung_tonnen"] = df[order_sources].sum(axis=1)
|
||||||
|
|
||||||
|
present_sources = [c for c in order_sources if c in df.columns]
|
||||||
|
|
||||||
|
df_src = (
|
||||||
|
df.pivot_table(
|
||||||
|
index=["datum", "woche", "tag"],
|
||||||
|
columns=["kraftwerk", "schicht"],
|
||||||
|
values=present_sources,
|
||||||
|
aggfunc="sum",
|
||||||
|
)
|
||||||
|
.fillna(0)
|
||||||
|
)
|
||||||
|
df_src.columns = df_src.columns.reorder_levels([1, 0, 2])
|
||||||
|
df_src = df_src.reindex(
|
||||||
|
columns=pd.MultiIndex.from_product([order_k_pw + order_k_v, present_sources, order_s]),
|
||||||
|
fill_value=0,
|
||||||
|
)
|
||||||
|
|
||||||
|
df_demand = (
|
||||||
|
df.groupby(["datum", "woche", "tag", "kraftwerk"])["nachfrage_tonnen"]
|
||||||
|
.first()
|
||||||
|
.unstack("kraftwerk")
|
||||||
|
.reindex(columns=order_k_pw + order_k_v, fill_value=0)
|
||||||
|
.reindex(df_src.index, fill_value=0)
|
||||||
|
)
|
||||||
|
|
||||||
|
df_v_demand_split = (
|
||||||
|
df[df["kraftwerk"] == "V"]
|
||||||
|
.groupby(["datum", "woche", "tag"])[["nachfrage_welzow", "nachfrage_nochten"]]
|
||||||
|
.first()
|
||||||
|
.reindex(df_src.index, fill_value=0)
|
||||||
|
)
|
||||||
|
|
||||||
|
totals_plain = df_src.T.groupby(level=0).sum().T
|
||||||
|
totals_plain = totals_plain.reindex(columns=order_k_pw + order_k_v, fill_value=0)
|
||||||
|
|
||||||
|
day_diff_rows = []
|
||||||
|
for w in model.W:
|
||||||
|
for d in model.D:
|
||||||
|
date = wd_to_date.get((w, d))
|
||||||
|
if date is None:
|
||||||
|
continue
|
||||||
|
row = {"datum": date, "woche": w, "tag": d}
|
||||||
|
for j in order_k_pw + order_k_v:
|
||||||
|
if j == "V":
|
||||||
|
demand = pyo.value(model.dV_N[w, d] + model.dV_W[w, d])
|
||||||
|
else:
|
||||||
|
demand = pyo.value(model.d[j, w, d])
|
||||||
|
delivered = pyo.value(model.y_delivery[j, w, d])
|
||||||
|
row[j] = delivered - demand
|
||||||
|
day_diff_rows.append(row)
|
||||||
|
|
||||||
|
day_diff_plain = (
|
||||||
|
pd.DataFrame(day_diff_rows)
|
||||||
|
.set_index(["datum", "woche", "tag"])
|
||||||
|
.reindex(totals_plain.index, fill_value=0)
|
||||||
|
.reindex(columns=order_k_pw + order_k_v, fill_value=0)
|
||||||
|
)
|
||||||
|
|
||||||
|
totals = totals_plain.copy()
|
||||||
|
totals.columns = pd.MultiIndex.from_tuples([(k, "Gesamt", "") for k in totals.columns])
|
||||||
|
|
||||||
|
demand_cols = df_demand.copy()
|
||||||
|
demand_cols.columns = pd.MultiIndex.from_tuples([(k, "Nachfrage", "") for k in demand_cols.columns])
|
||||||
|
|
||||||
|
day_diff_cols = day_diff_plain.copy()
|
||||||
|
day_diff_cols.columns = pd.MultiIndex.from_tuples(
|
||||||
|
[(k, "Lieferungstagesabweichung", "") for k in day_diff_cols.columns]
|
||||||
|
)
|
||||||
|
|
||||||
|
v_demand_cols = df_v_demand_split.copy()
|
||||||
|
v_demand_cols.columns = pd.MultiIndex.from_tuples(
|
||||||
|
[
|
||||||
|
("V", "Nachfrage_Welzow", ""),
|
||||||
|
("V", "Nachfrage_Nochtener", ""),
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
has_bunker = hasattr(model, "bunker")
|
||||||
|
col_order = []
|
||||||
|
for k in order_k_pw:
|
||||||
|
for src in present_sources:
|
||||||
|
for sch in order_s:
|
||||||
|
col_order.append((k, src, sch))
|
||||||
|
col_order.append((k, "Gesamt", ""))
|
||||||
|
col_order.append((k, "Nachfrage", ""))
|
||||||
|
col_order.append((k, "Lieferungstagesabweichung", ""))
|
||||||
|
|
||||||
|
col_order += [( "V", src, sch) for src in present_sources for sch in order_s]
|
||||||
|
col_order += [
|
||||||
|
("V", "Nachfrage_Welzow", ""),
|
||||||
|
("V", "Nachfrage_Nochtener", ""),
|
||||||
|
("V", "Gesamt", ""),
|
||||||
|
("V", "Nachfrage", ""),
|
||||||
|
("V", "Lieferungstagesabweichung", ""),
|
||||||
|
]
|
||||||
|
|
||||||
|
df_out = pd.concat([df_src, v_demand_cols, totals, demand_cols, day_diff_cols], axis=1)
|
||||||
|
|
||||||
|
df_out = df_out.reindex(
|
||||||
|
columns=col_order,
|
||||||
|
fill_value=0,
|
||||||
|
)
|
||||||
|
|
||||||
|
weekday_order = ["Mo", "Di", "Mi", "Do", "Fr", "Sa", "So"]
|
||||||
|
idx = df_out.index
|
||||||
|
df_out = df_out.copy()
|
||||||
|
df_out.index = pd.MultiIndex.from_arrays(
|
||||||
|
[
|
||||||
|
pd.to_datetime(idx.get_level_values("datum")),
|
||||||
|
idx.get_level_values("woche"),
|
||||||
|
pd.Categorical(idx.get_level_values("tag"), categories=weekday_order, ordered=True),
|
||||||
|
],
|
||||||
|
names=["datum", "woche", "tag"],
|
||||||
|
)
|
||||||
|
df_out = df_out.sort_index(level=["datum", "woche", "tag"])
|
||||||
|
|
||||||
|
df = df_out.copy()
|
||||||
|
df_out = df_out / 1000
|
||||||
|
|
||||||
|
bunker_sheet = None
|
||||||
|
if has_bunker:
|
||||||
|
bunker_rows = []
|
||||||
|
for j in getattr(model, "J_BUNKER", []):
|
||||||
|
for w in model.W:
|
||||||
|
for d in model.D:
|
||||||
|
date = wd_to_date.get((w, d))
|
||||||
|
if date is None:
|
||||||
|
continue
|
||||||
|
bunker_total = sum(safe_value(model.bunker[i, j, w, d]) for i in model.I)
|
||||||
|
bunker_rows.append(
|
||||||
|
{
|
||||||
|
"kraftwerk": j,
|
||||||
|
"woche": w,
|
||||||
|
"tag": d,
|
||||||
|
"datum": date,
|
||||||
|
"bunkerbestand_tonnen": bunker_total,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
if bunker_rows:
|
||||||
|
bunker_df = pd.DataFrame(bunker_rows)
|
||||||
|
bunker_df["datum"] = pd.to_datetime(bunker_df["datum"])
|
||||||
|
bunker_df = bunker_df.sort_values(["kraftwerk", "datum", "woche", "tag"])
|
||||||
|
bunker_df["vortags_bunkerbestand_tonnen"] = bunker_df.groupby("kraftwerk")[
|
||||||
|
"bunkerbestand_tonnen"
|
||||||
|
].shift(1)
|
||||||
|
bunker_df["vortags_bunkerbestand_tonnen"] = bunker_df["vortags_bunkerbestand_tonnen"].fillna(0)
|
||||||
|
bunker_pivot = (
|
||||||
|
bunker_df.pivot_table(
|
||||||
|
index=["datum", "woche", "tag"],
|
||||||
|
columns=["kraftwerk"],
|
||||||
|
values=["bunkerbestand_tonnen"],
|
||||||
|
aggfunc="first",
|
||||||
|
)
|
||||||
|
.fillna(0)
|
||||||
|
)
|
||||||
|
bunker_pivot = bunker_pivot.reindex(columns=order_k_pw + order_k_v, level=1, fill_value=0)
|
||||||
|
bunker_pivot.columns = pd.MultiIndex.from_tuples(
|
||||||
|
[(k, "Bunkerbestand", "") for k in bunker_pivot.columns.get_level_values(1)]
|
||||||
|
)
|
||||||
|
bunker_prev_pivot = (
|
||||||
|
bunker_df.pivot_table(
|
||||||
|
index=["datum", "woche", "tag"],
|
||||||
|
columns=["kraftwerk"],
|
||||||
|
values=["vortags_bunkerbestand_tonnen"],
|
||||||
|
aggfunc="first",
|
||||||
|
)
|
||||||
|
.fillna(0)
|
||||||
|
)
|
||||||
|
bunker_prev_pivot = bunker_prev_pivot.reindex(columns=order_k_pw + order_k_v, level=1, fill_value=0)
|
||||||
|
bunker_prev_pivot.columns = pd.MultiIndex.from_tuples(
|
||||||
|
[(k, "Vortags_Bunkerbestand", "") for k in bunker_prev_pivot.columns.get_level_values(1)]
|
||||||
|
)
|
||||||
|
inflow_pivot = None
|
||||||
|
if "bunkerzufluss_tonnen" in df_raw.columns:
|
||||||
|
inflow_pivot = (
|
||||||
|
df_raw.pivot_table(
|
||||||
|
index=["datum", "woche", "tag"],
|
||||||
|
columns=["kraftwerk"],
|
||||||
|
values=["bunkerzufluss_tonnen"],
|
||||||
|
aggfunc="sum",
|
||||||
|
)
|
||||||
|
.fillna(0)
|
||||||
|
)
|
||||||
|
inflow_pivot.columns = pd.MultiIndex.from_tuples(
|
||||||
|
[(k, "Bunkerzufluss", "") for k in inflow_pivot.columns.get_level_values(1)]
|
||||||
|
)
|
||||||
|
|
||||||
|
frames = [df]
|
||||||
|
frames.append(bunker_prev_pivot.reindex(df.index, fill_value=0))
|
||||||
|
if inflow_pivot is not None:
|
||||||
|
frames.append(inflow_pivot.reindex(df.index, fill_value=0))
|
||||||
|
frames.append(bunker_pivot.reindex(df.index, fill_value=0))
|
||||||
|
bunker_sheet = pd.concat(frames, axis=1)
|
||||||
|
|
||||||
|
col_order_bunker = []
|
||||||
|
for k in order_k_pw:
|
||||||
|
for src in present_sources:
|
||||||
|
for sch in order_s:
|
||||||
|
col_order_bunker.append((k, src, sch))
|
||||||
|
col_order_bunker.append((k, "Gesamt", ""))
|
||||||
|
col_order_bunker.append((k, "Nachfrage", ""))
|
||||||
|
col_order_bunker.append((k, "Lieferungstagesabweichung", ""))
|
||||||
|
if (k, "Vortags_Bunkerbestand", "") in bunker_sheet.columns:
|
||||||
|
col_order_bunker.append((k, "Vortags_Bunkerbestand", ""))
|
||||||
|
if (k, "Bunkerzufluss", "") in bunker_sheet.columns:
|
||||||
|
col_order_bunker.append((k, "Bunkerzufluss", ""))
|
||||||
|
if (k, "Bunkerbestand", "") in bunker_sheet.columns:
|
||||||
|
col_order_bunker.append((k, "Bunkerbestand", ""))
|
||||||
|
|
||||||
|
col_order_bunker += [("V", src, sch) for src in present_sources for sch in order_s]
|
||||||
|
col_order_bunker += [
|
||||||
|
("V", "Nachfrage_Welzow", ""),
|
||||||
|
("V", "Nachfrage_Nochtener", ""),
|
||||||
|
("V", "Gesamt", ""),
|
||||||
|
("V", "Nachfrage", ""),
|
||||||
|
("V", "Lieferungstagesabweichung", ""),
|
||||||
|
]
|
||||||
|
if ("V", "Vortags_Bunkerbestand", "") in bunker_sheet.columns:
|
||||||
|
col_order_bunker.append(("V", "Vortags_Bunkerbestand", ""))
|
||||||
|
if ("V", "Bunkerzufluss", "") in bunker_sheet.columns:
|
||||||
|
col_order_bunker.append(("V", "Bunkerzufluss", ""))
|
||||||
|
if ("V", "Bunkerbestand", "") in bunker_sheet.columns:
|
||||||
|
col_order_bunker.append(("V", "Bunkerbestand", ""))
|
||||||
|
|
||||||
|
bunker_sheet = bunker_sheet.reindex(columns=col_order_bunker, fill_value=0)
|
||||||
|
|
||||||
|
try:
|
||||||
|
import xlsxwriter # type: ignore
|
||||||
|
|
||||||
|
excel_engine = "xlsxwriter"
|
||||||
|
except Exception:
|
||||||
|
excel_engine = "openpyxl"
|
||||||
|
|
||||||
|
with pd.ExcelWriter(output_path, engine=excel_engine) as writer:
|
||||||
|
df.to_excel(writer, sheet_name="Sheet1")
|
||||||
|
if excel_engine == "xlsxwriter":
|
||||||
|
ws1 = writer.sheets["Sheet1"]
|
||||||
|
widths = autosize_worksheet(ws1, df)
|
||||||
|
widths = adjust_widths_for_labels(
|
||||||
|
df,
|
||||||
|
widths,
|
||||||
|
{
|
||||||
|
"Reichwalde": 0.5,
|
||||||
|
"Nochten": 0.5,
|
||||||
|
"Welzow": 0.5,
|
||||||
|
"Gesamt": 0.5,
|
||||||
|
"Nachfrage": 0.5,
|
||||||
|
"Bunkerbestand": 0.5,
|
||||||
|
"Bunkerzufluss": 0.5,
|
||||||
|
"Vortags_Bunkerbestand": 0.5,
|
||||||
|
},
|
||||||
|
index_scale=1.2,
|
||||||
|
)
|
||||||
|
for i, w in enumerate(widths):
|
||||||
|
ws1.set_column(i, i, w)
|
||||||
|
else:
|
||||||
|
ws1 = writer.sheets["Sheet1"]
|
||||||
|
widths = autosize_worksheet(ws1, df)
|
||||||
|
widths = adjust_widths_for_labels(
|
||||||
|
df,
|
||||||
|
widths,
|
||||||
|
{
|
||||||
|
"Reichwalde": 0.5,
|
||||||
|
"Nochten": 0.5,
|
||||||
|
"Welzow": 0.5,
|
||||||
|
"Gesamt": 0.5,
|
||||||
|
"Nachfrage": 0.5,
|
||||||
|
"Bunkerbestand": 0.5,
|
||||||
|
"Bunkerzufluss": 0.5,
|
||||||
|
"Vortags_Bunkerbestand": 0.5,
|
||||||
|
},
|
||||||
|
index_scale=1.2,
|
||||||
|
)
|
||||||
|
for i, w in enumerate(widths):
|
||||||
|
ws1.column_dimensions[chr(65 + i)].width = w
|
||||||
|
|
||||||
|
if bunker_sheet is not None:
|
||||||
|
bunker_sheet.to_excel(writer, sheet_name="mit_Bunkerbestand")
|
||||||
|
if excel_engine == "xlsxwriter":
|
||||||
|
workbook = writer.book
|
||||||
|
worksheet = writer.sheets["mit_Bunkerbestand"]
|
||||||
|
widths = autosize_worksheet(worksheet, bunker_sheet)
|
||||||
|
widths = adjust_widths_for_labels(
|
||||||
|
bunker_sheet,
|
||||||
|
widths,
|
||||||
|
{
|
||||||
|
"Reichwalde": 0.5,
|
||||||
|
"Nochten": 0.5,
|
||||||
|
"Welzow": 0.5,
|
||||||
|
"Gesamt": 0.5,
|
||||||
|
"Nachfrage": 0.5,
|
||||||
|
"Bunkerbestand": 0.5,
|
||||||
|
"Bunkerzufluss": 0.5,
|
||||||
|
"Vortags_Bunkerbestand": 0.5,
|
||||||
|
},
|
||||||
|
index_scale=1.2,
|
||||||
|
)
|
||||||
|
for i, w in enumerate(widths):
|
||||||
|
worksheet.set_column(i, i, w)
|
||||||
|
|
||||||
|
header_fmt = workbook.add_format({"bold": True, "bg_color": "#E6E6E6", "border": 1})
|
||||||
|
block_colors = ["#DCEFFE", "#FDEBD0", "#E8F8F5", "#FADBD8", "#E8DAEF", "#FEF9E7"]
|
||||||
|
block_formats = [
|
||||||
|
workbook.add_format({"bold": True, "bg_color": color, "border": 1}) for color in block_colors
|
||||||
|
]
|
||||||
|
|
||||||
|
index_cols = len(bunker_sheet.index.names)
|
||||||
|
n_header_rows = bunker_sheet.columns.nlevels
|
||||||
|
|
||||||
|
# Base header formatting for index columns.
|
||||||
|
for r in range(n_header_rows):
|
||||||
|
for c in range(index_cols):
|
||||||
|
worksheet.write(r, c, "", header_fmt)
|
||||||
|
|
||||||
|
# Apply block colors per Kraftwerk on header rows.
|
||||||
|
top_level = bunker_sheet.columns.get_level_values(0)
|
||||||
|
blocks = {}
|
||||||
|
for idx, label in enumerate(top_level):
|
||||||
|
blocks.setdefault(label, []).append(idx)
|
||||||
|
|
||||||
|
for b_idx, (label, cols) in enumerate(blocks.items()):
|
||||||
|
fmt = block_formats[b_idx % len(block_formats)]
|
||||||
|
for r in range(n_header_rows):
|
||||||
|
for c in cols:
|
||||||
|
value = bunker_sheet.columns.get_level_values(r)[c]
|
||||||
|
worksheet.write(r, index_cols + c, value, fmt)
|
||||||
|
else:
|
||||||
|
from openpyxl.styles import Border, Font, PatternFill, Side
|
||||||
|
|
||||||
|
worksheet = writer.sheets["mit_Bunkerbestand"]
|
||||||
|
widths = autosize_worksheet(worksheet, bunker_sheet)
|
||||||
|
widths = adjust_widths_for_labels(
|
||||||
|
bunker_sheet,
|
||||||
|
widths,
|
||||||
|
{
|
||||||
|
"Reichwalde": 0.5,
|
||||||
|
"Nochten": 0.5,
|
||||||
|
"Welzow": 0.5,
|
||||||
|
"Gesamt": 0.5,
|
||||||
|
"Nachfrage": 0.5,
|
||||||
|
"Bunkerbestand": 0.5,
|
||||||
|
"Bunkerzufluss": 0.5,
|
||||||
|
"Vortags_Bunkerbestand": 0.5,
|
||||||
|
},
|
||||||
|
index_scale=1.2,
|
||||||
|
)
|
||||||
|
for i, w in enumerate(widths):
|
||||||
|
worksheet.column_dimensions[chr(65 + i)].width = w
|
||||||
|
header_fill = PatternFill("solid", fgColor="E6E6E6")
|
||||||
|
block_colors = ["DCEFFE", "FDEBD0", "E8F8F5", "FADBD8", "E8DAEF", "FEF9E7"]
|
||||||
|
block_fills = [PatternFill("solid", fgColor=c) for c in block_colors]
|
||||||
|
bold_font = Font(bold=True)
|
||||||
|
border = Border(
|
||||||
|
left=Side(style="thin"),
|
||||||
|
right=Side(style="thin"),
|
||||||
|
top=Side(style="thin"),
|
||||||
|
bottom=Side(style="thin"),
|
||||||
|
)
|
||||||
|
|
||||||
|
index_cols = len(bunker_sheet.index.names)
|
||||||
|
n_header_rows = bunker_sheet.columns.nlevels
|
||||||
|
top_level = bunker_sheet.columns.get_level_values(0)
|
||||||
|
blocks = {}
|
||||||
|
for idx, label in enumerate(top_level):
|
||||||
|
blocks.setdefault(label, []).append(idx)
|
||||||
|
|
||||||
|
for r in range(n_header_rows):
|
||||||
|
for c in range(index_cols):
|
||||||
|
cell = worksheet.cell(row=r + 1, column=c + 1)
|
||||||
|
cell.fill = header_fill
|
||||||
|
cell.font = bold_font
|
||||||
|
cell.border = border
|
||||||
|
|
||||||
|
for b_idx, (label, cols) in enumerate(blocks.items()):
|
||||||
|
fill = block_fills[b_idx % len(block_fills)]
|
||||||
|
for r in range(n_header_rows):
|
||||||
|
for c in cols:
|
||||||
|
cell = worksheet.cell(row=r + 1, column=index_cols + c + 1)
|
||||||
|
cell.fill = fill
|
||||||
|
cell.font = bold_font
|
||||||
|
cell.border = border
|
||||||
|
|
||||||
|
# Kohlesorten-Mischverhaeltnis (gesamter Zeitraum)
|
||||||
|
j_name_map = {
|
||||||
|
"J": "Jänschwalde",
|
||||||
|
"SP": "Schwarze Pumpe",
|
||||||
|
"B3": "Boxberg Werk 3",
|
||||||
|
"B4": "Boxberg Werk 4",
|
||||||
|
}
|
||||||
|
i_name_map = {
|
||||||
|
"Reichwalde": "Reichwalder-Kohle",
|
||||||
|
"Nochten": "Nochtener-Kohle",
|
||||||
|
"Welzow": "Welzower-Kohle",
|
||||||
|
}
|
||||||
|
|
||||||
|
# Empirical mix over full horizon based on delivered quantities (x).
|
||||||
|
total_delivered_by_j = {}
|
||||||
|
for j_code in j_name_map:
|
||||||
|
if j_code not in model.J:
|
||||||
|
continue
|
||||||
|
total_delivered_by_j[j_code] = sum(
|
||||||
|
safe_value(model.x[i, j_code, w, d, s])
|
||||||
|
for i in model.I
|
||||||
|
for w in model.W
|
||||||
|
for d in model.D
|
||||||
|
for s in model.S
|
||||||
|
if (i, j_code, w, d, s) in model.x
|
||||||
|
)
|
||||||
|
|
||||||
|
# Empirical bunker mix over full horizon based on bunker stock.
|
||||||
|
total_bunker_by_j = {}
|
||||||
|
if hasattr(model, "bunker"):
|
||||||
|
for j_code in j_name_map:
|
||||||
|
if j_code not in getattr(model, "J_BUNKER", []):
|
||||||
|
continue
|
||||||
|
total_bunker_by_j[j_code] = sum(
|
||||||
|
safe_value(model.bunker[i, j_code, w, d])
|
||||||
|
for i in model.I
|
||||||
|
for w in model.W
|
||||||
|
for d in model.D
|
||||||
|
if (i, j_code, w, d) in model.bunker
|
||||||
|
)
|
||||||
|
|
||||||
|
mix_rows = []
|
||||||
|
for j_code, j_name in j_name_map.items():
|
||||||
|
if j_code not in model.J:
|
||||||
|
continue
|
||||||
|
for i_code, i_name in i_name_map.items():
|
||||||
|
if i_code not in model.I:
|
||||||
|
continue
|
||||||
|
denom = total_delivered_by_j.get(j_code, 0.0)
|
||||||
|
num = sum(
|
||||||
|
safe_value(model.x[i_code, j_code, w, d, s])
|
||||||
|
for w in model.W
|
||||||
|
for d in model.D
|
||||||
|
for s in model.S
|
||||||
|
if (i_code, j_code, w, d, s) in model.x
|
||||||
|
)
|
||||||
|
empirisch = round(100 * num / denom, 2) if denom > 0 else 0.0
|
||||||
|
bunker_empirisch = 0.0
|
||||||
|
if hasattr(model, "bunker") and j_code in total_bunker_by_j:
|
||||||
|
denom_b = total_bunker_by_j.get(j_code, 0.0)
|
||||||
|
num_b = sum(
|
||||||
|
safe_value(model.bunker[i_code, j_code, w, d])
|
||||||
|
for w in model.W
|
||||||
|
for d in model.D
|
||||||
|
if (i_code, j_code, w, d) in model.bunker
|
||||||
|
)
|
||||||
|
bunker_empirisch = round(100 * num_b / denom_b, 2) if denom_b > 0 else 0.0
|
||||||
|
mix_rows.append(
|
||||||
|
{
|
||||||
|
"kraftwerk": j_name,
|
||||||
|
"kohlesorte": i_name,
|
||||||
|
"ziel_low": round(100 * pyo.value(model.alpha_target_low[i_code, j_code]), 2),
|
||||||
|
"ziel_high": round(100 * pyo.value(model.alpha_target_high[i_code, j_code]), 2),
|
||||||
|
"maximal": round(100 * pyo.value(model.alpha_max[i_code, j_code]), 2),
|
||||||
|
"minimal": round(100 * pyo.value(model.alpha_min[i_code, j_code]), 2),
|
||||||
|
"empirisch": empirisch,
|
||||||
|
"bunker_empirisch": bunker_empirisch,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
mix_df = pd.DataFrame(mix_rows)
|
||||||
|
mix_df.to_excel(writer, sheet_name="Kohlemischverhältnis", index=False)
|
||||||
|
if excel_engine == "xlsxwriter":
|
||||||
|
ws_mix = writer.sheets["Kohlemischverhältnis"]
|
||||||
|
workbook = writer.book
|
||||||
|
mix_block_colors = ["#E8F8F5", "#FDEBD0", "#DCEFFE", "#FADBD8"]
|
||||||
|
mix_formats = [workbook.add_format({"bg_color": c}) for c in mix_block_colors]
|
||||||
|
red_fill = workbook.add_format({"bg_color": "#F5B7B1"})
|
||||||
|
green_fill = workbook.add_format({"bg_color": "#D4EFDF"})
|
||||||
|
if not mix_df.empty:
|
||||||
|
emp_col = mix_df.columns.get_loc("empirisch")
|
||||||
|
bunker_emp_col = mix_df.columns.get_loc("bunker_empirisch")
|
||||||
|
current = None
|
||||||
|
block_idx = -1
|
||||||
|
for r, row in mix_df.iterrows():
|
||||||
|
if row["kraftwerk"] != current:
|
||||||
|
block_idx += 1
|
||||||
|
current = row["kraftwerk"]
|
||||||
|
fmt = mix_formats[block_idx % len(mix_formats)]
|
||||||
|
for c in range(0, min(6, mix_df.shape[1])):
|
||||||
|
ws_mix.write(r + 1, c, row.iloc[c], fmt)
|
||||||
|
emp_fmt = (
|
||||||
|
red_fill
|
||||||
|
if row["empirisch"] < row["ziel_low"] or row["empirisch"] > row["ziel_high"]
|
||||||
|
else green_fill
|
||||||
|
)
|
||||||
|
ws_mix.write(r + 1, emp_col, row["empirisch"], emp_fmt)
|
||||||
|
ws_mix.write(r + 1, bunker_emp_col, row["bunker_empirisch"])
|
||||||
|
widths = autosize_worksheet(ws_mix, mix_df, index_cols=[])
|
||||||
|
for i, w in enumerate(widths[1:]):
|
||||||
|
ws_mix.set_column(i, i, w)
|
||||||
|
else:
|
||||||
|
ws_mix = writer.sheets["Kohlemischverhältnis"]
|
||||||
|
if not mix_df.empty:
|
||||||
|
from openpyxl.styles import PatternFill
|
||||||
|
|
||||||
|
mix_block_colors = ["E8F8F5", "FDEBD0", "DCEFFE", "FADBD8"]
|
||||||
|
mix_fills = [PatternFill("solid", fgColor=c) for c in mix_block_colors]
|
||||||
|
red_fill = PatternFill("solid", fgColor="F5B7B1")
|
||||||
|
green_fill = PatternFill("solid", fgColor="D4EFDF")
|
||||||
|
current = None
|
||||||
|
block_idx = -1
|
||||||
|
for row_idx, kraftwerk in enumerate(mix_df["kraftwerk"], start=2):
|
||||||
|
if kraftwerk != current:
|
||||||
|
block_idx += 1
|
||||||
|
current = kraftwerk
|
||||||
|
fill = mix_fills[block_idx % len(mix_fills)]
|
||||||
|
for col_idx in range(1, min(7, mix_df.shape[1]) + 1):
|
||||||
|
ws_mix.cell(row=row_idx, column=col_idx).fill = fill
|
||||||
|
emp_col = mix_df.columns.get_loc("empirisch") + 1
|
||||||
|
bunker_emp_col = mix_df.columns.get_loc("bunker_empirisch") + 1
|
||||||
|
for r, row in mix_df.iterrows():
|
||||||
|
fill = red_fill if row["empirisch"] < row["ziel_low"] or row["empirisch"] > row["ziel_high"] else green_fill
|
||||||
|
ws_mix.cell(row=r + 2, column=emp_col).fill = fill
|
||||||
|
# No group fill for bunker_empirisch (column H).
|
||||||
|
widths = autosize_worksheet(ws_mix, mix_df, index_cols=[])
|
||||||
|
for i, w in enumerate(widths[1:]):
|
||||||
|
ws_mix.column_dimensions[chr(65 + i)].width = w
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
|
||||||
|
parser = argparse.ArgumentParser(description="Run the Pyomo optimization model.")
|
||||||
|
parser.add_argument(
|
||||||
|
"--data-dir",
|
||||||
|
type=Path,
|
||||||
|
default=Path("data/processed"),
|
||||||
|
help="Directory containing input parquet files.",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--solver",
|
||||||
|
default="gurobi",
|
||||||
|
help="Solver name passed to Pyomo (default: gurobi).",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--max-rows",
|
||||||
|
type=int,
|
||||||
|
default=50,
|
||||||
|
help="Maximum number of non-zero decision variable rows to print.",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--time-limit",
|
||||||
|
type=int,
|
||||||
|
default=600,
|
||||||
|
help="Time limit (seconds) for the solver (default: 600).",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--output-xlsx",
|
||||||
|
type=Path,
|
||||||
|
default=Path("data/out/output.xlsx"),
|
||||||
|
help="Excel output file for deliveries by plant/week/day/shift.",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--mip-gap",
|
||||||
|
type=float,
|
||||||
|
default=0.03,
|
||||||
|
help="MIP gap tolerance (default: 0.03).",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--step-size-tonnes",
|
||||||
|
type=int,
|
||||||
|
default=1000,
|
||||||
|
choices=[960, 1000],
|
||||||
|
help="Discrete train step size in tonnes (default: 1000).",
|
||||||
|
)
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
tables = load_tables(args.data_dir)
|
||||||
|
model = build_model(tables, step_size_tonnes=args.step_size_tonnes)
|
||||||
|
|
||||||
|
solve_model(model, args.solver, args.time_limit, args.mip_gap)
|
||||||
|
# report_results(model, args.max_rows)
|
||||||
|
export_results(model, args.output_xlsx)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
|
|
||||||
|
|
||||||
|
# uv run python src/optimization/run_optimization.py --solver gurobi --mip-gap 0.05
|
||||||
|
# uv run python src/optimization/run_optimization.py --solver highs
|
||||||
505
src/preprocessing/exploration_preprocess.py
Normal file
505
src/preprocessing/exploration_preprocess.py
Normal file
@ -0,0 +1,505 @@
|
|||||||
|
# Generated from exploration.ipynb
|
||||||
|
# %%
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
# %%
# Resolve project-relative paths; both input workbook and output directory can
# be overridden via environment variables (POC1_INPUT_XLSX / POC1_OUTPUT_DIR).
PROJECT_ROOT = Path(__file__).resolve().parents[2]
DEFAULT_INPUT = PROJECT_ROOT / "data/input/PoC1_Rohkohleverteilung_Input_Parameter.xlsx"
DEFAULT_OUTPUT = PROJECT_ROOT / "data/processed"
INPUT_XLSX = Path(os.environ.get("POC1_INPUT_XLSX", str(DEFAULT_INPUT)))
OUTPUT_DIR = Path(os.environ.get("POC1_OUTPUT_DIR", str(DEFAULT_OUTPUT)))
# Ensure the output directory exists before any of the parquet writes below.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
# Alias used by every read_excel call in this script.
path = INPUT_XLSX
|
||||||
|
# %% [markdown]
# # Mappe Parameter

# %% [markdown]
# ## Erlaubte Abweichung
# ### Kraftwerke
# %%
# Parse the allowed-deviation table for the power plants (rows 0-17,
# columns A-H of sheet "Parameter") and save it as bounds_power_plants.parquet.
raw = pd.read_excel(path, sheet_name="Parameter", header=None)

t1 = raw.iloc[0:18, 0:8].copy()
# Drop completely empty rows.
df = t1.dropna(how="all").reset_index(drop=True)

# Locate the unit row (contains "[kt]") and the label row directly below it.
unit_idx = df[df.apply(lambda r: r.astype(str).str.contains(r"\[kt\]").any(), axis=1)].index[0]
label_idx = unit_idx + 1

unit_row = df.loc[unit_idx]
label_row = df.loc[label_idx]

# Build column headers: "label (unit)" when both exist, otherwise whichever
# of the two rows holds a value.
headers = []
for u, l in zip(unit_row, label_row):
    if pd.isna(l):
        headers.append(str(u))
    elif pd.isna(u):
        headers.append(str(l))
    else:
        headers.append(f"{l} ({u})")

# Data rows are everything below the two header rows.
data = df.loc[label_idx + 1:].reset_index(drop=True)
data.columns = headers

# Copy with unique, machine-friendly column names.
data2 = data.copy()
data2.columns = [
    "col0",
    "titel",
    "col2",
    "zeitraum",
    "minus_kt",
    "plus_kt",
    "minus_pct",
    "plus_pct",
]

# NOTE: an earlier intermediate (title-row mask + first `data3`/`result`)
# was dead code — it was overwritten before ever being used — and has been
# removed without changing the produced output.
data3 = data2.copy()

# The plant name lives in col2; forward-fill it over its detail rows.
data3["kraftwerk"] = data3["col2"].ffill()
# Rows without a period are title/header rows; drop them.
data3 = data3.dropna(subset=["zeitraum"]).reset_index(drop=True)

for col in ["minus_kt", "plus_kt", "minus_pct", "plus_pct"]:
    data3[col] = pd.to_numeric(data3[col], errors="coerce")

# Final selection; .copy() so the derived-column writes below hit an owned
# frame instead of a slice (avoids SettingWithCopyWarning / lost updates).
bounds_power_plants = data3[["kraftwerk", "zeitraum", "minus_kt", "plus_kt", "minus_pct", "plus_pct"]].copy()
# Convert kt -> t.
bounds_power_plants["minus"] = bounds_power_plants["minus_kt"] * 1000
bounds_power_plants["plus"] = bounds_power_plants["plus_kt"] * 1000
bounds_power_plants.drop(columns=["minus_kt", "plus_kt"], inplace=True)
bounds_power_plants.to_parquet(OUTPUT_DIR / "bounds_power_plants.parquet")
print("Saved bounds_power_plants.parquet")
bounds_power_plants
|
||||||
|
# %% [markdown]
# ### Veredlung
# %%
# Parse the allowed-deviation table for the refinement (Veredlung) plants.
# The unit/label header rows sit at the top of the sheet (rows 0-3) while the
# Veredlung data rows are at rows 18-27, so both slices are stitched together
# before the same header parsing as for the power plants is applied.
raw = pd.read_excel(path, sheet_name="Parameter", header=None)

t1_upper = raw.iloc[0:4, 0:8].copy()
t2 = raw.iloc[18:28, 0:8].copy()
t2 = pd.concat([t1_upper, t2], axis=0)

# Drop completely empty rows.
df = t2.dropna(how="all").reset_index(drop=True)

# Locate the unit row (contains "[kt]") and the label row directly below it.
unit_idx = df[df.apply(lambda r: r.astype(str).str.contains(r"\[kt\]").any(), axis=1)].index[0]
label_idx = unit_idx + 1

unit_row = df.loc[unit_idx]
label_row = df.loc[label_idx]

# Build "label (unit)" column headers.
headers = []
for u, l in zip(unit_row, label_row):
    if pd.isna(l):
        headers.append(str(u))
    elif pd.isna(u):
        headers.append(str(l))
    else:
        headers.append(f"{l} ({u})")

# Data rows below the two header rows.
data = df.loc[label_idx + 1:].reset_index(drop=True)
data.columns = headers

# Copy with unique, machine-friendly column names.
data2 = data.copy()
data2.columns = [
    "col0",
    "titel",
    "col2",
    "zeitraum",
    "minus_kt",
    "plus_kt",
    "minus_pct",
    "plus_pct",
]

# NOTE: the original cell computed a title-row mask that was never applied and
# mutated a stale `data3` left over from the previous cell before immediately
# re-assigning it — both were dead code and have been removed; the produced
# output is unchanged.
data3 = data2.copy()

data3["kraftwerk"] = data3["col2"].ffill()
data3 = data3.dropna(subset=["zeitraum"]).reset_index(drop=True)

for col in ["minus_kt", "plus_kt", "minus_pct", "plus_pct"]:
    data3[col] = pd.to_numeric(data3[col], errors="coerce")

# Final selection; .copy() so the derived-column writes below hit an owned
# frame instead of a slice (avoids SettingWithCopyWarning / lost updates).
veredelung_bounds = data3[["kraftwerk", "zeitraum", "minus_kt", "plus_kt", "minus_pct", "plus_pct"]].copy()
# In the Veredlung table the "kraftwerk" column actually holds the coal type.
veredelung_bounds = veredelung_bounds.rename(columns={"kraftwerk": "kohlesorte"})
# Convert kt -> t.
veredelung_bounds["minus"] = veredelung_bounds["minus_kt"] * 1000
veredelung_bounds["plus"] = veredelung_bounds["plus_kt"] * 1000
veredelung_bounds.drop(columns=["minus_kt", "plus_kt"], inplace=True)
veredelung_bounds.to_parquet(OUTPUT_DIR / "veredelung_bounds.parquet")
print("Saved veredelung_bounds.parquet")
veredelung_bounds
|
||||||
|
# %% [markdown]
# ## Kohlesorten-Mischverhältnis
# %%
# Parse the coal-blend ratio table (Excel range J3:P16) into kohle_mix.parquet.
raw = pd.read_excel(path, sheet_name="Parameter", header=None)

# J3:P16 -> rows 2:16, columns 9:16 (0-based, right bound exclusive).
block = raw.iloc[2:16, 9:16].copy()

# Drop completely empty rows.
df = block.dropna(how="all").reset_index(drop=True)

# The unit row contains "[%]"; the label row sits directly below it.
# (The iloc slice keeps the original column labels, so df[12] is valid.)
unit_idx = df[df[12].astype(str).str.contains(r"\[%\]", regex=True)].index[0]
label_idx = unit_idx + 1

unit_row = df.loc[unit_idx]
label_row = df.loc[label_idx]

# Build "label (unit)" column headers.
headers = []
for u, l in zip(unit_row, label_row):
    if pd.isna(l):
        headers.append(str(u))
    elif pd.isna(u):
        headers.append(str(l))
    else:
        headers.append(f"{l} ({u})")

# Data rows below the two header rows.
data = df.loc[label_idx + 1:].reset_index(drop=True)
data.columns = headers

# Meaningful column names.
data2 = data.copy()
data2.columns = [
    "titel",
    "kraftwerk",
    "kohlesorte",
    "ziel_low",
    "ziel_high",
    "maximal",
    "minimal",
]

# Forward-fill the plant name over its coal-type rows.
data2["kraftwerk"] = data2["kraftwerk"].ffill()

# Filter on kohlesorte (not titel): only rows with a coal type are data rows.
data3 = data2[data2["kohlesorte"].notna()].reset_index(drop=True)

# Final frame; .copy() so the numeric conversion below writes to an owned
# frame instead of a slice (avoids SettingWithCopyWarning / lost updates).
kohle_mix = data3[[
    "kraftwerk",
    "kohlesorte",
    "ziel_low",
    "ziel_high",
    "maximal",
    "minimal",
]].copy()

# Convert the ratio columns to consistent floats.
num_cols = ["ziel_low", "ziel_high", "maximal", "minimal"]
kohle_mix[num_cols] = kohle_mix[num_cols].apply(pd.to_numeric, errors="coerce")

kohle_mix.to_parquet(OUTPUT_DIR / "kohle_mix.parquet")
print("Saved kohle_mix.parquet")
kohle_mix
|
||||||
|
|
||||||
|
# %% [markdown]
# ## Förderkapazitäten
# %%
# Parse the mining capacities (Excel range J19:M23) into foerderkapaz.parquet.
# J19:M23 -> rows 18:23, columns 9:13 (0-based).
block = pd.read_excel(path, sheet_name="Parameter", header=None).iloc[18:23, 9:13].copy()

# Drop completely empty rows.
df = block.dropna(how="all").reset_index(drop=True)

# Remove the "Förderkapazität" title row.
df = df[df[9] != "Förderkapazität"].reset_index(drop=True)

df.columns = ["kategorie", "tagebau", "zeitraum", "maximal"]

# Forward-fill the category label over its detail rows
# (covers Nochten, Gesamt, Welzow-Süd).
df["kategorie"] = df["kategorie"].ffill()

df["maximal"] = pd.to_numeric(df["maximal"], errors="coerce")

# Keep only the essential columns; .copy() so the unit conversion below
# writes to an owned frame (avoids SettingWithCopyWarning / lost updates).
foerderkap = df[["tagebau", "zeitraum", "maximal"]].copy()
# Convert kt -> t.
foerderkap["maximal"] = foerderkap["maximal"] * 1000
foerderkap.to_parquet(OUTPUT_DIR / "foerderkapaz.parquet")
# Fixed: confirmation previously printed the wrong file name ("foerderkap.parquet").
print("Saved foerderkapaz.parquet")
foerderkap
|
||||||
|
# %% [markdown]
# ## Verladungskapazitäten
# %%
# Parse the loading capacities (Excel range J26:M30) into verladungskap.parquet.
raw = pd.read_excel(path, sheet_name="Parameter", header=None)

# J26:M30 -> rows 25:30, cols 9:13.
block = raw.iloc[25:30, 9:13].copy()

# Drop completely empty rows.
df = block.dropna(how="all").reset_index(drop=True)

# Remove the "Verladungskapazität" title row.
df = df[df[9] != "Verladungskapazität"].reset_index(drop=True)

df.columns = ["kategorie", "verladung", "zeitraum", "maximal"]

# Forward-fill the loading point over its detail rows.
df["verladung"] = df["verladung"].ffill()

df["maximal"] = pd.to_numeric(df["maximal"], errors="coerce")

# Final selection; .copy() so the unit conversion below writes to an owned
# frame instead of a slice (avoids SettingWithCopyWarning / lost updates).
verladung = df[["verladung", "zeitraum", "maximal"]].copy()
# Convert kt -> t.
verladung["maximal"] = verladung["maximal"] * 1000
verladung.to_parquet(OUTPUT_DIR / "verladungskap.parquet")
print("Saved verladungskap.parquet")
verladung
|
||||||
|
# %% [markdown]
# ## Zugdurchlass
# %%
# Parse the train throughput capacities (rows 3:21, columns 17:24 of sheet
# "Parameter") into zugdurchlass.parquet.
raw = pd.read_excel(path, sheet_name="Parameter", header=None)
block = raw.iloc[3:21, 17:24].copy()
# "unlimitiert" means no cap; model it as +inf.
block.replace("unlimitiert", np.inf, inplace=True)
# Drop completely empty rows.
df = block.dropna(how="all").reset_index(drop=True)

# Remove header / title rows.
mask_header = df.apply(
    lambda r: r.astype(str).str.contains(
        "Zugdurchlasskapazität|Maximal|Vielfaches von", regex=True
    ).any(),
    axis=1,
)
df = df[~mask_header].reset_index(drop=True)

df.columns = ["von", "start", "zum", "ziel", "zeitraum", "maximal", "vielfaches_von"]

# Convert the numeric columns to floats.
for col in ["maximal", "vielfaches_von"]:
    df[col] = pd.to_numeric(df[col], errors="coerce")

# Convert kt -> t (capacity only; "vielfaches_von" is a train-step multiple,
# not a mass, and intentionally stays unscaled).
df["maximal"] = df["maximal"] * 1000

zugdurchlass = df.copy()
zugdurchlass.to_parquet(OUTPUT_DIR / "zugdurchlass.parquet")
# Save confirmation added for consistency with every other section.
print("Saved zugdurchlass.parquet")
zugdurchlass
|
||||||
|
# %% [markdown]
# # Mappe Rohkohlebedarf
# %%
# Parse the raw-coal demand sheet into rohkohlebedarf.parquet: one row per
# date with per-power-plant demand and the refinement (Veredlung) demand.
raw = pd.read_excel(path, sheet_name="Rohkohlebedarf", header=None)

df = raw.iloc[2:36, 1:15].copy().reset_index(drop=True)

jahr = int(df.loc[0, 2])
monat = str(df.loc[1, 2])

kw_header = df.loc[1, 4:10].tolist()
ver_header = df.loc[1, 12:14].tolist()

# Replace the last (total) header of each group with a stable name.
kw_names = kw_header[:-1] + ["Gesamt_KW"]
ver_names = ver_header[:-1] + ["Gesamt_Veredlung"]

data = df.loc[3:].reset_index(drop=True)

# NOTE: the original cell built `out` twice; the first attempt assigned the
# scalar year/month columns onto an *empty* DataFrame (producing an empty
# frame) and was immediately discarded. Only the working second construction
# is kept: "datum" is assigned first so the index has the right length before
# the scalar columns are broadcast onto it.
out = pd.DataFrame()
out["datum"] = pd.to_datetime(data[1])
out["jahr"] = jahr
out["monat"] = monat

for idx, name in zip(range(4, 4 + len(kw_names)), kw_names):
    out[name] = pd.to_numeric(data[idx], errors="coerce")

for idx, name in zip(range(12, 12 + len(ver_names)), ver_names):
    out[name] = pd.to_numeric(data[idx], errors="coerce")

# Units: convert kt -> t for the power-plant columns ...
kw_cols = kw_names
out[kw_cols] = out[kw_cols] * 1000
# ... and for the Veredlung total (the two per-coal columns are shares).
welz_col, nocht_col, ges_ver_col = ver_names
out[ges_ver_col] = out[ges_ver_col] * 1000

out.rename(columns={"Welzower Kohle": "Veredel_Welzower", "Nochtener Kohle": "Veredel_Nochtener"}, inplace=True)
out.to_parquet(OUTPUT_DIR / "rohkohlebedarf.parquet")
print("Saved rohkohlebedarf.parquet")
out.round(5)
|
||||||
|
|
||||||
|
# %% [markdown]
# # Mappe Verfügbarkeit
# %%
# Parse the availability sheet: per-date, per-shift availabilities for
# Welzow-Süd and Boxberg (NO+RW), plus the KVB Nord train throughput block.
# (A redundant mid-file `import pandas as pd` was removed; pandas is already
# imported at the top of the file.)
raw = pd.read_excel(path, sheet_name="Verfügbarkeit", header=None)

# Year/month are static cells C3/C4.
jahr = int(raw.iloc[2, 2])
monat = str(raw.iloc[3, 2])

# B8:J38 (date in column B, weekday in column C).
df = raw.iloc[7:38, 1:10].copy().reset_index(drop=True)

# Drop the repeated "Datum" header row and rows without a date.
data = df.copy()
data = data[data[1] != "Datum"].reset_index(drop=True)
data = data[data[1].notna()].reset_index(drop=True)

ver = pd.DataFrame()
ver["datum"] = pd.to_datetime(data[1])
ver["jahr"] = jahr
ver["monat"] = monat

# Welzow-Süd shifts 1-3 in columns 4-6; convert kt -> t.
ver["Welzow_Sued_S1_t"] = pd.to_numeric(data[4], errors="coerce") * 1000
ver["Welzow_Sued_S2_t"] = pd.to_numeric(data[5], errors="coerce") * 1000
ver["Welzow_Sued_S3_t"] = pd.to_numeric(data[6], errors="coerce") * 1000

# Boxberg (NO+RW) shifts 1-3 in columns 7-9; convert kt -> t.
ver["Boxberg_NO_RW_S1_t"] = pd.to_numeric(data[7], errors="coerce") * 1000
ver["Boxberg_NO_RW_S2_t"] = pd.to_numeric(data[8], errors="coerce") * 1000
ver["Boxberg_NO_RW_S3_t"] = pd.to_numeric(data[9], errors="coerce") * 1000

ver.to_parquet(OUTPUT_DIR / "Verfuegbarkeiten.parquet")
print("Saved Verfuegbarkeiten.parquet")
ver.round(5)

# KVB Nord train throughput capacities (L8:N38).
# NOTE(review): kvb_block is truncated positionally to len(data) while `data`
# was filtered by content (header/NaN rows removed) — if those filtered rows
# are not the trailing ones, dates and KVB values could misalign; confirm
# against the workbook layout before relying on row N.
kvb_block = raw.iloc[7:38, 11:14].copy().reset_index(drop=True)
kvb_block = kvb_block.iloc[: len(data)].reset_index(drop=True)

kvb = pd.DataFrame()
kvb["datum"] = pd.to_datetime(data[1])
kvb["jahr"] = jahr
kvb["monat"] = monat
kvb["KVB_Nord_S1_t"] = pd.to_numeric(kvb_block[11], errors="coerce") * 1000
kvb["KVB_Nord_S2_t"] = pd.to_numeric(kvb_block[12], errors="coerce") * 1000
kvb["KVB_Nord_S3_t"] = pd.to_numeric(kvb_block[13], errors="coerce") * 1000

kvb.to_parquet(OUTPUT_DIR / "zugdurchlass_kvb_nord.parquet")
print("Saved zugdurchlass_kvb_nord.parquet")
|
||||||
|
|
||||||
|
|
||||||
|
# %% [markdown]
# # Bunker

# Parse the bunker table (Excel range R23:W30 of sheet "Parameter") into
# bunker.parquet, plus the pre-delivery window into bunker_vorfahrfenster.parquet.
raw = pd.read_excel(path, sheet_name="Parameter", header=None)

# R23:W30 -> rows 22-29, columns 17-22 (0-based).
bunker = raw.iloc[22:30, 17:23].copy()

# Power-plant and refinement bunkers (Jänschwalde, SP, BW3, ISP) occupy
# positional rows 3..6 of the extracted window.
plants = bunker.iloc[3:7].reset_index(drop=True)
plants.columns = [
    "typ",
    "anlage",
    "anfang_mo_di_kt",
    "anfang_rest_kt",
    "zielbestand_kt",
    "maximal_kt",
]

# Forward-fill the bunker type over its detail rows.
plants["typ"] = plants["typ"].ffill()

# Convert the quantity columns to numbers.
kt_cols = ["anfang_mo_di_kt", "anfang_rest_kt", "zielbestand_kt", "maximal_kt"]
for col in kt_cols:
    plants[col] = pd.to_numeric(plants[col], errors="coerce")

# Convert kt -> t and rename the columns accordingly.
t_cols = [name[:-3] + "_t" for name in kt_cols]
plants = plants.rename(columns=dict(zip(kt_cols, t_cols)))
plants[t_cols] *= 1000

# Pre-delivery window in days (sheet cell T30; the iloc slice kept the
# original row/column labels, so .loc[29, 19] addresses it directly).
vorfahrfenster_tage = pd.to_numeric(bunker.loc[29, 19], errors="coerce")

plants.to_parquet(OUTPUT_DIR / "bunker.parquet")
print("Saved bunker.parquet")
plants

pd.DataFrame([{"vorfahrfenster_tage": vorfahrfenster_tage}]).to_parquet(
    OUTPUT_DIR / "bunker_vorfahrfenster.parquet"
)
print("Saved bunker_vorfahrfenster.parquet")

# %%
print("\n ####################### Done preprocessing exploration_preprocess.py ####################### \n")
|
||||||
Loading…
x
Reference in New Issue
Block a user