Initial clean commit
This commit is contained in:
commit
f984bc4cf8
14
.gitignore
vendored
Normal file
14
.gitignore
vendored
Normal file
@ -0,0 +1,14 @@
|
||||
data/
|
||||
*.parquet
|
||||
*.xlsx
|
||||
*.pdf
|
||||
.venv/
|
||||
node_modules/
|
||||
.DS_Store
|
||||
latex/*.pdf
|
||||
*.log
|
||||
data/
|
||||
*.parquet
|
||||
*.xlsx
|
||||
latex/
|
||||
latex/*.pdf
|
||||
3
src/optimization/__init__.py
Normal file
3
src/optimization/__init__.py
Normal file
@ -0,0 +1,3 @@
|
||||
# Public API of the ``optimization`` package: re-export the model-builder helpers
# so callers can do ``from optimization import build_model, load_tables, solve_model``.
from .model_builder import build_model, load_tables, solve_model

__all__ = ["build_model", "load_tables", "solve_model"]
|
||||
BIN
src/optimization/__pycache__/__init__.cpython-313.pyc
Normal file
BIN
src/optimization/__pycache__/__init__.cpython-313.pyc
Normal file
Binary file not shown.
BIN
src/optimization/__pycache__/model_builder.cpython-313.pyc
Normal file
BIN
src/optimization/__pycache__/model_builder.cpython-313.pyc
Normal file
Binary file not shown.
BIN
src/optimization/__pycache__/run_optimization.cpython-313.pyc
Normal file
BIN
src/optimization/__pycache__/run_optimization.cpython-313.pyc
Normal file
Binary file not shown.
1479
src/optimization/model_builder.py
Normal file
1479
src/optimization/model_builder.py
Normal file
File diff suppressed because it is too large
Load Diff
731
src/optimization/run_optimization.py
Normal file
731
src/optimization/run_optimization.py
Normal file
@ -0,0 +1,731 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import pandas as pd
|
||||
import pyomo.environ as pyo
|
||||
from pyomo.environ import value
|
||||
|
||||
SRC_ROOT = Path(__file__).resolve().parents[1]
|
||||
if str(SRC_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(SRC_ROOT))
|
||||
|
||||
from optimization.model_builder import build_model, load_tables, solve_model
|
||||
|
||||
|
||||
|
||||
|
||||
def report_results(model: pyo.ConcreteModel, max_rows: int) -> None:
    """Print the objective value and the non-zero production decisions ``k``.

    Args:
        model: A solved Pyomo model exposing sets ``I, J, W, D, S`` and the
            indexed variable ``k`` plus the objective ``obj``.
        max_rows: Maximum number of non-zero decision rows to print before
            the output is truncated.
    """
    # Local import keeps the module's top-level import block untouched.
    from itertools import product

    print("Objective value:", value(model.obj))
    print("Non-zero production decisions (k):")
    printed = 0
    # product() iterates in exactly the same order as the original
    # five-level nested loop (I, J, W, D, S) — flat is better than nested.
    for i, j, w, d, s in product(model.I, model.J, model.W, model.D, model.S):
        qty = value(model.k[i, j, w, d, s])
        # Treat anything below the solver tolerance as zero.
        if qty > 1e-6:
            print(f" {i} -> {j} (W{w} D{d} S{s}): {qty:.0f}")
            printed += 1
            if printed >= max_rows:
                print(" ... output truncated ...")
                return
|
||||
|
||||
def export_results(model: pyo.ConcreteModel, output_path: Path) -> None:
    """Export the solved model to an Excel workbook at ``output_path``.

    Produces up to three sheets:
      * ``Sheet1`` — per-day/shift deliveries by plant and coal source, with
        totals, demand and daily delivery deviation columns.
      * ``mit_Bunkerbestand`` — same layout plus bunker stock / inflow columns
        (only when the model defines bunker variables).
      * ``Kohlemischverhältnis`` — empirical coal-blend ratios vs. targets.

    Uses the ``xlsxwriter`` engine when available, otherwise ``openpyxl``.
    NOTE(review): assumes ``model._wd_to_date`` maps (week, day) -> date —
    confirm against model_builder.
    """
    output_path.parent.mkdir(parents=True, exist_ok=True)
    wd_to_date = getattr(model, "_wd_to_date", {})

    def safe_value(var) -> float:
        # pyo.value(..., exception=False) yields None for stale/uninitialized
        # variables; coerce those to 0.0 so sums stay well-defined.
        val = pyo.value(var, exception=False)
        return float(val) if val is not None else 0.0

    def autosize_worksheet(ws, df, index_cols=None, max_width=25):
        # Compute column widths: one leading width for the index block, then
        # one per data column, capped at max_width; samples at most 200 rows.
        if index_cols is None:
            index_cols = list(df.index.names)
        idx_names = list(index_cols)
        col_widths = [max(10, max(len(str(n)) for n in idx_names if n is not None) + 2) if idx_names else 10]
        for col_idx, col in enumerate(df.columns):
            header = " / ".join([str(c) for c in col]) if isinstance(col, tuple) else str(col)
            max_len = max(len(header), 8)
            sample = df.iloc[:200, col_idx]
            max_len = max(max_len, sample.astype(str).str.len().max())
            col_widths.append(min(max_width, int(max_len) + 2))
        return col_widths

    def adjust_widths_for_labels(df, widths, label_scale, index_scale=None):
        # Scale selected column widths by the factor keyed on the column's
        # top-level label; optionally rescale the index column as well.
        adjusted = widths[:]
        if index_scale is not None and adjusted:
            adjusted[0] = max(10, int(adjusted[0] * index_scale))
        if hasattr(df.columns, "get_level_values"):
            top_level = df.columns.get_level_values(0)
            for idx, label in enumerate(top_level, start=1):
                if label in label_scale:
                    adjusted[idx] = max(6, int(adjusted[idx] * label_scale[label]))
        return adjusted

    # ------------------------------------------------------------------
    # Collect one record per (plant, week, day, shift) from the solution.
    # ------------------------------------------------------------------
    lieferungen_schicht = []
    for j in model.J:
        for w in model.W:
            for d in model.D:
                for s in model.S:
                    # Plant "V" (Veredlung) has its demand split into two
                    # source-specific parameters; all others use model.d.
                    if j == "V":
                        nachfrage = pyo.value(model.dV_N[w, d] + model.dV_W[w, d])
                    else:
                        nachfrage = pyo.value(model.d[j, w, d])
                    use_bunker_out = hasattr(model, "bunker_out") and j in getattr(model, "J_BUNKER", [])
                    delivery_sum = sum(safe_value(model.x[i, j, w, d, s]) for i in model.I)
                    out_sum = (
                        sum(safe_value(model.bunker_out[i, j, w, d, s]) for i in model.I)
                        if use_bunker_out
                        else delivery_sum
                    )
                    bunker_inflow = 0.0
                    if use_bunker_out:
                        # Inflow into the bunker = delivered minus taken out.
                        x_sum = sum(safe_value(model.x[i, j, w, d, s]) for i in model.I)
                        out_sum = sum(safe_value(model.bunker_out[i, j, w, d, s]) for i in model.I)
                        bunker_inflow = round(x_sum - out_sum, 2)

                    # Closure over the current loop indices; it is called
                    # immediately below, so late binding is not an issue.
                    def flow_val(i_name: str) -> float:
                        return safe_value(model.x[i_name, j, w, d, s])

                    lieferungen_schicht.append(
                        {
                            "kraftwerk": j,
                            "woche": w,
                            "tag": d,
                            "datum": wd_to_date.get((w, d)),
                            "schicht": s,
                            "nachfrage_tonnen": nachfrage,
                            "lieferung_tonnen": delivery_sum,
                            "lieferungsabweichung_tonnen": round(delivery_sum - nachfrage, 2),
                            "bunkerzufluss_tonnen": bunker_inflow,
                            "Nochten": flow_val("Nochten"),
                            "Reichwalde": flow_val("Reichwalde"),
                            "Welzow": flow_val("Welzow"),
                        }
                    )

    # Canonical orderings used for all output sheets.
    order_k_pw = ["J", "SP", "B3", "B4"]
    order_k_v = ["V"]
    order_sources = ["Reichwalde", "Nochten", "Welzow"]
    order_s = ["F", "S", "N"]

    df_raw = pd.DataFrame(lieferungen_schicht).copy()
    df_raw["datum"] = pd.to_datetime(df_raw["datum"])

    # Split V-demand per source for the dedicated "V" columns.
    v_demand_map = {
        (int(w), d): {
            "welzow": float(pyo.value(model.dV_W[w, d])),
            "nochten": float(pyo.value(model.dV_N[w, d])),
        }
        for w in model.W
        for d in model.D
    }

    # Ensure the columns exist (all-NaN), then fill from v_demand_map.
    df_raw["nachfrage_welzow"] = df_raw.get("nachfrage_welzow", pd.Series(index=df_raw.index))
    df_raw["nachfrage_nochten"] = df_raw.get("nachfrage_nochten", pd.Series(index=df_raw.index))
    df_raw["nachfrage_welzow"] = df_raw.apply(
        lambda r: (
            r["nachfrage_welzow"]
            if pd.notna(r["nachfrage_welzow"])
            else v_demand_map.get((int(r["woche"]), r["tag"]), {}).get("welzow", 0)
        ),
        axis=1,
    )
    df_raw["nachfrage_nochten"] = df_raw.apply(
        lambda r: (
            r["nachfrage_nochten"]
            if pd.notna(r["nachfrage_nochten"])
            else v_demand_map.get((int(r["woche"]), r["tag"]), {}).get("nochten", 0)
        ),
        axis=1,
    )

    df = df_raw.rename(columns={"lieferungen_tonnen": "lieferung_tonnen"}).copy()
    if "lieferung_tonnen" not in df.columns:
        df["lieferung_tonnen"] = df[order_sources].sum(axis=1)

    present_sources = [c for c in order_sources if c in df.columns]

    # Wide table: (kraftwerk, source, schicht) columns, (datum, woche, tag) index.
    df_src = (
        df.pivot_table(
            index=["datum", "woche", "tag"],
            columns=["kraftwerk", "schicht"],
            values=present_sources,
            aggfunc="sum",
        )
        .fillna(0)
    )
    df_src.columns = df_src.columns.reorder_levels([1, 0, 2])
    df_src = df_src.reindex(
        columns=pd.MultiIndex.from_product([order_k_pw + order_k_v, present_sources, order_s]),
        fill_value=0,
    )

    # Per-day demand per plant (demand is constant across shifts -> first()).
    df_demand = (
        df.groupby(["datum", "woche", "tag", "kraftwerk"])["nachfrage_tonnen"]
        .first()
        .unstack("kraftwerk")
        .reindex(columns=order_k_pw + order_k_v, fill_value=0)
        .reindex(df_src.index, fill_value=0)
    )

    df_v_demand_split = (
        df[df["kraftwerk"] == "V"]
        .groupby(["datum", "woche", "tag"])[["nachfrage_welzow", "nachfrage_nochten"]]
        .first()
        .reindex(df_src.index, fill_value=0)
    )

    # Daily totals per plant, summed over source and shift.
    totals_plain = df_src.T.groupby(level=0).sum().T
    totals_plain = totals_plain.reindex(columns=order_k_pw + order_k_v, fill_value=0)

    # Daily delivery deviation (delivered - demand) per plant.
    day_diff_rows = []
    for w in model.W:
        for d in model.D:
            date = wd_to_date.get((w, d))
            if date is None:
                continue
            row = {"datum": date, "woche": w, "tag": d}
            for j in order_k_pw + order_k_v:
                if j == "V":
                    demand = pyo.value(model.dV_N[w, d] + model.dV_W[w, d])
                else:
                    demand = pyo.value(model.d[j, w, d])
                delivered = pyo.value(model.y_delivery[j, w, d])
                row[j] = delivered - demand
            day_diff_rows.append(row)

    day_diff_plain = (
        pd.DataFrame(day_diff_rows)
        .set_index(["datum", "woche", "tag"])
        .reindex(totals_plain.index, fill_value=0)
        .reindex(columns=order_k_pw + order_k_v, fill_value=0)
    )

    # Promote the flat helper frames to 3-level column MultiIndexes so they
    # can be concatenated next to the (kraftwerk, source, schicht) columns.
    totals = totals_plain.copy()
    totals.columns = pd.MultiIndex.from_tuples([(k, "Gesamt", "") for k in totals.columns])

    demand_cols = df_demand.copy()
    demand_cols.columns = pd.MultiIndex.from_tuples([(k, "Nachfrage", "") for k in demand_cols.columns])

    day_diff_cols = day_diff_plain.copy()
    day_diff_cols.columns = pd.MultiIndex.from_tuples(
        [(k, "Lieferungstagesabweichung", "") for k in day_diff_cols.columns]
    )

    v_demand_cols = df_v_demand_split.copy()
    v_demand_cols.columns = pd.MultiIndex.from_tuples(
        [
            ("V", "Nachfrage_Welzow", ""),
            ("V", "Nachfrage_Nochtener", ""),
        ]
    )

    has_bunker = hasattr(model, "bunker")
    # Column order for Sheet1: per plant all source/shift columns, then the
    # aggregate columns; the "V" plant gets its demand-split columns first.
    col_order = []
    for k in order_k_pw:
        for src in present_sources:
            for sch in order_s:
                col_order.append((k, src, sch))
        col_order.append((k, "Gesamt", ""))
        col_order.append((k, "Nachfrage", ""))
        col_order.append((k, "Lieferungstagesabweichung", ""))

    col_order += [("V", src, sch) for src in present_sources for sch in order_s]
    col_order += [
        ("V", "Nachfrage_Welzow", ""),
        ("V", "Nachfrage_Nochtener", ""),
        ("V", "Gesamt", ""),
        ("V", "Nachfrage", ""),
        ("V", "Lieferungstagesabweichung", ""),
    ]

    df_out = pd.concat([df_src, v_demand_cols, totals, demand_cols, day_diff_cols], axis=1)

    df_out = df_out.reindex(
        columns=col_order,
        fill_value=0,
    )

    # Rebuild the index so rows sort by date, then by German weekday order.
    weekday_order = ["Mo", "Di", "Mi", "Do", "Fr", "Sa", "So"]
    idx = df_out.index
    df_out = df_out.copy()
    df_out.index = pd.MultiIndex.from_arrays(
        [
            pd.to_datetime(idx.get_level_values("datum")),
            idx.get_level_values("woche"),
            pd.Categorical(idx.get_level_values("tag"), categories=weekday_order, ordered=True),
        ],
        names=["datum", "woche", "tag"],
    )
    df_out = df_out.sort_index(level=["datum", "woche", "tag"])

    # NOTE(review): ``df`` is rebound here to the sheet-ready frame (tonnes);
    # ``df_out`` in kilotonnes appears unused afterwards — confirm intended.
    df = df_out.copy()
    df_out = df_out / 1000

    # ------------------------------------------------------------------
    # Optional bunker sheet: daily stock, previous-day stock and inflow.
    # ------------------------------------------------------------------
    bunker_sheet = None
    if has_bunker:
        bunker_rows = []
        for j in getattr(model, "J_BUNKER", []):
            for w in model.W:
                for d in model.D:
                    date = wd_to_date.get((w, d))
                    if date is None:
                        continue
                    bunker_total = sum(safe_value(model.bunker[i, j, w, d]) for i in model.I)
                    bunker_rows.append(
                        {
                            "kraftwerk": j,
                            "woche": w,
                            "tag": d,
                            "datum": date,
                            "bunkerbestand_tonnen": bunker_total,
                        }
                    )

        if bunker_rows:
            bunker_df = pd.DataFrame(bunker_rows)
            bunker_df["datum"] = pd.to_datetime(bunker_df["datum"])
            bunker_df = bunker_df.sort_values(["kraftwerk", "datum", "woche", "tag"])
            # Previous-day stock via a per-plant shift; first day defaults to 0.
            bunker_df["vortags_bunkerbestand_tonnen"] = bunker_df.groupby("kraftwerk")[
                "bunkerbestand_tonnen"
            ].shift(1)
            bunker_df["vortags_bunkerbestand_tonnen"] = bunker_df["vortags_bunkerbestand_tonnen"].fillna(0)
            bunker_pivot = (
                bunker_df.pivot_table(
                    index=["datum", "woche", "tag"],
                    columns=["kraftwerk"],
                    values=["bunkerbestand_tonnen"],
                    aggfunc="first",
                )
                .fillna(0)
            )
            bunker_pivot = bunker_pivot.reindex(columns=order_k_pw + order_k_v, level=1, fill_value=0)
            bunker_pivot.columns = pd.MultiIndex.from_tuples(
                [(k, "Bunkerbestand", "") for k in bunker_pivot.columns.get_level_values(1)]
            )
            bunker_prev_pivot = (
                bunker_df.pivot_table(
                    index=["datum", "woche", "tag"],
                    columns=["kraftwerk"],
                    values=["vortags_bunkerbestand_tonnen"],
                    aggfunc="first",
                )
                .fillna(0)
            )
            bunker_prev_pivot = bunker_prev_pivot.reindex(columns=order_k_pw + order_k_v, level=1, fill_value=0)
            bunker_prev_pivot.columns = pd.MultiIndex.from_tuples(
                [(k, "Vortags_Bunkerbestand", "") for k in bunker_prev_pivot.columns.get_level_values(1)]
            )
            inflow_pivot = None
            if "bunkerzufluss_tonnen" in df_raw.columns:
                inflow_pivot = (
                    df_raw.pivot_table(
                        index=["datum", "woche", "tag"],
                        columns=["kraftwerk"],
                        values=["bunkerzufluss_tonnen"],
                        aggfunc="sum",
                    )
                    .fillna(0)
                )
                inflow_pivot.columns = pd.MultiIndex.from_tuples(
                    [(k, "Bunkerzufluss", "") for k in inflow_pivot.columns.get_level_values(1)]
                )

            frames = [df]
            frames.append(bunker_prev_pivot.reindex(df.index, fill_value=0))
            if inflow_pivot is not None:
                frames.append(inflow_pivot.reindex(df.index, fill_value=0))
            frames.append(bunker_pivot.reindex(df.index, fill_value=0))
            bunker_sheet = pd.concat(frames, axis=1)

            # Same column order as Sheet1, with the bunker columns appended
            # per plant only when they actually exist for that plant.
            col_order_bunker = []
            for k in order_k_pw:
                for src in present_sources:
                    for sch in order_s:
                        col_order_bunker.append((k, src, sch))
                col_order_bunker.append((k, "Gesamt", ""))
                col_order_bunker.append((k, "Nachfrage", ""))
                col_order_bunker.append((k, "Lieferungstagesabweichung", ""))
                if (k, "Vortags_Bunkerbestand", "") in bunker_sheet.columns:
                    col_order_bunker.append((k, "Vortags_Bunkerbestand", ""))
                if (k, "Bunkerzufluss", "") in bunker_sheet.columns:
                    col_order_bunker.append((k, "Bunkerzufluss", ""))
                if (k, "Bunkerbestand", "") in bunker_sheet.columns:
                    col_order_bunker.append((k, "Bunkerbestand", ""))

            col_order_bunker += [("V", src, sch) for src in present_sources for sch in order_s]
            col_order_bunker += [
                ("V", "Nachfrage_Welzow", ""),
                ("V", "Nachfrage_Nochtener", ""),
                ("V", "Gesamt", ""),
                ("V", "Nachfrage", ""),
                ("V", "Lieferungstagesabweichung", ""),
            ]
            if ("V", "Vortags_Bunkerbestand", "") in bunker_sheet.columns:
                col_order_bunker.append(("V", "Vortags_Bunkerbestand", ""))
            if ("V", "Bunkerzufluss", "") in bunker_sheet.columns:
                col_order_bunker.append(("V", "Bunkerzufluss", ""))
            if ("V", "Bunkerbestand", "") in bunker_sheet.columns:
                col_order_bunker.append(("V", "Bunkerbestand", ""))

            bunker_sheet = bunker_sheet.reindex(columns=col_order_bunker, fill_value=0)

    # Prefer xlsxwriter; fall back to openpyxl when it is not installed.
    try:
        import xlsxwriter  # type: ignore

        excel_engine = "xlsxwriter"
    except Exception:
        excel_engine = "openpyxl"

    with pd.ExcelWriter(output_path, engine=excel_engine) as writer:
        df.to_excel(writer, sheet_name="Sheet1")
        # Column sizing; the two branches differ only in the worksheet API
        # (xlsxwriter.set_column vs. openpyxl.column_dimensions).
        if excel_engine == "xlsxwriter":
            ws1 = writer.sheets["Sheet1"]
            widths = autosize_worksheet(ws1, df)
            widths = adjust_widths_for_labels(
                df,
                widths,
                {
                    "Reichwalde": 0.5,
                    "Nochten": 0.5,
                    "Welzow": 0.5,
                    "Gesamt": 0.5,
                    "Nachfrage": 0.5,
                    "Bunkerbestand": 0.5,
                    "Bunkerzufluss": 0.5,
                    "Vortags_Bunkerbestand": 0.5,
                },
                index_scale=1.2,
            )
            for i, w in enumerate(widths):
                ws1.set_column(i, i, w)
        else:
            ws1 = writer.sheets["Sheet1"]
            widths = autosize_worksheet(ws1, df)
            widths = adjust_widths_for_labels(
                df,
                widths,
                {
                    "Reichwalde": 0.5,
                    "Nochten": 0.5,
                    "Welzow": 0.5,
                    "Gesamt": 0.5,
                    "Nachfrage": 0.5,
                    "Bunkerbestand": 0.5,
                    "Bunkerzufluss": 0.5,
                    "Vortags_Bunkerbestand": 0.5,
                },
                index_scale=1.2,
            )
            for i, w in enumerate(widths):
                # NOTE(review): chr(65 + i) only addresses columns A..Z;
                # wider sheets would need multi-letter column names — confirm.
                ws1.column_dimensions[chr(65 + i)].width = w

        if bunker_sheet is not None:
            bunker_sheet.to_excel(writer, sheet_name="mit_Bunkerbestand")
            if excel_engine == "xlsxwriter":
                workbook = writer.book
                worksheet = writer.sheets["mit_Bunkerbestand"]
                widths = autosize_worksheet(worksheet, bunker_sheet)
                widths = adjust_widths_for_labels(
                    bunker_sheet,
                    widths,
                    {
                        "Reichwalde": 0.5,
                        "Nochten": 0.5,
                        "Welzow": 0.5,
                        "Gesamt": 0.5,
                        "Nachfrage": 0.5,
                        "Bunkerbestand": 0.5,
                        "Bunkerzufluss": 0.5,
                        "Vortags_Bunkerbestand": 0.5,
                    },
                    index_scale=1.2,
                )
                for i, w in enumerate(widths):
                    worksheet.set_column(i, i, w)

                header_fmt = workbook.add_format({"bold": True, "bg_color": "#E6E6E6", "border": 1})
                block_colors = ["#DCEFFE", "#FDEBD0", "#E8F8F5", "#FADBD8", "#E8DAEF", "#FEF9E7"]
                block_formats = [
                    workbook.add_format({"bold": True, "bg_color": color, "border": 1}) for color in block_colors
                ]

                index_cols = len(bunker_sheet.index.names)
                n_header_rows = bunker_sheet.columns.nlevels

                # Base header formatting for index columns.
                for r in range(n_header_rows):
                    for c in range(index_cols):
                        worksheet.write(r, c, "", header_fmt)

                # Apply block colors per Kraftwerk on header rows.
                top_level = bunker_sheet.columns.get_level_values(0)
                blocks = {}
                for idx, label in enumerate(top_level):
                    blocks.setdefault(label, []).append(idx)

                for b_idx, (label, cols) in enumerate(blocks.items()):
                    fmt = block_formats[b_idx % len(block_formats)]
                    for r in range(n_header_rows):
                        for c in cols:
                            # Local ``value`` shadows pyomo's imported
                            # ``value`` inside this function — harmless here
                            # because only ``pyo.value`` is used elsewhere.
                            value = bunker_sheet.columns.get_level_values(r)[c]
                            worksheet.write(r, index_cols + c, value, fmt)
            else:
                from openpyxl.styles import Border, Font, PatternFill, Side

                worksheet = writer.sheets["mit_Bunkerbestand"]
                widths = autosize_worksheet(worksheet, bunker_sheet)
                widths = adjust_widths_for_labels(
                    bunker_sheet,
                    widths,
                    {
                        "Reichwalde": 0.5,
                        "Nochten": 0.5,
                        "Welzow": 0.5,
                        "Gesamt": 0.5,
                        "Nachfrage": 0.5,
                        "Bunkerbestand": 0.5,
                        "Bunkerzufluss": 0.5,
                        "Vortags_Bunkerbestand": 0.5,
                    },
                    index_scale=1.2,
                )
                for i, w in enumerate(widths):
                    worksheet.column_dimensions[chr(65 + i)].width = w
                header_fill = PatternFill("solid", fgColor="E6E6E6")
                block_colors = ["DCEFFE", "FDEBD0", "E8F8F5", "FADBD8", "E8DAEF", "FEF9E7"]
                block_fills = [PatternFill("solid", fgColor=c) for c in block_colors]
                bold_font = Font(bold=True)
                border = Border(
                    left=Side(style="thin"),
                    right=Side(style="thin"),
                    top=Side(style="thin"),
                    bottom=Side(style="thin"),
                )

                index_cols = len(bunker_sheet.index.names)
                n_header_rows = bunker_sheet.columns.nlevels
                top_level = bunker_sheet.columns.get_level_values(0)
                blocks = {}
                for idx, label in enumerate(top_level):
                    blocks.setdefault(label, []).append(idx)

                # openpyxl cells are 1-based, hence the +1 offsets.
                for r in range(n_header_rows):
                    for c in range(index_cols):
                        cell = worksheet.cell(row=r + 1, column=c + 1)
                        cell.fill = header_fill
                        cell.font = bold_font
                        cell.border = border

                for b_idx, (label, cols) in enumerate(blocks.items()):
                    fill = block_fills[b_idx % len(block_fills)]
                    for r in range(n_header_rows):
                        for c in cols:
                            cell = worksheet.cell(row=r + 1, column=index_cols + c + 1)
                            cell.fill = fill
                            cell.font = bold_font
                            cell.border = border

        # Coal-blend ratios over the entire horizon ("Kohlemischverhältnis").
        j_name_map = {
            "J": "Jänschwalde",
            "SP": "Schwarze Pumpe",
            "B3": "Boxberg Werk 3",
            "B4": "Boxberg Werk 4",
        }
        i_name_map = {
            "Reichwalde": "Reichwalder-Kohle",
            "Nochten": "Nochtener-Kohle",
            "Welzow": "Welzower-Kohle",
        }

        # Empirical mix over full horizon based on delivered quantities (x).
        total_delivered_by_j = {}
        for j_code in j_name_map:
            if j_code not in model.J:
                continue
            total_delivered_by_j[j_code] = sum(
                safe_value(model.x[i, j_code, w, d, s])
                for i in model.I
                for w in model.W
                for d in model.D
                for s in model.S
                if (i, j_code, w, d, s) in model.x
            )

        # Empirical bunker mix over full horizon based on bunker stock.
        total_bunker_by_j = {}
        if hasattr(model, "bunker"):
            for j_code in j_name_map:
                if j_code not in getattr(model, "J_BUNKER", []):
                    continue
                total_bunker_by_j[j_code] = sum(
                    safe_value(model.bunker[i, j_code, w, d])
                    for i in model.I
                    for w in model.W
                    for d in model.D
                    if (i, j_code, w, d) in model.bunker
                )

        mix_rows = []
        for j_code, j_name in j_name_map.items():
            if j_code not in model.J:
                continue
            for i_code, i_name in i_name_map.items():
                if i_code not in model.I:
                    continue
                denom = total_delivered_by_j.get(j_code, 0.0)
                num = sum(
                    safe_value(model.x[i_code, j_code, w, d, s])
                    for w in model.W
                    for d in model.D
                    for s in model.S
                    if (i_code, j_code, w, d, s) in model.x
                )
                empirisch = round(100 * num / denom, 2) if denom > 0 else 0.0
                bunker_empirisch = 0.0
                if hasattr(model, "bunker") and j_code in total_bunker_by_j:
                    denom_b = total_bunker_by_j.get(j_code, 0.0)
                    num_b = sum(
                        safe_value(model.bunker[i_code, j_code, w, d])
                        for w in model.W
                        for d in model.D
                        if (i_code, j_code, w, d) in model.bunker
                    )
                    bunker_empirisch = round(100 * num_b / denom_b, 2) if denom_b > 0 else 0.0
                mix_rows.append(
                    {
                        "kraftwerk": j_name,
                        "kohlesorte": i_name,
                        "ziel_low": round(100 * pyo.value(model.alpha_target_low[i_code, j_code]), 2),
                        "ziel_high": round(100 * pyo.value(model.alpha_target_high[i_code, j_code]), 2),
                        "maximal": round(100 * pyo.value(model.alpha_max[i_code, j_code]), 2),
                        "minimal": round(100 * pyo.value(model.alpha_min[i_code, j_code]), 2),
                        "empirisch": empirisch,
                        "bunker_empirisch": bunker_empirisch,
                    }
                )

        mix_df = pd.DataFrame(mix_rows)
        mix_df.to_excel(writer, sheet_name="Kohlemischverhältnis", index=False)
        if excel_engine == "xlsxwriter":
            ws_mix = writer.sheets["Kohlemischverhältnis"]
            workbook = writer.book
            mix_block_colors = ["#E8F8F5", "#FDEBD0", "#DCEFFE", "#FADBD8"]
            mix_formats = [workbook.add_format({"bg_color": c}) for c in mix_block_colors]
            red_fill = workbook.add_format({"bg_color": "#F5B7B1"})
            green_fill = workbook.add_format({"bg_color": "#D4EFDF"})
            if not mix_df.empty:
                emp_col = mix_df.columns.get_loc("empirisch")
                bunker_emp_col = mix_df.columns.get_loc("bunker_empirisch")
                current = None
                block_idx = -1
                # One color block per plant; red/green highlight marks whether
                # the empirical mix lies outside/inside the target band.
                for r, row in mix_df.iterrows():
                    if row["kraftwerk"] != current:
                        block_idx += 1
                        current = row["kraftwerk"]
                    fmt = mix_formats[block_idx % len(mix_formats)]
                    for c in range(0, min(6, mix_df.shape[1])):
                        ws_mix.write(r + 1, c, row.iloc[c], fmt)
                    emp_fmt = (
                        red_fill
                        if row["empirisch"] < row["ziel_low"] or row["empirisch"] > row["ziel_high"]
                        else green_fill
                    )
                    ws_mix.write(r + 1, emp_col, row["empirisch"], emp_fmt)
                    ws_mix.write(r + 1, bunker_emp_col, row["bunker_empirisch"])
            widths = autosize_worksheet(ws_mix, mix_df, index_cols=[])
            for i, w in enumerate(widths[1:]):
                ws_mix.set_column(i, i, w)
        else:
            ws_mix = writer.sheets["Kohlemischverhältnis"]
            if not mix_df.empty:
                from openpyxl.styles import PatternFill

                mix_block_colors = ["E8F8F5", "FDEBD0", "DCEFFE", "FADBD8"]
                mix_fills = [PatternFill("solid", fgColor=c) for c in mix_block_colors]
                red_fill = PatternFill("solid", fgColor="F5B7B1")
                green_fill = PatternFill("solid", fgColor="D4EFDF")
                current = None
                block_idx = -1
                for row_idx, kraftwerk in enumerate(mix_df["kraftwerk"], start=2):
                    if kraftwerk != current:
                        block_idx += 1
                        current = kraftwerk
                    fill = mix_fills[block_idx % len(mix_fills)]
                    for col_idx in range(1, min(7, mix_df.shape[1]) + 1):
                        ws_mix.cell(row=row_idx, column=col_idx).fill = fill
                emp_col = mix_df.columns.get_loc("empirisch") + 1
                bunker_emp_col = mix_df.columns.get_loc("bunker_empirisch") + 1
                for r, row in mix_df.iterrows():
                    fill = red_fill if row["empirisch"] < row["ziel_low"] or row["empirisch"] > row["ziel_high"] else green_fill
                    ws_mix.cell(row=r + 2, column=emp_col).fill = fill
                # No group fill for bunker_empirisch (column H).
            widths = autosize_worksheet(ws_mix, mix_df, index_cols=[])
            for i, w in enumerate(widths[1:]):
                ws_mix.column_dimensions[chr(65 + i)].width = w
|
||||
|
||||
|
||||
def main() -> None:
    """CLI entry point: load input tables, build and solve the model, export.

    Reads parquet inputs from ``--data-dir``, solves with the chosen solver
    and writes the result workbook to ``--output-xlsx``.
    """
    parser = argparse.ArgumentParser(description="Run the Pyomo optimization model.")
    parser.add_argument(
        "--data-dir",
        type=Path,
        default=Path("data/processed"),
        help="Directory containing input parquet files.",
    )
    parser.add_argument(
        "--solver",
        default="gurobi",
        help="Solver name passed to Pyomo (default: gurobi).",
    )
    # Kept for CLI backward compatibility: consumed only by the optional
    # report_results() console report, which is not invoked here.
    parser.add_argument(
        "--max-rows",
        type=int,
        default=50,
        help="Maximum number of non-zero decision variable rows to print.",
    )
    parser.add_argument(
        "--time-limit",
        type=int,
        default=600,
        help="Time limit (seconds) for the solver (default: 600).",
    )
    parser.add_argument(
        "--output-xlsx",
        type=Path,
        default=Path("data/out/output.xlsx"),
        help="Excel output file for deliveries by plant/week/day/shift.",
    )
    parser.add_argument(
        "--mip-gap",
        type=float,
        default=0.03,
        help="MIP gap tolerance (default: 0.03).",
    )
    parser.add_argument(
        "--step-size-tonnes",
        type=int,
        default=1000,
        choices=[960, 1000],
        help="Discrete train step size in tonnes (default: 1000).",
    )
    args = parser.parse_args()

    tables = load_tables(args.data_dir)
    model = build_model(tables, step_size_tonnes=args.step_size_tonnes)

    solve_model(model, args.solver, args.time_limit, args.mip_gap)
    export_results(model, args.output_xlsx)
|
||||
|
||||
|
||||
# Script entry point guard — run the CLI only when executed directly.
if __name__ == "__main__":
    main()


# Example invocations:
# uv run python src/optimization/run_optimization.py --solver gurobi --mip-gap 0.05
# uv run python src/optimization/run_optimization.py --solver highs
||||
505
src/preprocessing/exploration_preprocess.py
Normal file
505
src/preprocessing/exploration_preprocess.py
Normal file
@ -0,0 +1,505 @@
|
||||
# Generated from exploration.ipynb
# %%
import os
from pathlib import Path

import numpy as np
import pandas as pd

# %%
# Project layout: <repo>/src/preprocessing/<this file>, so two parents up is
# the repository root. Both the input workbook and the output directory can be
# overridden via environment variables (useful for CI / alternative data drops).
PROJECT_ROOT = Path(__file__).resolve().parents[2]
DEFAULT_INPUT = PROJECT_ROOT / "data/input/PoC1_Rohkohleverteilung_Input_Parameter.xlsx"
DEFAULT_OUTPUT = PROJECT_ROOT / "data/processed"
INPUT_XLSX = Path(os.environ.get("POC1_INPUT_XLSX", str(DEFAULT_INPUT)))
OUTPUT_DIR = Path(os.environ.get("POC1_OUTPUT_DIR", str(DEFAULT_OUTPUT)))
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
# `path` is the name the notebook-derived sections below read from.
path = INPUT_XLSX
|
||||
# %% [markdown]
|
||||
# # Mappe Parameter
|
||||
# %% [markdown]
|
||||
# ## Erlaubte Abweichung
|
||||
# ### Kraftwerke
|
||||
# %%
|
||||
raw = pd.read_excel(path, sheet_name="Parameter", header=None)

# Block A1:H18 holds the "allowed deviation" table for the power plants.
t1 = raw.iloc[0:18, 0:8].copy()
# 1. drop completely empty rows
df = t1.dropna(how="all").reset_index(drop=True)

# 2. locate the unit row (contains "[kt]") and the label row right below it
unit_idx = df[df.apply(lambda r: r.astype(str).str.contains(r"\[kt\]").any(), axis=1)].index[0]
label_idx = unit_idx + 1

unit_row = df.loc[unit_idx]
label_row = df.loc[label_idx]

# 3. build column headers from label + unit
headers = []
for u, l in zip(unit_row, label_row):
    if pd.isna(l):
        headers.append(str(u))
    elif pd.isna(u):
        headers.append(str(l))
    else:
        headers.append(f"{l} ({u})")

# 4. data rows start below the header rows
data = df.loc[label_idx + 1:].reset_index(drop=True)
data.columns = headers

# Copy with unique, stable column names
data2 = data.copy()
data2.columns = [
    "col0",
    "titel",
    "col2",
    "zeitraum",
    "minus_kt",
    "plus_kt",
    "minus_pct",
    "plus_pct",
]

# NOTE(review): the original first built a title-filtered frame ("result")
# and then immediately discarded it by rebuilding data3 from data2.copy().
# That dead first pass has been removed; rows without a "zeitraum" value
# (which include the title rows) are dropped below, so output is unchanged.
data3 = data2.copy()

# power plant name lives in col2; forward-fill it over its detail rows
data3["kraftwerk"] = data3["col2"].ffill()

# drop rows without a time period
data3 = data3.dropna(subset=["zeitraum"]).reset_index(drop=True)

# coerce numeric columns
for col in ["minus_kt", "plus_kt", "minus_pct", "plus_pct"]:
    data3[col] = pd.to_numeric(data3[col], errors="coerce")

# final selection; .copy() so the assignments below write to a real frame
# instead of a view (SettingWithCopy / copy-on-write safe)
bounds_power_plants = data3[["kraftwerk", "zeitraum", "minus_kt", "plus_kt", "minus_pct", "plus_pct"]].copy()
# units: convert kt -> t
bounds_power_plants[["minus"]] = bounds_power_plants[["minus_kt"]] * 1000
bounds_power_plants[["plus"]] = bounds_power_plants[["plus_kt"]] * 1000
bounds_power_plants.drop(columns=["minus_kt", "plus_kt"], inplace=True)
bounds_power_plants.to_parquet(OUTPUT_DIR / "bounds_power_plants.parquet")
print("Saved bounds_power_plants.parquet")
bounds_power_plants
|
||||
# %% [markdown]
|
||||
# ### Veredlung
|
||||
# %%
|
||||
raw = pd.read_excel(path, sheet_name="Parameter", header=None)

# Reuse the header rows of the deviation table (rows 1-4) and append the
# Veredlung block (spreadsheet rows 19-28).
t1_upper = raw.iloc[0:4, 0:8].copy()
t2 = raw.iloc[18:28, 0:8].copy()
t2 = pd.concat([t1_upper, t2], axis=0)

# 1. drop completely empty rows
df = t2.dropna(how="all").reset_index(drop=True)

# 2. locate the unit row (contains "[kt]") and the label row right below it
unit_idx = df[df.apply(lambda r: r.astype(str).str.contains(r"\[kt\]").any(), axis=1)].index[0]
label_idx = unit_idx + 1

unit_row = df.loc[unit_idx]
label_row = df.loc[label_idx]

# 3. build column headers from label + unit
headers = []
for u, l in zip(unit_row, label_row):
    if pd.isna(l):
        headers.append(str(u))
    elif pd.isna(u):
        headers.append(str(l))
    else:
        headers.append(f"{l} ({u})")

# 4. data rows start below the header rows
data = df.loc[label_idx + 1:].reset_index(drop=True)
data.columns = headers

# Copy with unique, stable column names
data2 = data.copy()
data2.columns = [
    "col0",
    "titel",
    "col2",
    "zeitraum",
    "minus_kt",
    "plus_kt",
    "minus_pct",
    "plus_pct",
]

# NOTE(review): the original computed a title mask and then mutated the
# *previous* section's leftover data3 (a latent bug), but that whole first
# pass was dead code — it was overwritten by "data3 = data2.copy()" before
# ever being used. The dead pass has been removed; output is unchanged.
data3 = data2.copy()

data3["kraftwerk"] = data3["col2"].ffill()
data3 = data3.dropna(subset=["zeitraum"]).reset_index(drop=True)

for col in ["minus_kt", "plus_kt", "minus_pct", "plus_pct"]:
    data3[col] = pd.to_numeric(data3[col], errors="coerce")

# final selection; .copy() so the assignments below write to a real frame
veredelung_bounds = data3[["kraftwerk", "zeitraum", "minus_kt", "plus_kt", "minus_pct", "plus_pct"]].copy()
# in this table the "kraftwerk" column actually holds the coal type
veredelung_bounds = veredelung_bounds.rename(columns={"kraftwerk": "kohlesorte"})
# units: convert kt -> t
veredelung_bounds[["minus"]] = veredelung_bounds[["minus_kt"]] * 1000
veredelung_bounds[["plus"]] = veredelung_bounds[["plus_kt"]] * 1000
veredelung_bounds.drop(columns=["minus_kt", "plus_kt"], inplace=True)
veredelung_bounds.to_parquet(OUTPUT_DIR / "veredelung_bounds.parquet")
print("Saved veredelung_bounds.parquet")
veredelung_bounds
|
||||
# %% [markdown]
|
||||
# ## Kohlesorten-Mischverhältnis
|
||||
# %%
|
||||
|
||||
|
||||
raw = pd.read_excel(path, sheet_name="Parameter", header=None)

# J3:P16 -> rows 2:16, columns 9:16 (0-based, right bound exclusive)
block = raw.iloc[2:16, 9:16].copy()

# drop empty rows
df = block.dropna(how="all").reset_index(drop=True)

# find the header: the unit row contains "[%]" in raw column 12
unit_idx = df[df[12].astype(str).str.contains(r"\[%\]", regex=True)].index[0]
label_idx = unit_idx + 1

unit_row = df.loc[unit_idx]
label_row = df.loc[label_idx]

# build column headers from label + unit
headers = []
for u, l in zip(unit_row, label_row):
    if pd.isna(l):
        headers.append(str(u))
    elif pd.isna(u):
        headers.append(str(l))
    else:
        headers.append(f"{l} ({u})")

# data rows below the header rows
data = df.loc[label_idx + 1:].reset_index(drop=True)
data.columns = headers

# stable column names
data2 = data.copy()
data2.columns = [
    "titel",
    "kraftwerk",
    "kohlesorte",
    "ziel_low",
    "ziel_high",
    "maximal",
    "minimal",
]

# forward-fill the power plant over its coal-type rows
data2["kraftwerk"] = data2["kraftwerk"].ffill()

# IMPORTANT: filter on kohlesorte (data rows), not on titel
data3 = data2[data2["kohlesorte"].notna()].reset_index(drop=True)

# final frame; .copy() so the numeric conversion below writes to a real
# frame instead of a view (SettingWithCopy / copy-on-write safe)
kohle_mix = data3[[
    "kraftwerk",
    "kohlesorte",
    "ziel_low",
    "ziel_high",
    "maximal",
    "minimal",
]].copy()

# coerce the share columns to consistent floats
num_cols = ["ziel_low", "ziel_high", "maximal", "minimal"]
kohle_mix[num_cols] = kohle_mix[num_cols].apply(pd.to_numeric, errors="coerce")

kohle_mix.to_parquet(OUTPUT_DIR / "kohle_mix.parquet")
print("Saved kohle_mix.parquet")
kohle_mix
|
||||
|
||||
# %% [markdown]
|
||||
# ## Förderkapazitäten
|
||||
# %%
|
||||
|
||||
# J19:M23 -> Zeilen 18:23, Spalten 9:13 (0-basiert)
|
||||
# J19:M23 -> rows 18:23, columns 9:13 (0-based)
block = pd.read_excel(path, sheet_name="Parameter", header=None).iloc[18:23, 9:13].copy()

# drop completely empty rows
df = block.dropna(how="all").reset_index(drop=True)

# drop the "Förderkapazität" title row
df = df[df[9] != "Förderkapazität"].reset_index(drop=True)

# name the columns
df.columns = ["kategorie", "tagebau", "zeitraum", "maximal"]

# forward-fill the category (for Nochten, Gesamt, Welzow-Süd)
df["kategorie"] = df["kategorie"].ffill()

# coerce the maximum to a number
df["maximal"] = pd.to_numeric(df["maximal"], errors="coerce")

# essential columns only; .copy() so the kt -> t conversion below writes to
# a real frame instead of a view (SettingWithCopy / copy-on-write safe)
foerderkap = df[["tagebau", "zeitraum", "maximal"]].copy()

# units: convert kt -> t
foerderkap["maximal"] = foerderkap["maximal"] * 1000
foerderkap.to_parquet(OUTPUT_DIR / "foerderkapaz.parquet")
# fixed: the message previously named "foerderkap.parquet", not the file written
print("Saved foerderkapaz.parquet")
foerderkap
|
||||
# %% [markdown]
|
||||
# ## Verladungskapazitäten
|
||||
# %%
|
||||
raw = pd.read_excel(path, sheet_name="Parameter", header=None)

# J26:M30 -> rows 25:30, columns 9:13 (0-based)
block = raw.iloc[25:30, 9:13].copy()

# drop completely empty rows
df = block.dropna(how="all").reset_index(drop=True)

# drop the "Verladungskapazität" title row
df = df[df[9] != "Verladungskapazität"].reset_index(drop=True)

# name the columns
df.columns = ["kategorie", "verladung", "zeitraum", "maximal"]

# forward-fill the loading point over its detail rows
df["verladung"] = df["verladung"].ffill()

# coerce the maximum to a number
df["maximal"] = pd.to_numeric(df["maximal"], errors="coerce")

# final selection; .copy() so the kt -> t conversion below writes to a real
# frame instead of a view (SettingWithCopy / copy-on-write safe)
verladung = df[["verladung", "zeitraum", "maximal"]].copy()
verladung["maximal"] = verladung["maximal"] * 1000
verladung.to_parquet(OUTPUT_DIR / "verladungskap.parquet")
print("Saved verladungskap.parquet")
verladung
|
||||
# %% [markdown]
|
||||
# ## Zugdurchlass
|
||||
# %%
|
||||
raw = pd.read_excel(path, sheet_name="Parameter", header=None)

# R4:X21 -> rows 3:21, columns 17:24 (0-based): per-route train throughput caps
block = raw.iloc[3:21, 17:24].copy()
# "unlimitiert" means no cap; model it as +inf
block.replace("unlimitiert", np.inf, inplace=True)

# drop completely empty rows
df = block.dropna(how="all").reset_index(drop=True)

# drop title / header rows
mask_header = df.apply(
    lambda r: r.astype(str).str.contains(
        "Zugdurchlasskapazität|Maximal|Vielfaches von", regex=True
    ).any(),
    axis=1,
)
df = df[~mask_header].reset_index(drop=True)

# name the columns
df.columns = ["von", "start", "zum", "ziel", "zeitraum", "maximal", "vielfaches_von"]

# coerce numeric columns to float
for col in ["maximal", "vielfaches_von"]:
    df[col] = pd.to_numeric(df[col], errors="coerce")

# units: convert "maximal" kt -> t
# NOTE(review): only "maximal" is scaled; "vielfaches_von" keeps its original
# unit (presumably already tonnes per train) — verify against the workbook.
df["maximal"] = df["maximal"] * 1000

# final frame
zugdurchlass = df.copy()
zugdurchlass.to_parquet(OUTPUT_DIR / "zugdurchlass.parquet")
# added for consistency: every other section confirms its parquet write
print("Saved zugdurchlass.parquet")
zugdurchlass
|
||||
# %% [markdown]
|
||||
# # Mappe Rohkohlebedarf
|
||||
# %%
|
||||
raw = pd.read_excel(path, sheet_name="Rohkohlebedarf", header=None)

# B3:O36 -> rows 2:36, columns 1:15 (0-based)
df = raw.iloc[2:36, 1:15].copy().reset_index(drop=True)

# static year/month cells
jahr = int(df.loc[0, 2])
monat = str(df.loc[1, 2])

# power-plant headers (cols 4-10) and refinement headers (cols 12-14)
kw_header = df.loc[1, 4:10].tolist()
ver_header = df.loc[1, 12:14].tolist()

# the last column of each group is the total; give it a stable name
kw_names = kw_header[:-1] + ["Gesamt_KW"]
ver_names = ver_header[:-1] + ["Gesamt_Veredlung"]

# daily rows start at row 3 of the excerpt
data = df.loc[3:].reset_index(drop=True)

# NOTE(review): the original built "out" twice; the first build was discarded
# by a second "out = pd.DataFrame()" before being used. Only the surviving
# second build is kept here — output is unchanged.
out = pd.DataFrame()
out["datum"] = pd.to_datetime(data[1])
out["jahr"] = jahr
out["monat"] = monat

for idx, name in zip(range(4, 4 + len(kw_names)), kw_names):
    out[name] = pd.to_numeric(data[idx], errors="coerce")

for idx, name in zip(range(12, 12 + len(ver_names)), ver_names):
    out[name] = pd.to_numeric(data[idx], errors="coerce")

# units: convert kt -> t for the power-plant columns
kw_cols = kw_names
out[kw_cols] = out[kw_cols] * 1000

# convert only the refinement total from kt -> t; the per-coal columns stay
# in their original units (presumably already tonnes — TODO confirm)
welz_col, nocht_col, ges_ver_col = ver_names
out[ges_ver_col] = out[ges_ver_col] * 1000

out.rename(columns={"Welzower Kohle": "Veredel_Welzower", "Nochtener Kohle": "Veredel_Nochtener"}, inplace=True)
out.to_parquet(OUTPUT_DIR / "rohkohlebedarf.parquet")
print("Saved rohkohlebedarf.parquet")
out.round(5)
|
||||
|
||||
# %% [markdown]
|
||||
# # Mappe Verfügbarkeit
|
||||
# %%
|
||||
# NOTE(review): a redundant duplicate "import pandas as pd" lived here in the
# notebook export; pandas is already imported at the top of this script.
raw = pd.read_excel(path, sheet_name="Verfügbarkeit", header=None)

# year/month stay static in C3/C4
jahr = int(raw.iloc[2, 2])
monat = str(raw.iloc[3, 2])

# B8:J38 (date in column B, weekday in column C)
df = raw.iloc[7:38, 1:10].copy().reset_index(drop=True)

# remove the "Datum" header row and rows without a date
data = df.copy()
data = data[data[1] != "Datum"].reset_index(drop=True)
data = data[data[1].notna()].reset_index(drop=True)

ver = pd.DataFrame()
ver["datum"] = pd.to_datetime(data[1])
ver["jahr"] = jahr
ver["monat"] = monat

# Welzow-Süd shifts 1-3 in cols 4,5,6 (kt -> t)
ver["Welzow_Sued_S1_t"] = pd.to_numeric(data[4], errors="coerce") * 1000
ver["Welzow_Sued_S2_t"] = pd.to_numeric(data[5], errors="coerce") * 1000
ver["Welzow_Sued_S3_t"] = pd.to_numeric(data[6], errors="coerce") * 1000

# Boxberg (NO+RW) shifts 1-3 in cols 7,8,9 (kt -> t)
ver["Boxberg_NO_RW_S1_t"] = pd.to_numeric(data[7], errors="coerce") * 1000
ver["Boxberg_NO_RW_S2_t"] = pd.to_numeric(data[8], errors="coerce") * 1000
ver["Boxberg_NO_RW_S3_t"] = pd.to_numeric(data[9], errors="coerce") * 1000

ver.to_parquet(OUTPUT_DIR / "Verfuegbarkeiten.parquet")
print("Saved Verfuegbarkeiten.parquet")
ver.round(5)


# KVB Nord train throughput capacities (L8:N38)
# NOTE(review): kvb_block is truncated to len(data), but it is NOT filtered
# the same way as "data" (header/empty rows are removed there). If the sheet
# has a header row inside B8:J38, the two frames misalign by one row —
# verify against the workbook.
kvb_block = raw.iloc[7:38, 11:14].copy().reset_index(drop=True)
kvb_block = kvb_block.iloc[: len(data)].reset_index(drop=True)

kvb = pd.DataFrame()
kvb["datum"] = pd.to_datetime(data[1])
kvb["jahr"] = jahr
kvb["monat"] = monat
kvb["KVB_Nord_S1_t"] = pd.to_numeric(kvb_block[11], errors="coerce") * 1000
kvb["KVB_Nord_S2_t"] = pd.to_numeric(kvb_block[12], errors="coerce") * 1000
kvb["KVB_Nord_S3_t"] = pd.to_numeric(kvb_block[13], errors="coerce") * 1000

kvb.to_parquet(OUTPUT_DIR / "zugdurchlass_kvb_nord.parquet")
print("Saved zugdurchlass_kvb_nord.parquet")
|
||||
|
||||
|
||||
# %% [markdown]
|
||||
# # Bunker
|
||||
|
||||
raw = pd.read_excel(path, sheet_name="Parameter", header=None)

# Excerpt R23:W30 (0-based: rows 22-29, columns 17-22): bunker parameters.
bunker = raw.iloc[22:30, 17:23].copy()

# Power-plant and refinement bunkers (Jänschwalde, SP, BW3, ISP):
# rows 3-6 of the excerpt, i.e. spreadsheet rows 26-29.
plants = bunker.iloc[3:7].reset_index(drop=True)
kt_cols = ["anfang_mo_di_kt", "anfang_rest_kt", "zielbestand_kt", "maximal_kt"]
plants.columns = ["typ", "anlage", *kt_cols]

# forward-fill the bunker type over its detail rows
plants["typ"] = plants["typ"].ffill()

# coerce all level columns to numbers in one pass
plants[kt_cols] = plants[kt_cols].apply(pd.to_numeric, errors="coerce")

# rename *_kt -> *_t, then convert kt -> t
rename_map = {c: c[:-3] + "_t" for c in kt_cols}
plants = plants.rename(columns=rename_map)
t_cols = list(rename_map.values())
plants[t_cols] *= 1000

# pre-haul window in days; label-based lookup into the raw sheet
# (row label 29, column label 19 are preserved by the iloc slice above)
vorfahrfenster_tage = pd.to_numeric(bunker.loc[29, 19], errors="coerce")

plants.to_parquet(OUTPUT_DIR / "bunker.parquet")
print("Saved bunker.parquet")
plants


pd.DataFrame([{"vorfahrfenster_tage": vorfahrfenster_tage}]).to_parquet(
    OUTPUT_DIR / "bunker_vorfahrfenster.parquet"
)
print("Saved bunker_vorfahrfenster.parquet")


# %%
print("\n ####################### Done preprocessing exploration_preprocess.py ####################### \n")
|
||||
Loading…
x
Reference in New Issue
Block a user