meta data for this page
  •  

Ez a dokumentum egy előző változata!


#!/bin/bash
# Converts every *.out file located next to this script into one Excel
# workbook by running the embedded Python program further below.
#
# Usage: <script> [OUTPUT_FILE]
#   OUTPUT_FILE  optional workbook name; defaults to
#                tsm_output_<YYYYmmdd_HHMMSS>.xlsx
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
OUTPUT_FILE="${1:-tsm_output_${TIMESTAMP}.xlsx}"
# nullglob is enabled only for this expansion so that "no match" yields an
# empty array instead of the literal pattern.
shopt -s nullglob
OUT_FILES=("$SCRIPT_DIR"/*.out)
shopt -u nullglob
if [[ ${#OUT_FILES[@]} -eq 0 ]]; then
    # Diagnostics belong on stderr so they do not pollute captured stdout.
    echo "HIBA: Nem talalhato .out fajl: $SCRIPT_DIR" >&2
    exit 1
fi
# Make sure openpyxl is importable by the very interpreter that will run the
# embedded program: "python3 -m pip" installs for that python3, whereas a
# bare "pip3" may be bound to a different interpreter.
if ! python3 -c "import openpyxl" 2>/dev/null; then
    # First try a system-wide install (older pip versions reject the
    # --break-system-packages flag, hence the silenced stderr), then fall
    # back to a per-user install.
    python3 -m pip install openpyxl --quiet --break-system-packages 2>/dev/null \
        || python3 -m pip install openpyxl --quiet --user
fi
# Run the embedded Python program, which python3 reads from stdin ("-").
# The quoted 'PYEOF' delimiter stops the shell from expanding anything inside
# the here-document. The program receives the script directory as sys.argv[1]
# and the output file name as sys.argv[2].
python3 - "$SCRIPT_DIR" "$OUTPUT_FILE" << 'PYEOF'
import os, sys, re, csv
from io import StringIO
from pathlib import Path
from openpyxl import Workbook
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
from openpyxl.utils import get_column_letter

def convert_value(val):
    """Turn a textual cell into an int or float when it looks numeric.

    Non-string and empty inputs are returned unchanged. Strings are stripped
    first; when no numeric reading applies, the stripped string is returned.

    Separator handling:
      * "1,234.56"     comma = thousands, dot = decimal       -> 1234.56
      * "1.234,56"     dot = thousands, comma = decimal       -> 1234.56
      * "1,234"        single group of three digits           -> 1234
      * "1,234,567"    several groups of three digits         -> 1234567
      * "12,5"         decimal comma                          -> 12.5

    A float whose value is integral and below 1e15 in magnitude is returned
    as an int. Non-finite results ("inf", "nan") stay text.
    """
    if not val or not isinstance(val, str):
        return val
    val = val.strip()
    if not val:
        return val

    cleaned = val
    if ',' in val and '.' in val:
        if re.fullmatch(r'-?\d+(?:\.\d{3})+,\d+', val):
            # European layout: dropping the commas here would turn
            # "1.234,56" into 1.23456, so swap the separator roles instead.
            cleaned = val.replace('.', '').replace(',', '.')
        else:
            cleaned = val.replace(',', '')
    elif ',' in val:
        parts = val.split(',')
        if len(parts) == 2 and len(parts[1]) == 3 and parts[1].isdigit():
            cleaned = val.replace(',', '')
        elif len(parts) == 2 and parts[1].replace('-', '').isdigit():
            cleaned = val.replace(',', '.')
        elif re.fullmatch(r'-?\d{1,3}(?:,\d{3})+', val):
            # More than one thousands group, e.g. "1,234,567".
            cleaned = val.replace(',', '')

    # int()/float() accept underscores as digit separators (PEP 515), which
    # would silently turn an identifier such as "1_000" into a number.
    if '_' in cleaned:
        return val

    if '.' not in cleaned and cleaned.lstrip('-').isdigit():
        try:
            return int(cleaned)
        except ValueError:
            # e.g. "--5" or digit-like characters int() cannot parse;
            # fall through to the float attempt.
            pass
    try:
        f = float(cleaned)
        if f == int(f) and abs(f) < 1e15:
            return int(f)
        return f
    except (ValueError, OverflowError):
        # ValueError: not a number (or NaN); OverflowError: infinity.
        return val

def remove_hdr(c):
    """Strip the client banner and ANS800x status lines from raw output.

    Everything up to and including a line containing 'ANS8000I' is dropped,
    and after that marker every line whose stripped text starts with 'ANS800'
    is dropped as well. If that leaves nothing (for example because the
    marker never occurs), the whole text is returned with only the known
    banner lines filtered out.
    """
    banner_markers = ('IBM Spectrum', 'Command Line', '(c) Copyright',
                      'Session established', 'Server Version', 'Server date',
                      'Last access')
    all_lines = c.split('\n')

    kept = []
    marker_seen = False
    for line in all_lines:
        if 'ANS8000I' in line:
            marker_seen = True
        elif marker_seen and not line.strip().startswith('ANS800'):
            kept.append(line)

    if kept:
        return '\n'.join(kept)

    # Fallback: no payload after the marker, so only remove banner lines.
    return '\n'.join(
        line for line in all_lines
        if not any(marker in line for marker in banner_markers)
    )

def parse_fw(c):
    """Parse a fixed-width table whose columns are marked by a dash ruler.

    The ruler is the first line that consists only of whitespace and dashes
    and holds more than ten dashes. Each run of dashes defines one column.
    The non-blank lines directly above the ruler (back to the previous blank
    line or the start of the text) form the header; multi-line header cells
    are joined with a space and have their hyphens removed.

    Below the ruler, blank lines and lines starting with 'ANS' are ignored.
    A line is merged into the previous row when its first column is empty, or
    when the previous row has a cell ending in '-' and the line starts with a
    space. Finished cells are whitespace-normalised and passed through
    convert_value.

    Returns a list of rows (header first), or None when no ruler is found or
    the ruler is the very first line.
    """
    lines = c.split('\n')

    # Locate the ruler line.
    ruler_idx = next(
        (i for i, text in enumerate(lines)
         if re.match(r'^[\s-]+$', text) and text.count('-') > 10),
        None,
    )
    if ruler_idx is None or ruler_idx == 0:
        return None

    # The header block starts right after the closest blank line above the
    # ruler, or at the top of the text when there is no blank line.
    header_start = 0
    for i in range(ruler_idx - 1, -1, -1):
        if lines[i].strip() == '':
            header_start = i + 1
            break
    header_lines = lines[header_start:ruler_idx]

    # Every run of dashes is one column, stored as (start, end) offsets.
    spans = [(m.start(), m.end()) for m in re.finditer(r'-+', lines[ruler_idx])]
    if not spans:
        return None

    def tidy(cells):
        # Undo hyphenated wraps, collapse whitespace, then convert the cell.
        out = []
        for cell in cells:
            if cell:
                cell = re.sub(r'-\s+', '', cell)
                cell = re.sub(r'\s+', ' ', cell)
                cell = cell.strip()
            out.append(convert_value(cell))
        return out

    def has_open_hyphen(cells):
        # True when any non-empty cell ends with a hyphen.
        return any(cell.endswith('-') for cell in cells if cell)

    header = []
    for start, end in spans:
        pieces = []
        for text in header_lines:
            # Slicing clamps to the line length, so short lines give ''.
            piece = text[start:end].strip()
            if piece and piece not in ('-', '--'):
                pieces.append(piece.replace('-', ''))
        header.append(' '.join(pieces))

    table = [header]
    current = None
    hyphen_pending = False
    first_col_end = spans[0][1]

    for text in lines[ruler_idx + 1:]:
        stripped = text.strip()
        if not stripped or stripped.startswith('ANS'):
            continue

        cells = [text[start:end].strip() for start, end in spans]
        continues = (text[:first_col_end].strip() == ''
                     or (hyphen_pending and text[0] == ' '))

        if continues and current is not None:
            # Fold this line into the row that is still being assembled.
            for idx, cell in enumerate(cells):
                if not cell:
                    continue
                before = current[idx]
                if not before:
                    current[idx] = cell
                elif before.endswith('-'):
                    current[idx] = before[:-1] + cell
                else:
                    current[idx] = before + ' ' + cell
        else:
            if current is not None:
                table.append(tidy(current))
            current = cells
        hyphen_pending = has_open_hyphen(current)

    if current is not None:
        table.append(tidy(current))

    return table

def parse_csv(c):
    """Parse comma-separated output line by line.

    Blank lines and lines starting with 'ANS' are skipped. Each remaining
    line is parsed on its own with csv.reader; every field is stripped and
    passed through convert_value. If the csv module rejects a line, the line
    is split on plain commas instead and surrounding double quotes are
    removed from each field.

    Returns the list of rows, or None when no row was produced.
    """
    rows = []
    for line in c.strip().split('\n'):
        stripped = line.strip()
        if not stripped or stripped.startswith('ANS'):
            continue
        try:
            for fields in csv.reader(StringIO(line)):
                if fields:
                    rows.append([convert_value(x.strip()) for x in fields])
        except csv.Error:
            # Only malformed CSV triggers the naive fallback; anything else
            # (KeyboardInterrupt, programming errors) must propagate.
            rows.append([convert_value(x.strip().strip('"')) for x in line.split(',')])
    return rows if rows else None

def parse_sys(c):
    """Parse output made of sections separated by lines of ten or more '*'.

    Each section is parsed with parse_fw. The text following '---> ' in the
    section (if any) is used as its title and prepended to every data row in
    a leading column whose header is "Section". Only the header row of the
    first usable section is kept; later sections contribute data rows only.

    Returns the combined rows, or None when no section yielded a table with
    at least one data row.
    """
    combined = []
    for chunk in re.split(r'\*{10,}', c):
        chunk = chunk.strip()
        if not chunk:
            continue

        title_match = re.search(r'---> (.+?)(?:\n|$)', chunk)
        title = title_match.group(1).strip() if title_match else ""

        table = parse_fw(chunk)
        if not table or len(table) <= 1:
            continue

        table[0].insert(0, "Section")
        for row in table[1:]:
            row.insert(0, title)

        combined.extend(table[1:] if combined else table)

    return combined or None

def detect(c):
    """Pick a parser for the given raw output and return the parsed rows.

    After remove_hdr has stripped the banner, the format is chosen in this
    order:
      1. sectioned output ('***' and '--->' both present)   -> parse_sys
      2. a dash ruler within the first 20 lines             -> parse_fw
      3. more than 3 of the first 10 lines contain a comma  -> parse_csv
      4. otherwise one single-cell row per non-blank line

    Returns None when nothing but whitespace remains after header removal.
    """
    body = remove_hdr(c)
    if not body.strip():
        return None

    if '***' in body and '--->' in body:
        return parse_sys(body)

    lines = body.split('\n')
    if any(re.match(r'^[\s-]+$', ln) and ln.count('-') > 10 for ln in lines[:20]):
        return parse_fw(body)

    if sum(',' in ln for ln in lines[:10]) > 3:
        return parse_csv(body)

    return [[convert_value(ln.strip())] for ln in lines if ln.strip()]

def sn(fn):
    """Derive a worksheet name from a file name.

    Takes the file stem, drops everything up to and including the first
    underscore (unless nothing would remain), replaces the characters
    \\ / * ? : [ ] with '_' and truncates the result to 31 characters.
    """
    stem = Path(fn).stem
    _, sep, tail = stem.partition('_')
    if sep and tail:
        stem = tail
    forbidden = str.maketrans({ch: '_' for ch in '\\/*?:[]'})
    return stem.translate(forbidden)[:31]

# ---------------------------------------------------------------------------
# Main program: read every *.out file in the given directory, parse it and
# write one worksheet per file into a single workbook.
#   sys.argv[1]  directory holding the *.out files
#   sys.argv[2]  output workbook name, resolved relative to that directory
# ---------------------------------------------------------------------------
sd = Path(sys.argv[1])
of = sd / sys.argv[2]
fs = sorted(sd.glob('*.out'))
if not fs:
    print("HIBA")
    sys.exit(1)
print(f"Talaltam {len(fs)} .out fajlt")

wb = Workbook()
# Drop the default sheet; every sheet in the result comes from an input file.
wb.remove(wb.active)
hf = PatternFill(start_color='4472C4', end_color='4472C4', fill_type='solid')
hfn = Font(bold=True, color='FFFFFF')
bd = Border(left=Side(style='thin'), right=Side(style='thin'),
            top=Side(style='thin'), bottom=Side(style='thin'))
un = {}  # sheet names already handed out

for f in fs:
    print(f"  Feldolgozas: {f.name}")
    try:
        # The context manager closes the handle even when read() fails.
        with open(f, 'r', encoding='utf-8', errors='replace') as fh:
            ct = fh.read()
    except Exception as e:
        print(f"    HIBA: {e}")
        continue

    dt = detect(ct)
    if not dt:
        # No parser produced rows: dump up to 100 non-blank raw lines.
        dt = [[l] for l in ct.split('\n') if l.strip()][:100]
    if not dt:
        continue

    # Make the sheet name unique by appending _1, _2, ... to a shortened base.
    bs = sn(f.name)
    nm = bs
    cn = 1
    while nm in un:
        nm = f"{bs[:28]}_{cn}"
        cn += 1
    un[nm] = True

    ws = wb.create_sheet(title=nm)
    for ri, rw in enumerate(dt, 1):
        for ci, cl in enumerate(rw, 1):
            cell = ws.cell(ri, ci, value=cl)
            if ri == 1:
                # The first row is styled as the header row.
                cell.font = hfn
                cell.fill = hf
                cell.alignment = Alignment(wrap_text=True)
            cell.border = bd

    # Size each column to its longest value, capped at 50.
    for ci in range(1, ws.max_column + 1):
        mx = max((len(str(c.value or '')) for c in ws[get_column_letter(ci)]), default=10)
        ws.column_dimensions[get_column_letter(ci)].width = min(mx + 2, 50)

    if len(dt) > 1:
        ws.auto_filter.ref = ws.dimensions
    ws.freeze_panes = 'A2'

# The default sheet was removed above, so if every input was skipped the
# workbook has no sheet and saving it would raise. Fail with a clear message.
if not wb.sheetnames:
    print("HIBA: Nem sikerult egyetlen munkalapot sem letrehozni", file=sys.stderr)
    sys.exit(1)

wb.save(of)
print(f"\nKesz! {of}")
print(f"Osszesen {len(wb.sheetnames)} munkalap")
PYEOF