﻿using System;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;

/// <summary>
/// Console tool that removes translation-pipeline markers ("SegID n.n", "PARA n",
/// "[source: ...]" tags and '>' markers) from a text file and writes the result
/// next to the input as "&lt;name&gt;_clean&lt;ext&gt;".
/// </summary>
class Program
{
    // Pattern notation: the patterns mostly use character classes instead of escapes.
    //   [ \t]  space or tab          [0-9]  ASCII digit
    //   [.]    literal dot           [[]    literal '['
    //   [\]]   literal ']' (escaped inside the class)
    // All patterns are constants, so they are built once as readonly statics.

    // "SegID 1" or "SegID 1.1" at the start of the input, including the spaces/tabs around it.
    private static readonly Regex SegIdRegex =
        new Regex(@"^[ \t]*SegID[ \t]+[0-9]+([.][0-9]+)?[ \t]*", RegexOptions.None);

    // A line that consists solely of a paragraph marker such as "PARA 1".
    private static readonly Regex ParaRegex =
        new Regex(@"^[ \t]*PARA[ \t]+[0-9]+[ \t]*$", RegexOptions.None);

    // A line that starts with "Clean Copy" or contains "End of Part" anywhere.
    private static readonly Regex StructureLineRegex =
        new Regex(@"^(Clean Copy|.*?End of Part)", RegexOptions.None);

    // "[source:" up to the nearest following ']'. A bare "[source]" (no colon) is not matched.
    private static readonly Regex SourceTagAnywhere =
        new Regex(@"[[]source:.*?[\]]", RegexOptions.None);

    // "SegID 1" / "SegID 1.1" at any position in the text.
    private static readonly Regex SegIdAnywhere =
        new Regex(@"SegID[ \t]+[0-9]+([.][0-9]+)?", RegexOptions.None);

    // "PARA 12" at any position in the text.
    private static readonly Regex ParaAnywhere =
        new Regex(@"PARA[ \t]+[0-9]+", RegexOptions.None);

    // Every '>' plus the spaces/tabs that follow it. NOTE: this is not anchored, so it
    // also removes '>' characters that appear in the middle of the text.
    private static readonly Regex ArrowCleaner =
        new Regex(@">[ \t]*", RegexOptions.None);

    /// <summary>
    /// Entry point: asks for a processing mode and an input file, cleans the file
    /// content with the selected mode and saves it as UTF-8 without a BOM.
    /// </summary>
    static void Main()
    {
        Console.OutputEncoding = Encoding.UTF8;

        Console.WriteLine("--------------------------------------------------");
        Console.WriteLine("      Translation Output Cleaner v2.5 (No-Slash)");
        Console.WriteLine("--------------------------------------------------");
        Console.WriteLine("Select Mode:");
        Console.WriteLine("1. Simple ID Removal (Legacy)");
        Console.WriteLine("2. Gemini Pro 3 (Standard Re-flow)");
        Console.WriteLine("3. Complex/Noisy Mode (Fixes [source] & jammed tags)");
        Console.Write("\nEnter choice (1-3): ");

        var key = Console.ReadKey().KeyChar;
        Console.WriteLine();

        // Input path; surrounding quotes (e.g. from "Copy as path") are stripped.
        Console.Write("\nEnter the input text file path: ");
        var inputPath = (Console.ReadLine() ?? string.Empty).Trim().Trim('"');

        if (string.IsNullOrWhiteSpace(inputPath) || !File.Exists(inputPath))
        {
            Console.WriteLine("Invalid path or file does not exist.");
            Console.ReadKey();
            return;
        }

        try
        {
            string rawContent = File.ReadAllText(inputPath, Encoding.UTF8);
            string cleanedText;

            switch (key)
            {
                case '3':
                    Console.WriteLine("Processing in Complex/Noisy Mode...");
                    cleanedText = ProcessComplexMode(rawContent);
                    break;
                case '2':
                case 'y':
                    Console.WriteLine("Processing in Gemini Pro 3 Paragraph Re-flow Mode...");
                    cleanedText = ProcessGeminiOutput(rawContent);
                    break;
                default:
                    // Any other key (including '1') selects the simple mode.
                    Console.WriteLine("Processing in Simple ID Removal Mode...");
                    cleanedText = ProcessSimpleMode(rawContent);
                    break;
            }

            // Output goes next to the input: "<name>_clean<ext>".
            var dir = Path.GetDirectoryName(inputPath) ?? "";
            var name = Path.GetFileNameWithoutExtension(inputPath);
            var ext = Path.GetExtension(inputPath);
            var outputPath = Path.Combine(dir, $"{name}_clean{ext}");

            var utf8NoBom = new UTF8Encoding(false);
            File.WriteAllText(outputPath, cleanedText, utf8NoBom);

            Console.WriteLine($"\nSuccess! Cleaned file saved to:");
            Console.WriteLine(outputPath);
        }
        catch (Exception ex)
        {
            Console.WriteLine($"\nError during processing: {ex.Message}");
        }

        Console.WriteLine("\nPress any key to exit...");
        Console.ReadKey();
    }

    // ---------------------------------------------------------
    // MODE 3: COMPLEX PROCESSING
    // ---------------------------------------------------------

    /// <summary>
    /// Removes "[source: ...]" tags, SegID labels and '>' markers from every line and
    /// re-flows the remaining text into paragraphs separated by a blank line.
    /// A PARA marker anywhere in a line ends the current paragraph at that position.
    /// </summary>
    /// <param name="raw">Full content of the input file.</param>
    /// <returns>The cleaned text, trimmed of leading and trailing whitespace.</returns>
    private static string ProcessComplexMode(string raw)
    {
        var output = new StringBuilder();
        var paragraph = new StringBuilder();

        foreach (var line in SplitLines(raw))
        {
            // Source tags go first so that marker-like text inside a tag is ignored.
            string withoutSources = SourceTagAnywhere.Replace(line.Trim(), "");

            // Split at PARA markers: text before a marker still belongs to the current
            // paragraph, text after it opens the next one. A line without a marker
            // yields a single piece and causes no paragraph break.
            string[] pieces = ParaAnywhere.Split(withoutSources);
            for (int i = 0; i < pieces.Length; i++)
            {
                if (i > 0)
                {
                    FlushBuffer(output, paragraph);
                }

                string text = SegIdAnywhere.Replace(pieces[i], "");
                text = ArrowCleaner.Replace(text, "");
                AppendToParagraph(paragraph, text.Trim());
            }
        }

        FlushBuffer(output, paragraph);
        return output.ToString().Trim();
    }

    // ---------------------------------------------------------
    // HELPERS & LEGACY MODES
    // ---------------------------------------------------------

    /// <summary>
    /// Splits text into lines, accepting CRLF, LF and lone CR as line terminators.
    /// </summary>
    private static string[] SplitLines(string raw)
    {
        return raw.Replace("\r\n", "\n").Replace('\r', '\n').Split('\n');
    }

    /// <summary>
    /// Writes the buffered paragraph followed by a blank line to <paramref name="output"/>
    /// and clears the buffer. Does nothing when the buffer is empty.
    /// </summary>
    private static void FlushBuffer(StringBuilder output, StringBuilder buffer)
    {
        if (buffer.Length > 0)
        {
            output.AppendLine(buffer.ToString());
            output.AppendLine();
            buffer.Clear();
        }
    }

    /// <summary>
    /// Appends <paramref name="text"/> to the paragraph buffer. A single space is
    /// inserted first unless the buffer is empty or ends in a CJK character.
    /// Empty or whitespace-only text is ignored.
    /// </summary>
    private static void AppendToParagraph(StringBuilder buffer, string text)
    {
        if (string.IsNullOrWhiteSpace(text))
        {
            return;
        }

        if (buffer.Length > 0 && !IsCjk(buffer[buffer.Length - 1]))
        {
            buffer.Append(' ');
        }

        buffer.Append(text);
    }

    /// <summary>
    /// Returns true for characters after which no joining space should be inserted:
    /// Han ideographs, kana, and CJK / fullwidth punctuation such as '。' or '！'.
    /// </summary>
    private static bool IsCjk(char c)
    {
        return (c >= 0x3000 && c <= 0x30FF)    // CJK Symbols and Punctuation, Hiragana, Katakana
            || (c >= 0x3400 && c <= 0x4DBF)    // CJK Unified Ideographs Extension A
            || (c >= 0x4E00 && c <= 0x9FFF)    // CJK Unified Ideographs
            || (c >= 0xFF01 && c <= 0xFF9F);   // Fullwidth ASCII variants, halfwidth CJK punctuation and Katakana
    }

    /// <summary>
    /// Mode 2: drops structure lines, treats standalone "PARA n" lines as paragraph
    /// breaks, strips the leading SegID label of each line and merges the remaining
    /// lines of a paragraph. Lines starting with '>' are emitted as their own paragraph.
    /// </summary>
    /// <param name="raw">Full content of the input file.</param>
    /// <returns>The re-flowed text, trimmed of leading and trailing whitespace.</returns>
    private static string ProcessGeminiOutput(string raw)
    {
        var output = new StringBuilder();
        var paragraph = new StringBuilder();

        foreach (var line in SplitLines(raw))
        {
            string trimmed = line.Trim();

            if (StructureLineRegex.IsMatch(trimmed))
            {
                continue;
            }

            if (ParaRegex.IsMatch(trimmed))
            {
                FlushBuffer(output, paragraph);
                continue;
            }

            string content = SegIdRegex.Replace(trimmed, "");
            if (string.IsNullOrWhiteSpace(content))
            {
                continue;
            }

            if (content.StartsWith(">", StringComparison.Ordinal))
            {
                // Quoted lines are kept verbatim and never merged with neighbours.
                FlushBuffer(output, paragraph);
                output.AppendLine(content);
                output.AppendLine();
                continue;
            }

            AppendToParagraph(paragraph, content);
        }

        FlushBuffer(output, paragraph);
        return output.ToString().Trim();
    }

    /// <summary>
    /// Mode 1: removes the leading SegID label from every line and leaves everything
    /// else in place. Lines are re-joined with <see cref="Environment.NewLine"/>, so the
    /// number of lines (including a trailing empty one) matches the input.
    /// </summary>
    /// <param name="raw">Full content of the input file.</param>
    /// <returns>The text with leading SegID labels removed.</returns>
    private static string ProcessSimpleMode(string raw)
    {
        string[] lines = SplitLines(raw);

        for (int i = 0; i < lines.Length; i++)
        {
            lines[i] = SegIdRegex.Replace(lines[i], "");
        }

        return string.Join(Environment.NewLine, lines);
    }
}