import os
import subprocess
import sys
import warnings
warnings.filterwarnings("ignore")
def run_pip(*args):
    """Invoke ``pip install --no-cache-dir <args...>`` via the current interpreter.

    Raises subprocess.CalledProcessError if the install fails, aborting the script.
    """
    command = [sys.executable, "-m", "pip", "install", "--no-cache-dir", *args]
    subprocess.check_call(command)
# ── Phase 1: Install packages ────────────────────────────────────────────────
# Each dependency is installed only if importing it fails, so re-runs of the
# script skip straight through when the environment is already provisioned.
print("=== Installing gradio (if needed) ===")
try:
    import gradio  # noqa: F401
    print("gradio already installed.")
except ImportError:
    run_pip("gradio")
print("=== Installing torch (CPU-only, ~190 MB) ===")
try:
    import torch  # noqa: F401
    print("torch already installed.")
except ImportError:
    # The CPU wheel index keeps the download small (no CUDA libraries).
    run_pip("torch", "--index-url", "https://download.pytorch.org/whl/cpu")
print("=== Installing transformers 4.46.3 ===")
# Pin to last v4 release — transformers 5.x removed the 'summarization' pipeline task.
try:
    import transformers as _tf
    if _tf.__version__ != "4.46.3":
        # Wrong version present: fall through to the pinned reinstall below.
        raise ImportError("wrong version")
    print("transformers 4.46.3 already installed.")
except (ImportError, AttributeError):
    run_pip("transformers==4.46.3")
# ── Phase 2: Fix the requests-vs-httpx incompatibility ───────────────────────
#
# What happens:
#   - transformers 4.46.3 requires huggingface-hub<1.0, so pip installs 0.36.x.
#   - huggingface-hub 0.36.x makes get_session() return an httpx.Client when
#     httpx is present (it is — gradio depends on it).
#   - transformers' own hub.py then calls that client with requests-style kwargs:
#       get_session().head(url, allow_redirects=False, proxies=proxies, timeout=10)
#   - httpx.Client rejects every one of these: allow_redirects, proxies, etc.
#
# Fix:
#   After importing transformers (so its module object is in sys.modules), replace
#   the `get_session` name inside the `transformers.utils.hub` namespace with a
#   callable that returns a plain requests.Session. A requests.Session accepts all
#   of those kwargs natively, so every existing call in hub.py works unchanged.
import transformers.utils.hub as _t_hub  # noqa: E402
import requests as _requests  # noqa: E402


def _fresh_requests_session():
    """Stand-in for huggingface-hub's get_session(): always a plain requests.Session."""
    return _requests.Session()


_t_hub.get_session = _fresh_requests_session
print("Patched transformers.utils.hub.get_session β requests.Session()")
# ββ Phase 3: Safe imports βββββββββββββββββββββββββββββββββββββββββββββββββββββ
import gradio as gr # noqa: E402
import torch # noqa: E402
from transformers import pipeline # noqa: E402
# ── App setup ────────────────────────────────────────────────────────────────
DEFAULT_MODEL = "sshleifer/distilbart-cnn-6-6"
# Model id → short human-readable description shown next to the UI dropdown.
AVAILABLE_MODELS = {
    "sshleifer/distilbart-cnn-6-6": "Fast & light, good for general summarization",
    "facebook/bart-large-cnn": "Larger BART model, better detail retention",
    "google/pegasus-cnn_dailymail": "Pegasus model for high-quality summarization",
    "allenai/led-base-16384": "Handles longer scientific documents",
}
print(f"Loading default model: {DEFAULT_MODEL}")
# Eagerly load the default pipeline at startup so the first click is fast.
summarizer = pipeline("summarization", model=DEFAULT_MODEL, device=-1)  # device=-1 → CPU
# Canned demo inputs selectable from the "Load Example Text" dropdown;
# keys are the dropdown choices, values are the full texts pasted into the input box.
EXAMPLE_TEXTS = {
    "news_article": (
        "In a historic move, global leaders have agreed to phase out fossil fuels over the next two "
        "decades. This landmark decision came after weeks of intense negotiations during the international "
        "climate summit. Experts believe this will drastically cut carbon emissions and pave the way for "
        "sustainable energy sources worldwide. Countries will now be held accountable through annual "
        "environmental reviews."
    ),
    "scientific_abstract": (
        "The rise of antibiotic-resistant bacteria poses one of the most significant threats to global "
        "health in the 21st century. Recent studies have shown that the overuse and misuse of antibiotics "
        "in both human medicine and agriculture have accelerated the evolution of resistant strains. In "
        "this review, we summarize current research on bacterial resistance mechanisms, including horizontal "
        "gene transfer and biofilm formation. We also explore novel approaches to combating resistance, "
        "such as bacteriophage therapy, antimicrobial peptides, and CRISPR-based gene editing technologies. "
        "The paper further outlines a strategic framework for integrating surveillance, policy reforms, and "
        "public health initiatives to curb the spread of resistance. While scientific innovation holds "
        "promise, global cooperation and responsible antibiotic stewardship remain essential to preventing "
        "a post-antibiotic era where common infections could once again become deadly."
    ),
    "business_report": (
        "The company reported a 32% increase in quarterly revenue, largely driven by the success of its "
        "latest AI-powered product line. International markets, particularly in Asia and Europe, showed "
        "strong adoption rates. Leadership announced plans to reinvest earnings into R&D and global "
        "expansion, while shareholders reacted positively with a 15% spike in stock prices."
    ),
}
def summarize_text(text, model_name, summary_length, num_beams):
    """Summarize *text* with the selected Hugging Face model.

    Parameters:
        text: the document to summarize; blank/whitespace-only input returns a
            prompt message instead of running the model.
        model_name: model id chosen in the UI dropdown.
        summary_length: one of "very_short"/"short"/"medium"/"long"; unknown
            values fall back to the "medium" preset.
        num_beams: beam-search width from the slider (coerced to int).

    Returns:
        The summary string, or an "Error: ..." message — errors are returned as
        text so the Gradio output box shows them instead of the app crashing.
    """
    if not text.strip():
        return "Please provide some text to summarize."
    try:
        global summarizer
        # Reload the pipeline only when the user picked a DIFFERENT model.
        # (Previously it was rebuilt on every click, discarding the pipeline
        # loaded at startup and re-reading model weights each time.)
        loaded = getattr(summarize_text, "_loaded_model", DEFAULT_MODEL)
        if model_name != loaded:
            summarizer = pipeline("summarization", model=model_name, device=-1)
            summarize_text._loaded_model = model_name
        # (min_length, max_length) token presets for each UI length choice.
        length_mapping = {
            "very_short": (30, 50),
            "short": (50, 70),
            "medium": (70, 100),
            "long": (100, 130),
        }
        min_len, max_len = length_mapping.get(summary_length, (70, 100))
        result = summarizer(
            text,
            max_length=int(max_len),
            min_length=int(min_len),
            num_beams=int(num_beams),
            do_sample=False,
        )
        return result[0]["summary_text"]
    except Exception as exc:  # UI boundary: surface any failure as output text
        return f"Error: {exc}"
def count_words(text):
    """Return a "<N> words" label for the live word counter under the input box."""
    word_total = len(text.split())
    return f"{word_total} words"
def paste_example(example_type):
    """Return the canned example text for *example_type*, or "" if unknown."""
    if example_type in EXAMPLE_TEXTS:
        return EXAMPLE_TEXTS[example_type]
    return ""
# ── Gradio UI ────────────────────────────────────────────────────────────────
with gr.Blocks(title="Multimodel Summarization App", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# π Multimodel Text Summarization")
    gr.Markdown(
        "Summarize news, reports, or scientific content using various models like BART, Pegasus, or LED."
    )
    with gr.Row():
        # Left column: input text, example loader, and generation controls.
        with gr.Column(scale=3):
            text_input = gr.Textbox(
                lines=12,
                label="Text to Summarize",
                placeholder="Paste or type your text here...",
                elem_id="text_input",
            )
            # Live word count, refreshed on every edit of the input box.
            word_counter = gr.Markdown("0 words")
            text_input.change(count_words, inputs=[text_input], outputs=[word_counter])
            with gr.Row():
                example_dropdown = gr.Dropdown(
                    choices=list(EXAMPLE_TEXTS.keys()),
                    value=None,
                    label="Load Example Text",
                )
                example_load_btn = gr.Button("Load Example")
            with gr.Row():
                model_choice = gr.Dropdown(
                    choices=list(AVAILABLE_MODELS.keys()),
                    value=DEFAULT_MODEL,
                    label="Select Summarization Model",
                )
                # Short description of the currently selected model.
                model_info = gr.Markdown(f"**Model info:** {AVAILABLE_MODELS[DEFAULT_MODEL]}")
            with gr.Row():
                summary_length = gr.Radio(
                    choices=["very_short", "short", "medium", "long"],
                    value="medium",
                    label="Summary Length",
                )
                num_beams = gr.Slider(minimum=1, maximum=8, value=4, step=1, label="Beam Size")
            summarize_button = gr.Button("Generate Summary", variant="primary", size="lg")
        # Right column: the generated summary output.
        with gr.Column(scale=2):
            gr.Markdown("### Summary Result")
            summary_output = gr.Textbox(
                label="Generated Summary",
                lines=12,
                placeholder="Your summary will appear here...",
            )
    # Event wiring: model description updates, example loading, summarization.
    model_choice.change(
        fn=lambda x: f"**Model info:** {AVAILABLE_MODELS.get(x, 'Custom model')}",
        inputs=[model_choice],
        outputs=[model_info],
    )
    example_load_btn.click(fn=paste_example, inputs=[example_dropdown], outputs=[text_input])
    summarize_button.click(
        fn=summarize_text,
        inputs=[text_input, model_choice, summary_length, num_beams],
        outputs=[summary_output],
    )
    # NOTE(review): the "β" bullets below look mojibake-damaged in the source
    # (likely checkmark/bullet characters originally). Left byte-identical here
    # because this is a runtime string rendered in the UI — confirm intended glyphs.
    gr.Markdown("""
---
β
Choose from different summarization models
β
Works great for academic, news, or business content
β
Customize summary length and beam search for better quality
Built using Gradio and Hugging Face Transformers
""")
if __name__ == "__main__":
    # Launch the Gradio server (blocking call). The stray trailing "|" residue
    # after demo.launch() was removed — it was not valid Python.
    demo.launch()