File size: 9,509 Bytes
8f6e6b8
 
 
 
 
 
b44bc34
f01d54d
fdebaf9
f01d54d
 
 
fdebaf9
f01d54d
 
 
 
 
 
 
 
fdebaf9
f01d54d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fdebaf9
f01d54d
fdebaf9
 
 
 
 
 
 
f01d54d
fdebaf9
 
 
 
 
f01d54d
fdebaf9
 
8f6e6b8
fdebaf9
 
b44bc34
fdebaf9
8f6e6b8
fdebaf9
 
 
f01d54d
fdebaf9
8f6e6b8
 
 
 
fdebaf9
 
 
 
8f6e6b8
 
 
f01d54d
8f6e6b8
 
b44bc34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8f6e6b8
 
b44bc34
 
8f6e6b8
 
 
 
b44bc34
8f6e6b8
 
f01d54d
 
 
8f6e6b8
f01d54d
 
b44bc34
f01d54d
 
b44bc34
 
 
f01d54d
 
 
8f6e6b8
b44bc34
8f6e6b8
b44bc34
8f6e6b8
 
 
 
 
b44bc34
fdebaf9
f01d54d
8f6e6b8
 
b44bc34
 
 
8f6e6b8
 
 
 
 
 
 
b44bc34
8f6e6b8
f01d54d
b44bc34
8f6e6b8
 
 
 
 
b44bc34
8f6e6b8
 
 
 
 
 
 
b44bc34
8f6e6b8
 
 
 
 
 
 
b44bc34
8f6e6b8
f01d54d
8f6e6b8
 
 
 
 
 
 
 
b44bc34
8f6e6b8
 
 
 
 
b44bc34
8f6e6b8
f01d54d
8f6e6b8
 
 
b44bc34
8f6e6b8
 
 
 
b44bc34
 
 
8f6e6b8
 
 
 
b44bc34
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
# Bootstrap imports: subprocess + sys drive the pip self-install phases below.
import os
# NOTE(review): `os` appears unused in this script — confirm before removing.
import subprocess
import sys
import warnings
# Silence all warnings globally (torch/transformers deprecation noise would
# otherwise flood the console during model loads).
warnings.filterwarnings("ignore")


def run_pip(*args):
    """Install the given pip targets, raising CalledProcessError on failure.

    Extra *args* are appended verbatim, so callers may pass flags such as
    ``--index-url`` alongside package specifiers.
    """
    cmd = [sys.executable, "-m", "pip", "install", "--no-cache-dir", *args]
    subprocess.check_call(cmd)


# ── Phase 1: Install packages ─────────────────────────────────────────────────
# Each install is guarded by an import probe, so re-running the script is a
# fast no-op once the environment is set up.

print("=== Installing gradio (if needed) ===")
try:
    import gradio  # noqa: F401
    print("gradio already installed.")
except ImportError:
    run_pip("gradio")

print("=== Installing torch (CPU-only, ~190 MB) ===")
try:
    import torch  # noqa: F401
    print("torch already installed.")
except ImportError:
    # CPU wheel index keeps the download small; no CUDA is needed since the
    # pipelines below run with device=-1 (CPU).
    run_pip("torch", "--index-url", "https://download.pytorch.org/whl/cpu")

print("=== Installing transformers 4.46.3 ===")
# Pin to last v4 release — transformers 5.x removed the 'summarization' pipeline task.
try:
    import transformers as _tf
    # A wrong version is treated the same as a missing install: force the pin.
    if _tf.__version__ != "4.46.3":
        raise ImportError("wrong version")
    print("transformers 4.46.3 already installed.")
except (ImportError, AttributeError):
    run_pip("transformers==4.46.3")

# ── Phase 2: Fix the requests-vs-httpx incompatibility ───────────────────────
#
# What happens:
#   - transformers 4.46.3 requires huggingface-hub<1.0, so pip installs 0.36.x.
#   - huggingface-hub 0.36.x makes get_session() return an httpx.Client when
#     httpx is present (it is β€” gradio depends on it).
#   - transformers' own hub.py then calls that client with requests-style kwargs:
#       get_session().head(url, allow_redirects=False, proxies=proxies, timeout=10)
#   - httpx.Client rejects every one of these: allow_redirects, proxies, etc.
#
# Fix:
#   After importing transformers (so its module object is in sys.modules), replace
#   the `get_session` name inside the `transformers.utils.hub` namespace with a
#   lambda that returns a plain requests.Session.  A requests.Session accepts all
#   of those kwargs natively, so every existing call in hub.py works unchanged.

import transformers.utils.hub as _t_hub  # noqa: E402
import requests as _requests              # noqa: E402

_t_hub.get_session = lambda: _requests.Session()
print("Patched transformers.utils.hub.get_session β†’ requests.Session()")

# ── Phase 3: Safe imports ─────────────────────────────────────────────────────

import gradio as gr                       # noqa: E402
import torch                              # noqa: E402
from transformers import pipeline         # noqa: E402

# ── App setup ─────────────────────────────────────────────────────────────────

# Default checkpoint: a small distilled BART — quick to download and fast on CPU.
DEFAULT_MODEL = "sshleifer/distilbart-cnn-6-6"

# Hugging Face model id → short description shown next to the model dropdown.
AVAILABLE_MODELS = {
    "sshleifer/distilbart-cnn-6-6":   "Fast & light, good for general summarization",
    "facebook/bart-large-cnn":         "Larger BART model, better detail retention",
    "google/pegasus-cnn_dailymail":    "Pegasus model for high-quality summarization",
    "allenai/led-base-16384":          "Handles longer scientific documents",
}

# Eagerly load the default model at startup (network download on first run)
# so the first summarization request doesn't pay the full load cost.
print(f"Loading default model: {DEFAULT_MODEL}")
summarizer = pipeline("summarization", model=DEFAULT_MODEL, device=-1)  # device=-1 → CPU

# Canned sample inputs for the "Load Example" dropdown; keys double as the
# dropdown choices (see paste_example below).
EXAMPLE_TEXTS = {
    "news_article": (
        "In a historic move, global leaders have agreed to phase out fossil fuels over the next two "
        "decades. This landmark decision came after weeks of intense negotiations during the international "
        "climate summit. Experts believe this will drastically cut carbon emissions and pave the way for "
        "sustainable energy sources worldwide. Countries will now be held accountable through annual "
        "environmental reviews."
    ),
    "scientific_abstract": (
        "The rise of antibiotic-resistant bacteria poses one of the most significant threats to global "
        "health in the 21st century. Recent studies have shown that the overuse and misuse of antibiotics "
        "in both human medicine and agriculture have accelerated the evolution of resistant strains. In "
        "this review, we summarize current research on bacterial resistance mechanisms, including horizontal "
        "gene transfer and biofilm formation. We also explore novel approaches to combating resistance, "
        "such as bacteriophage therapy, antimicrobial peptides, and CRISPR-based gene editing technologies. "
        "The paper further outlines a strategic framework for integrating surveillance, policy reforms, and "
        "public health initiatives to curb the spread of resistance. While scientific innovation holds "
        "promise, global cooperation and responsible antibiotic stewardship remain essential to preventing "
        "a post-antibiotic era where common infections could once again become deadly."
    ),
    "business_report": (
        "The company reported a 32% increase in quarterly revenue, largely driven by the success of its "
        "latest AI-powered product line. International markets, particularly in Asia and Europe, showed "
        "strong adoption rates. Leadership announced plans to reinvest earnings into R&D and global "
        "expansion, while shareholders reacted positively with a 15% spike in stock prices."
    ),
}


def summarize_text(text, model_name, summary_length, num_beams):
    if not text.strip():
        return "Please provide some text to summarize."
    try:
        global summarizer
        summarizer = pipeline("summarization", model=model_name, device=-1)
        length_mapping = {
            "very_short": (30, 50),
            "short":      (50, 70),
            "medium":     (70, 100),
            "long":       (100, 130),
        }
        min_len, max_len = length_mapping.get(summary_length, (70, 100))
        result = summarizer(
            text,
            max_length=int(max_len),
            min_length=int(min_len),
            num_beams=int(num_beams),
            do_sample=False,
        )
        return result[0]["summary_text"]
    except Exception as exc:
        return f"Error: {exc}"


def count_words(text):
    """Return a '<N> words' label for the live word counter under the textbox."""
    word_total = len(text.split())
    return "{} words".format(word_total)


def paste_example(example_type):
    """Look up the canned example text for *example_type*; unknown keys yield ''."""
    if example_type in EXAMPLE_TEXTS:
        return EXAMPLE_TEXTS[example_type]
    return ""


# ── Gradio UI ──────────────────────────────────────────────────────────────────

with gr.Blocks(title="Multimodel Summarization App", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# πŸ“ Multimodel Text Summarization")
    gr.Markdown(
        "Summarize news, reports, or scientific content using various models like BART, Pegasus, or LED."
    )

    with gr.Row():
        with gr.Column(scale=3):
            text_input = gr.Textbox(
                lines=12,
                label="Text to Summarize",
                placeholder="Paste or type your text here...",
                elem_id="text_input",
            )
            word_counter = gr.Markdown("0 words")
            text_input.change(count_words, inputs=[text_input], outputs=[word_counter])

            with gr.Row():
                example_dropdown = gr.Dropdown(
                    choices=list(EXAMPLE_TEXTS.keys()),
                    value=None,
                    label="Load Example Text",
                )
                example_load_btn = gr.Button("Load Example")

            with gr.Row():
                model_choice = gr.Dropdown(
                    choices=list(AVAILABLE_MODELS.keys()),
                    value=DEFAULT_MODEL,
                    label="Select Summarization Model",
                )
                model_info = gr.Markdown(f"**Model info:** {AVAILABLE_MODELS[DEFAULT_MODEL]}")

            with gr.Row():
                summary_length = gr.Radio(
                    choices=["very_short", "short", "medium", "long"],
                    value="medium",
                    label="Summary Length",
                )
                num_beams = gr.Slider(minimum=1, maximum=8, value=4, step=1, label="Beam Size")

            summarize_button = gr.Button("Generate Summary", variant="primary", size="lg")

        with gr.Column(scale=2):
            gr.Markdown("### Summary Result")
            summary_output = gr.Textbox(
                label="Generated Summary",
                lines=12,
                placeholder="Your summary will appear here...",
            )

    model_choice.change(
        fn=lambda x: f"**Model info:** {AVAILABLE_MODELS.get(x, 'Custom model')}",
        inputs=[model_choice],
        outputs=[model_info],
    )
    example_load_btn.click(fn=paste_example, inputs=[example_dropdown], outputs=[text_input])
    summarize_button.click(
        fn=summarize_text,
        inputs=[text_input, model_choice, summary_length, num_beams],
        outputs=[summary_output],
    )

    gr.Markdown("""
    ---
    βœ… Choose from different summarization models  
    βœ… Works great for academic, news, or business content  
    βœ… Customize summary length and beam search for better quality  
    Built using Gradio and Hugging Face Transformers
    """)

if __name__ == "__main__":
    demo.launch()