import os
import subprocess
import sys
import warnings
warnings.filterwarnings("ignore")
def run_pip(*args):
    """Invoke ``pip install --no-cache-dir <args...>`` via the current interpreter.

    Raises subprocess.CalledProcessError if the install fails, aborting the script.
    """
    command = [sys.executable, "-m", "pip", "install", "--no-cache-dir", *args]
    subprocess.check_call(command)
# ── Phase 1: Install packages ────────────────────────────────────────────────
# Each dependency is installed only if importing it fails, so re-runs of the
# script skip straight through when the environment is already provisioned.
print("=== Installing gradio (if needed) ===")
try:
    import gradio  # noqa: F401
    print("gradio already installed.")
except ImportError:
    run_pip("gradio")
print("=== Installing torch (CPU-only, ~190 MB) ===")
try:
    import torch  # noqa: F401
    print("torch already installed.")
except ImportError:
    # The CPU wheel index keeps the download small (no CUDA libraries).
    run_pip("torch", "--index-url", "https://download.pytorch.org/whl/cpu")
print("=== Installing transformers 4.46.3 ===")
# Pin to last v4 release — transformers 5.x removed the 'summarization' pipeline task.
try:
    import transformers as _tf
    if _tf.__version__ != "4.46.3":
        # Wrong version present: fall through to the pinned reinstall below.
        raise ImportError("wrong version")
    print("transformers 4.46.3 already installed.")
except (ImportError, AttributeError):
    run_pip("transformers==4.46.3")
# ── Phase 2: Fix the requests-vs-httpx incompatibility ───────────────────────
#
# What happens:
#   - transformers 4.46.3 requires huggingface-hub<1.0, so pip installs 0.36.x.
#   - huggingface-hub 0.36.x makes get_session() return an httpx.Client when
#     httpx is present (it is — gradio depends on it).
#   - transformers' own hub.py then calls that client with requests-style kwargs:
#       get_session().head(url, allow_redirects=False, proxies=proxies, timeout=10)
#   - httpx.Client rejects every one of these: allow_redirects, proxies, etc.
#
# Fix:
#   After importing transformers (so its module object is in sys.modules), replace
#   the `get_session` name inside the `transformers.utils.hub` namespace with a
#   callable that returns a plain requests.Session. A requests.Session accepts all
#   of those kwargs natively, so every existing call in hub.py works unchanged.
import transformers.utils.hub as _t_hub  # noqa: E402
import requests as _requests  # noqa: E402


def _fresh_requests_session():
    """Stand-in for huggingface-hub's get_session(): always a plain requests.Session."""
    return _requests.Session()


_t_hub.get_session = _fresh_requests_session
print("Patched transformers.utils.hub.get_session β requests.Session()")
# ββ Phase 3: Safe imports βββββββββββββββββββββββββββββββββββββββββββββββββββββ
import gradio as gr # noqa: E402
import torch # noqa: E402
from transformers import pipeline # noqa: E402
# ── App setup ────────────────────────────────────────────────────────────────
DEFAULT_MODEL = "sshleifer/distilbart-cnn-6-6"
# Model id → short human-readable description shown next to the UI dropdown.
AVAILABLE_MODELS = {
    "sshleifer/distilbart-cnn-6-6": "Fast & light, good for general summarization",
    "facebook/bart-large-cnn": "Larger BART model, better detail retention",
    "google/pegasus-cnn_dailymail": "Pegasus model for high-quality summarization",
    "allenai/led-base-16384": "Handles longer scientific documents",
}
print(f"Loading default model: {DEFAULT_MODEL}")
# Eagerly load the default pipeline at startup so the first click is fast.
summarizer = pipeline("summarization", model=DEFAULT_MODEL, device=-1)  # device=-1 → CPU
# Canned demo inputs selectable from the "Load Example Text" dropdown;
# keys are the dropdown choices, values are the full texts pasted into the input box.
EXAMPLE_TEXTS = {
    "news_article": (
        "In a historic move, global leaders have agreed to phase out fossil fuels over the next two "
        "decades. This landmark decision came after weeks of intense negotiations during the international "
        "climate summit. Experts believe this will drastically cut carbon emissions and pave the way for "
        "sustainable energy sources worldwide. Countries will now be held accountable through annual "
        "environmental reviews."
    ),
    "scientific_abstract": (
        "The rise of antibiotic-resistant bacteria poses one of the most significant threats to global "
        "health in the 21st century. Recent studies have shown that the overuse and misuse of antibiotics "
        "in both human medicine and agriculture have accelerated the evolution of resistant strains. In "
        "this review, we summarize current research on bacterial resistance mechanisms, including horizontal "
        "gene transfer and biofilm formation. We also explore novel approaches to combating resistance, "
        "such as bacteriophage therapy, antimicrobial peptides, and CRISPR-based gene editing technologies. "
        "The paper further outlines a strategic framework for integrating surveillance, policy reforms, and "
        "public health initiatives to curb the spread of resistance. While scientific innovation holds "
        "promise, global cooperation and responsible antibiotic stewardship remain essential to preventing "
        "a post-antibiotic era where common infections could once again become deadly."
    ),
    "business_report": (
        "The company reported a 32% increase in quarterly revenue, largely driven by the success of its "
        "latest AI-powered product line. International markets, particularly in Asia and Europe, showed "
        "strong adoption rates. Leadership announced plans to reinvest earnings into R&D and global "
        "expansion, while shareholders reacted positively with a 15% spike in stock prices."
    ),
}
def summarize_text(text, model_name, summary_length, num_beams):
    """Summarize *text* with the selected Hugging Face model.

    Parameters:
        text: the document to summarize; blank/whitespace-only input returns a
            prompt message instead of running the model.
        model_name: model id chosen in the UI dropdown.
        summary_length: one of "very_short"/"short"/"medium"/"long"; unknown
            values fall back to the "medium" preset.
        num_beams: beam-search width from the slider (coerced to int).

    Returns:
        The summary string, or an "Error: ..." message — errors are returned as
        text so the Gradio output box shows them instead of the app crashing.
    """
    if not text.strip():
        return "Please provide some text to summarize."
    try:
        global summarizer
        # Reload the pipeline only when the user picked a DIFFERENT model.
        # (Previously it was rebuilt on every click, discarding the pipeline
        # loaded at startup and re-reading model weights each time.)
        loaded = getattr(summarize_text, "_loaded_model", DEFAULT_MODEL)
        if model_name != loaded:
            summarizer = pipeline("summarization", model=model_name, device=-1)
            summarize_text._loaded_model = model_name
        # (min_length, max_length) token presets for each UI length choice.
        length_mapping = {
            "very_short": (30, 50),
            "short": (50, 70),
            "medium": (70, 100),
            "long": (100, 130),
        }
        min_len, max_len = length_mapping.get(summary_length, (70, 100))
        result = summarizer(
            text,
            max_length=int(max_len),
            min_length=int(min_len),
            num_beams=int(num_beams),
            do_sample=False,
        )
        return result[0]["summary_text"]
    except Exception as exc:  # UI boundary: surface any failure as output text
        return f"Error: {exc}"
def count_words(text):
    """Return a "<N> words" label for the live word counter under the input box."""
    word_total = len(text.split())
    return f"{word_total} words"
def paste_example(example_type):
    """Return the canned example text for *example_type*, or "" if unknown."""
    if example_type in EXAMPLE_TEXTS:
        return EXAMPLE_TEXTS[example_type]
    return ""
# ── Gradio UI ────────────────────────────────────────────────────────────────
with gr.Blocks(title="Multimodel Summarization App", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# π Multimodel Text Summarization")
    gr.Markdown(
        "Summarize news, reports, or scientific content using various models like BART, Pegasus, or LED."
    )
    with gr.Row():
        # Left column: input text, example loader, and generation controls.
        with gr.Column(scale=3):
            text_input = gr.Textbox(
                lines=12,
                label="Text to Summarize",
                placeholder="Paste or type your text here...",
                elem_id="text_input",
            )
            # Live word count, refreshed on every edit of the input box.
            word_counter = gr.Markdown("0 words")
            text_input.change(count_words, inputs=[text_input], outputs=[word_counter])
            with gr.Row():
                example_dropdown = gr.Dropdown(
                    choices=list(EXAMPLE_TEXTS.keys()),
                    value=None,
                    label="Load Example Text",
                )
                example_load_btn = gr.Button("Load Example")
            with gr.Row():
                model_choice = gr.Dropdown(
                    choices=list(AVAILABLE_MODELS.keys()),
                    value=DEFAULT_MODEL,
                    label="Select Summarization Model",
                )
                # Short description of the currently selected model.
                model_info = gr.Markdown(f"**Model info:** {AVAILABLE_MODELS[DEFAULT_MODEL]}")
            with gr.Row():
                summary_length = gr.Radio(
                    choices=["very_short", "short", "medium", "long"],
                    value="medium",
                    label="Summary Length",
                )
                num_beams = gr.Slider(minimum=1, maximum=8, value=4, step=1, label="Beam Size")
            summarize_button = gr.Button("Generate Summary", variant="primary", size="lg")
        # Right column: the generated summary output.
        with gr.Column(scale=2):
            gr.Markdown("### Summary Result")
            summary_output = gr.Textbox(
                label="Generated Summary",
                lines=12,
                placeholder="Your summary will appear here...",
            )
    # Event wiring: model description updates, example loading, summarization.
    model_choice.change(
        fn=lambda x: f"**Model info:** {AVAILABLE_MODELS.get(x, 'Custom model')}",
        inputs=[model_choice],
        outputs=[model_info],
    )
    example_load_btn.click(fn=paste_example, inputs=[example_dropdown], outputs=[text_input])
    summarize_button.click(
        fn=summarize_text,
        inputs=[text_input, model_choice, summary_length, num_beams],
        outputs=[summary_output],
    )
    # NOTE(review): the "β" bullets below look mojibake-damaged in the source
    # (likely checkmark/bullet characters originally). Left byte-identical here
    # because this is a runtime string rendered in the UI — confirm intended glyphs.
    gr.Markdown("""
---
β
Choose from different summarization models
β
Works great for academic, news, or business content
β
Customize summary length and beam search for better quality
Built using Gradio and Hugging Face Transformers
""")
if __name__ == "__main__":
    # Launch the Gradio server (blocking call). The stray trailing "|" residue
    # after demo.launch() was removed — it was not valid Python.
    demo.launch()