Spaces:

deepaksarika01
/

youtube-video-qa-lamini

Runtime error

App Files Files Community

youtube-video-qa-lamini / model.py

deepaksarika01

fix typo

0a0c342 about 1 year ago

raw

history blame contribute delete

No virus

6.19 kB

	from langchain.llms import HuggingFacePipeline
	from langchain.embeddings import HuggingFaceInstructEmbeddings
	from langchain.chains import RetrievalQA
	from transformers import (
	AutoTokenizer,
	AutoModelForSeq2SeqLM,
	pipeline,
	GenerationConfig
	)
	from textwrap import dedent

	class lamini:
	    """Factory for a LangChain LLM backed by a local LaMini-Flan-T5 pipeline."""

	    def __init__(self):
	        pass

	    @staticmethod
	    def _generation_kwargs(options):
	        """Build the generation keyword arguments for the pipeline.

	        Args:
	            options (dict): user supplied overrides. Recognised keys are
	                ``max_length`` (default 512), ``temperature`` (default 0),
	                ``top_p`` (default 0.95) and ``repetition_penalty``
	                (default 1.15). Other keys are ignored.

	        Returns:
	            dict: keyword arguments to forward to ``transformers.pipeline``.
	        """
	        temperature = options.get("temperature", 0)
	        params = {
	            "max_length": options.get("max_length", 512),
	            "repetition_penalty": options.get("repetition_penalty", 1.15),
	        }
	        # temperature/top_p only take effect when sampling is enabled, and a
	        # temperature of 0 is not a valid sampling temperature. Treat a
	        # non-positive temperature as "greedy decoding" and leave the
	        # sampling parameters out entirely.
	        if temperature and temperature > 0:
	            params["do_sample"] = True
	            params["temperature"] = temperature
	            params["top_p"] = options.get("top_p", 0.95)
	        return params

	    def load_model(self, task="text2text-generation", **kwargs) -> "HuggingFacePipeline":
	        """Load the model and wrap it in a LangChain ``HuggingFacePipeline``.

	        - model: MBZUAI/LaMini-Flan-T5-248M

	        Args:
	            task (str): transformers pipeline task to build.
	            **kwargs: optional generation settings, see
	                ``_generation_kwargs`` for the recognised keys.

	        Returns:
	            HuggingFacePipeline: LLM wrapper around the generation pipeline.
	        """
	        model_id = "MBZUAI/LaMini-Flan-T5-248M"
	        tokenizer = AutoTokenizer.from_pretrained(model_id)
	        model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
	        gen_config = GenerationConfig.from_pretrained(model_id)

	        pipe = pipeline(
	            task,  # previously hard-coded, which silently ignored the argument
	            model=model,
	            tokenizer=tokenizer,
	            generation_config=gen_config,
	            **self._generation_kwargs(kwargs),
	        )

	        return HuggingFacePipeline(pipeline=pipe)

	class templates:
	    """Prompt helpers that prepend a fixed instruction and call the LLM."""

	    def __init__(self, llm: "HuggingFacePipeline"):
	        self.llm = llm

	    def _instruct(self, instruction, text, **kwargs):
	        """Call the LLM with ``instruction`` prepended to ``text``."""
	        return self.llm(instruction + text, **kwargs)

	    def summarize(self, text, **kwargs):
	        """Summarize text

	        Args:
	            text (str): text to summarize
	            **kwargs: forwarded unchanged to the LLM call

	        Returns:
	            str: summarized text
	        """
	        return self._instruct("summarize for better understanding: ", text, **kwargs)

	    def generate_title(self, text, **kwargs):
	        """Generate a title for text

	        Args:
	            text (str): text to generate title for
	            **kwargs: forwarded unchanged to the LLM call

	        Returns:
	            str: title
	        """
	        return self._instruct("generate a title for this text: ", text, **kwargs)

	    def generate_tile(self, text, **kwargs):
	        """Alias of ``generate_title`` kept for backward compatibility.

	        The method was originally published under this misspelled name, so
	        existing callers keep working.
	        """
	        return self.generate_title(text, **kwargs)

	class qa_template:
	    """Retrieval question answering over a text knowledge base, with a Gradio chat UI."""

	    # Reply shown when a question arrives before any knowledge base is loaded.
	    _NOT_LOADED_MESSAGE = (
	        "No video is loaded yet. Enter a video ID first and wait for it to "
	        "finish loading before asking questions."
	    )

	    def __init__(self, llm):
	        from langchain.chains.retrieval_qa.base import BaseRetrievalQA

	        self.llm = llm
	        # Assigned by load(). It has to exist (as None) from the start: a bare
	        # annotation does not create the attribute, and the chat callback
	        # would otherwise fail with AttributeError before the first load().
	        self.qa_inf: "BaseRetrievalQA | None" = None

	    def load(self, knowledge_base):
	        """Load knowledge base

	        Splits the text into chunks, embeds them into a Chroma vector store
	        and builds a "stuff" RetrievalQA chain on top of its retriever.

	        Args:
	            knowledge_base (str): knowledge base to load

	        Returns:
	            BaseRetrievalQA: (optional to use) returns QA interface, which is
	            also stored on ``self.qa_inf``
	        """
	        from utils import LangChainChunker
	        from langchain.vectorstores import Chroma

	        embeds = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-large")
	        chunks = LangChainChunker(knowledge_base).chunker(size=512)
	        retriever = Chroma.from_texts(chunks, embeds).as_retriever()

	        self.qa_inf = RetrievalQA.from_chain_type(
	            llm=self.llm, chain_type="stuff", retriever=retriever
	        )
	        return self.qa_inf

	    def _answer(self, msg, history):
	        """Answer one chat message and append the exchange to the history.

	        Args:
	            msg (str): the user's question
	            history (list): list of ``(question, answer)`` tuples so far

	        Returns:
	            tuple: ``("", history)`` - an empty string to clear the question
	            box and the updated history for the chatbot component
	        """
	        if history is None:
	            history = []
	        if self.qa_inf is None:
	            # Nothing indexed yet: tell the user instead of raising.
	            res = self._NOT_LOADED_MESSAGE
	        else:
	            res = self.qa_inf.run(msg)
	        history.append((msg, res))
	        return "", history

	    def start_gradio(self, title: str):
	        """Start gradio interface

	        Builds a Blocks UI with a chatbot, a video ID box and a question box,
	        then launches it.

	        Args:
	            title (str): currently unused by the Blocks UI; it is kept so
	                existing callers keep working.

	        Returns:
	            None
	        """
	        import gradio as gr

	        def interface(msg, history):
	            return self._answer(msg, history)

	        def reload(video_id):
	            from utils import getSubsText

	            # Submitting an empty box should not trigger a subtitle fetch.
	            video_id = (video_id or "").strip()
	            if not video_id:
	                return
	            print(f"Setting up {video_id}")
	            subs = getSubsText(video_id)
	            self.load(subs)

	        with gr.Blocks() as demo:
	            with gr.Column():
	                gr.Markdown(dedent("""
	                    # video to QA
	                    A test implementation to use vectorstores and mini llms to create
	                    a question answer chatbot interface for _youtube videos_
	                    """))
	                chatbot = gr.Chatbot()
	                with gr.Row():
	                    with gr.Column():
	                        videoId = gr.Textbox(label="Video ID", placeholder="Enter video ID here")
	                        msg = gr.Textbox(label="Question Box" , placeholder="Enter your question here")
	                        clear = gr.ClearButton([msg, videoId, chatbot])

	                    gr.Markdown(
	                        dedent("""

	                        ## Getting started
	                        to start up you need to enter the video ID of youtube video first

	                        Get a youtube video which has English dialog
	                        > ex: https://www.youtube.com/watch?v=BsnCpESUEqM

	                        in this `BsnCpESUEqM` is the video ID

	                        ```
	                        https://www.youtube.com/watch?v=BsnCpESUEqM
	                                                        ^^^^^^^^^^^
	                                                         video_id
	                        ```
	                        > in url paramets are seperated by `?` and for video id its `?v`

	                        copy-paste the video id to the textbox and press return/enter and wait ~5 seconds to fetch video information

	                        ---

	                        Now in the Question Box _box_/feild start typing the quesions and press return/enter to send to llm
	                        """)
	                    )
	            # Event listeners must be registered inside the Blocks context.
	            msg.submit(interface, [msg, chatbot], [msg, chatbot])
	            videoId.submit(reload, [videoId])

	        demo.launch()