Meteor21 committed
Commit f02b11f
1 Parent(s): a3cb8ba

Upload 6 files

Files changed (6):
  1. Dockerfile +17 -0
  2. README.md +54 -13
  3. app.py +127 -0
  4. constants.py +2 -0
  5. requirements.txt +10 -0
  6. search.py +108 -0
Dockerfile ADDED
@@ -0,0 +1,17 @@
+ FROM python:3.8.15
+
+ WORKDIR /Users/me_teor21/Workspace/item-search
+
+ COPY requirements.txt ./
+
+ RUN pip install -r requirements.txt
+
+ COPY search.py ./
+
+ COPY app.py ./
+
+ COPY constants.py ./
+
+ # COPY . .
+
+ ENTRYPOINT ["python", "app.py"]
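The image never declares the port the app listens on. A small optional addition, assuming app.py keeps `server_port=8000` (EXPOSE is purely informational; publishing still happens via `-p` at run time):

```
EXPOSE 8000
```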
README.md CHANGED
@@ -1,13 +1,54 @@
- ---
- title: Item Search
- emoji: 🐢
- colorFrom: blue
- colorTo: green
- sdk: gradio
- sdk_version: 4.16.0
- app_file: app.py
- pinned: false
- license: mit
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # Shopping Search Engine
+
+ ## Description
+
+ Look for the ideal clothing items 😎
+
+ ## Instructions
+
+ 1. Install the dependencies
+
+ ```
+ pip install -r requirements.txt
+ ```
+
+ 2. Run the app
+
+ ```
+ python app.py
+ ```
+
+ ## Build and run container
+
+ 1. Build the image
+
+ ```
+ docker build --tag item-search .
+ ```
+
+ 2. Run the container (app.py listens on port 8000 inside the container)
+
+ ```
+ docker run -it -d --name item-search-engine -p 7000:8000 item-search:latest
+ ```
+
+ ## Structure
+
+ ```
+ .
+ ├── app.py
+ ├── constants.py
+ ├── Dockerfile
+ ├── LICENSE
+ ├── README.md
+ ├── search.py
+ └── requirements.txt
+ ```
+
+ ## Author
+
+ [Ismael C.](https://ismaelmekene.com)
+
+ ## License
+
+ Licensed under the MIT License.
app.py ADDED
@@ -0,0 +1,127 @@
+ #!/usr/bin/env python
+ # -*- coding: utf-8 -*-
+
+ import os
+ from pinecone import Pinecone, ServerlessSpec
+ from pinecone_text.sparse import BM25Encoder
+ from datasets import load_dataset
+ from sentence_transformers import SentenceTransformer
+ import torch
+ from io import BytesIO
+ from base64 import b64encode
+ from tqdm.auto import tqdm
+ from PIL import Image
+ import gradio as gr
+ from constants import *
+
+ from search import SearchItem
+
+ from fastapi import FastAPI
+
+
+
+
+
+
+ # initialize connection to pinecone (get API key at app.pinecone.io)
+ api_key = PINECONE_API_KEY or os.getenv("PINECONE_API_KEY")
+ # find your environment next to the api key in pinecone console
+ env = PINECONE_ENVIRONMENT or os.getenv("PINECONE_ENVIRONMENT")
+
+ fashion_processor = SearchItem(api_key, env)
+
+
+ def retrieve_images(query, image=None):
+     if image:
+         # If an image is provided, use retrieve_image_from_image
+         return retrieve_image_from_image(image, query)
+     else:
+         # Otherwise fall back to a text-only search
+         return retrieve_image_from_query(query)
+
+
+
+ def retrieve_image_from_query(query):
+
+     # create sparse and dense vectors
+     sparse = fashion_processor.bm25.encode_queries(query)
+     dense = fashion_processor.clip_model.encode(query).tolist()
+     hdense, hsparse = fashion_processor.hybrid_scale(dense, sparse)
+
+     result = fashion_processor.index.query(
+         top_k=10,
+         vector=hdense,
+         sparse_vector=hsparse,
+         include_metadata=True
+     )
+
+     imgs = [fashion_processor.images[int(r["id"])] for r in result["matches"]]
+
+     return imgs
+
+
+ def retrieve_image_from_image(image, query):
+
+     try:
+         # create sparse and dense vectors
+         sparse = fashion_processor.bm25.encode_queries(query)
+         w, h = 60, 80
+         image = Image.open(image.name).resize((w, h))
+         dense = fashion_processor.clip_model.encode(image).tolist()
+         hdense, hsparse = fashion_processor.hybrid_scale(dense, sparse)
+
+
+         result = fashion_processor.index.query(
+             top_k=10,
+             vector=hdense,
+             sparse_vector=hsparse,
+             include_metadata=True
+         )
+
+         imgs = [fashion_processor.images[int(r["id"])] for r in result["matches"]]
+
+         return imgs
+
+     except Exception as e:
+         # print(f"Error processing image: {e}")
+         print(e)
+         return None
+
+
+
+ def show_img(image):
+     return image.name if image else "No image provided"
+
+
+ with gr.Blocks() as demo:
+     gr.Markdown(
+         """
+         # Shopping Search Engine
+
+         Look for the ideal clothing items 😎
+         """)
+
+     with gr.Row():
+         with gr.Column():
+
+             query = gr.Textbox(placeholder="Search Items")
+             gr.HTML("OR")
+             photo = gr.Image()
+             with gr.Row():
+                 button = gr.UploadButton(label="Upload Image", file_types=["image"])
+                 textbox = gr.Textbox(placeholder="Additional Details ?")
+             submit_button = gr.Button(value="Submit")
+
+         with gr.Column():
+             gallery = gr.Gallery().style(
+                 object_fit='contain',
+                 height='auto',
+                 preview=True
+             )
+
+     query.submit(fn=lambda query: retrieve_images(query), inputs=[query], outputs=[gallery])
+     submit_button.click(fn=lambda image, query: show_img(image), inputs=[button, textbox], outputs=[photo]) \
+         .then(fn=lambda image, query: retrieve_images(query, image), inputs=[button, textbox], outputs=[gallery])
+
+ if __name__ == "__main__":
+     demo.launch(server_name="0.0.0.0", server_port=8000)
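For a quick smoke test of the text path without the UI, a minimal sketch (hypothetical usage, not part of the commit; it assumes valid Pinecone credentials and a populated 'clip' index, and importing app loads the dataset and CLIP model at module scope):

```
# Hypothetical REPL session; heavy on first run (dataset + model download).
from app import retrieve_image_from_query

imgs = retrieve_image_from_query("blue shoes")
print(len(imgs))  # up to 10 PIL images from the fashion dataset
```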
constants.py ADDED
@@ -0,0 +1,2 @@
+ PINECONE_API_KEY = '810e1b45-1489-41a8-998e-1ed0fb2d21a5'
+ PINECONE_ENVIRONMENT = 'gcp-starter'
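Committing a live API key is risky once the Space is public. A minimal alternative sketch for constants.py (same variable names; assumes the credentials are exported as environment variables, which the fallbacks in app.py and search.py already expect):

```
import os

# Hypothetical replacement: read credentials from the environment
# instead of hardcoding them in the repository.
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
PINECONE_ENVIRONMENT = os.getenv("PINECONE_ENVIRONMENT", "gcp-starter")
```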
requirements.txt ADDED
@@ -0,0 +1,10 @@
+ datasets
+ transformers
+ sentence-transformers
+ huggingface-hub
+ pinecone-client
+ pinecone-text
+ protobuf==3.20.3
+ gradio==3.41.2
+ fastapi
+ uvicorn==0.23.1
search.py ADDED
@@ -0,0 +1,108 @@
+ #!/usr/bin/env python
+ # -*- coding: utf-8 -*-
+
+ import os
+ from pinecone import Pinecone, ServerlessSpec
+ from pinecone_text.sparse import BM25Encoder
+ from datasets import load_dataset
+ from sentence_transformers import SentenceTransformer
+ import torch
+ from io import BytesIO
+ from base64 import b64encode
+ from tqdm.auto import tqdm
+ from constants import *
+
+
+
+ # initialize connection to pinecone (get API key at app.pinecone.io)
+ api_key = PINECONE_API_KEY or os.getenv("PINECONE_API_KEY")
+ # find your environment next to the api key in pinecone console
+ env = PINECONE_ENVIRONMENT or os.getenv("PINECONE_ENVIRONMENT")
+
+
+
+ class SearchItem:
+     def __init__(self, api_key=None, env=None, device='cuda' if torch.cuda.is_available() else 'cpu'):
+         self.api_key = api_key
+         self.environment = env
+         self.pinecone_instance = self.connect_to_pinecone(self.api_key, self.environment)
+         self.index = self.pinecone_instance.Index('clip')
+         self.images, self.metadata = self.load_fashion_dataset()
+         self.clip_model = self.initialize_clip_model(device=device)
+         self.bm25 = self.initialize_bm25_encoder(self.metadata)
+
+
+
+     def connect_to_pinecone(self, api_key, env):
+         api_key = api_key or os.getenv('PINECONE_API_KEY')
+         env = env or os.getenv('PINECONE_ENVIRONMENT')
+
+         if not api_key or not env:
+             raise ValueError("Pinecone API key and environment are required.")
+
+         pinecone_instance = Pinecone(api_key=api_key, environment=env)
+         return pinecone_instance
+
+     def load_fashion_dataset(self):
+         fashion = load_dataset("ashraq/fashion-product-images-small", split="train")
+         images = fashion["image"]
+         metadata = fashion.remove_columns("image").to_pandas()
+         return images, metadata
+
+     def initialize_clip_model(self, device='cuda' if torch.cuda.is_available() else 'cpu'):
+         model = SentenceTransformer('sentence-transformers/clip-ViT-B-32', device=device)
+         return model
+
+     def initialize_bm25_encoder(self, metadata):
+         bm25 = BM25Encoder()
+         bm25.fit(metadata['productDisplayName'])
+         return bm25
+
+     @staticmethod
+     def hybrid_scale(dense, sparse, alpha=0.05):
+         """Hybrid vector scaling using a convex combination
+
+         alpha * dense + (1 - alpha) * sparse
+
+         Args:
+             dense: array of floats representing the dense query vector
+             sparse: a dict with `indices` and `values` keys
+             alpha: float between 0 and 1, where 0 == sparse only
+                 and 1 == dense only
+         """
+         if alpha < 0 or alpha > 1:
+             raise ValueError("Alpha must be between 0 and 1")
+
+         # Scale sparse and dense vectors to create hybrid search vectors
+         hsparse = {
+             'indices': sparse['indices'],
+             'values': [v * (1 - alpha) for v in sparse['values']]
+         }
+         hdense = [v * alpha for v in dense]
+
+         return hdense, hsparse
+
+
+ if __name__ == "__main__":
+
+
+     fashion_processor = SearchItem(api_key, env)
+
+     query = "blue shoes"
+     # create sparse and dense vectors
+     sparse = fashion_processor.bm25.encode_queries(query)
+     dense = fashion_processor.clip_model.encode(query).tolist()
+
+     hdense, hsparse = fashion_processor.hybrid_scale(dense, sparse)
+
+     result = fashion_processor.index.query(
+         top_k=5,
+         vector=hdense,
+         sparse_vector=hsparse,
+         include_metadata=True
+     )
+
+     imgs = [fashion_processor.images[int(r["id"])] for r in result["matches"]]
+
+     print('Ok')
+     # breakpoint()
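Since hybrid_scale is a pure static method, it can be sanity-checked without any Pinecone setup. A tiny example with made-up numbers, showing that alpha=0.05 weights the dense vector at 5% and the sparse values at 95%:

```
from search import SearchItem

dense = [0.5, 0.5]
sparse = {"indices": [3, 7], "values": [1.0, 2.0]}

hdense, hsparse = SearchItem.hybrid_scale(dense, sparse, alpha=0.05)
print(hdense)             # [0.025, 0.025]  -> v * alpha
print(hsparse["values"])  # [0.95, 1.9]     -> v * (1 - alpha)
```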